This page includes a small portion of the global.R script for running the App, but should be representative of many scenarios discussed in the series of posts.
Load all libraries that will be needed.
library(readxl)
library(dplyr)
library(stringr)
library(data.table)
library(ggplot2)
Load data. Define the color palette that will be used later in plotting.
load("survey")
# color palette
palette <- list(purple = c("#351F39", "#351C4D", "#6c1f55", "#765285", "#8a6899" ),
turquoise = c("#709FB0", "#849974", "#A0C1B8"),
golden = c("#D1A827", "#f3da4c"))
set <- survey[c("Q7.3", "Q7.2", "Q7.4", "country")]
tb <- replicate(length(set), data.frame(), simplify=FALSE)
for (m in 1:length(set)){
tb[m] <- list(data.frame(table(set[m])))
tb[[m]]$`%` <- round(tb[[m]]$Freq/320 *100, 2)
names(tb[[m]]) <- c("", "#", "%")
tb[[m]] <- tb[[m]][order(tb[[m]]$`#`,decreasing = T),]
}
## replicate() creates a list of 4 df; each df for one summary table
## tb[m] <- list() adds one df by another
## tb[[m]] extracting df
## tb[[m]][order(tb[[m]]$`#`,decreasing = T),] orders rows by freq
for (i in 1:4){
colnames(tb[[i]])[1] <- c("Student Status", "Country / Region", "Major", "country")[i]
}
tb[[2]][,2:3] <- NULL
tb
## [[1]]
## Student Status # %
## 1 Freshman 131 40.94
## 5 Sophomore 80 25.00
## 4 Senior 71 22.19
## 3 Junior 19 5.94
## 6 Study Away 14 4.38
## 2 Graduate Student / Joint Program 5 1.56
##
## [[2]]
## Country / Region
## 10 China
## 46 United States of America
## 26 Mexico
## 28 Pakistan
## 17 Hungary
## 30 Philippines
## 33 Russian Federation
## 45 Turkey
## 4 Bangladesh
## 6 Bosnia and Herzegovina
## 20 Israel
## 31 Poland
## 32 Republic of Korea
## 35 Singapore
## 37 South Korea
## 42 Taiwan
## 47 Venezuela, Bolivarian Republic of...
## 1 Afghanistan
## 2 Albania
## 3 Australia
## 5 Bolivia
## 7 Brazil
## 8 Bulgaria
## 9 Canada
## 11 Colombia
## 12 Costa Rica
## 13 Ecuador
## 14 Ethiopia
## 15 Honduras
## 16 Hong Kong (S.A.R.)
## 18 Indonesia
## 19 Iran, Islamic Republic of...
## 21 Italy
## 22 Japan
## 23 Kazakhstan
## 24 Latvia
## 25 Malaysia
## 27 New Zealand
## 29 Peru
## 34 Serbia
## 36 South Africa
## 38 Spain
## 39 Sweden
## 40 Switzerland
## 41 Syrian Arab Republic
## 43 The former Yugoslav Republic of Macedonia
## 44 Trinidad and Tobago
## 48 Viet Nam
##
## [[3]]
## Major # %
## 2 Business and Finance 71 22.19
## 3 Business and Marketing 24 7.50
## 5 Computer Systems Engineering 23 7.19
## 7 Economics 23 7.19
## 13 Mathematics 21 6.56
## 11 Interactive Media Arts 20 6.25
## 6 Data Science 16 5.00
## 16 Social Science 15 4.69
## 10 Humanities 12 3.75
## 14 Neural Science 9 2.81
## 12 Interactive Media Business 8 2.50
## 15 Physics 6 1.88
## 9 Global China Studies 5 1.56
## 1 Biology 4 1.25
## 8 Electrical and Systems Engineering 4 1.25
## 4 Chemistry 2 0.62
##
## [[4]]
## country # %
## 1 China 178 55.62
## 3 Other 71 22.19
## 2 U.S. 69 21.56
## 4 Undefined 2 0.62
Define groups for questions of multiple choices.
g <- list(status = c("Freshman", "Sophomore", "Junior", "Senior", "Study Away", "Graduate/Joint Program") ,
country = c("China", "U.S.", "Other"),
major = c("Science, CS, Engineering, Mathematics",
"Business, Finance, Economics",
"Humanities, Social Sciences",
"Data Science, Interactive Media Business",
"Interactive Media Arts"))
## Q2.2: My top reasons for visiting Library
Reason <- c("Work on a class assignment/paper",
"Watch video or listen audio", "Use specialized databases \\(e.g. Bloomberg, Wind\\)",
"Use a library computer", "Use a group study room", "Print, photocopy, scan", "Other",
"Meet up with friends" , "Hang out between classes",
"Get readings from Course Reserve", "Get help from a librarian", "Find a quiet place to study",
"Borrow books and materials", "Attend a library workshop")
r <- data.frame(survey$status, survey$country, survey$major2)
for (m in 1: length(Reason)){
r[,Reason[m]] <- str_extract(survey$Q2.2, Reason[m])
}
dtset <- replicate(3, data.frame(Reason), simplify = FALSE)
for (i in 1:3){
levels(dtset[[i]]$Reason)[levels(dtset[[i]]$Reason) == "Use specialized databases \\(e.g. Bloomberg, Wind\\)"] <- "Use specialized databases (e.g. Bloomberg, Wind)"
}
for (m in 1:length(dtset)){
for (n in 1:length(g[[m]])){
subgroup <- r[r[,m] == g[[m]][n], (length(g)+1):(length(Reason) + length(g))]
dtset[[m]][,g[[m]][n]] <- apply(subgroup, 2, function(x) length(which(!is.na(x))))
}
dtset[[m]]$Total<- rowSums(dtset[[m]][,2:n], na.rm = TRUE, dims = 1)
dtset[[m]] <- dtset[[m]][order(dtset[[m]]$Total,decreasing = T),]
}
## m denotes how many groups we have. We have 3 groups: country/status/major
## n denotes how many elements we have in each group. e.g. 3 elements in the group "country" (China, U.S., Other)
Reason2 <- c("Work on a class assignment/paper", "Watch video or listen audio",
"Use specialized databases (e.g. Bloomberg, Wind)",
"Use a library computer", "Use a group study room", "Print, photocopy, scan", "Other",
"Meet up with friends", "Hang out between classes", "Get readings from Course Reserve",
"Get help from a librarian", "Find a quiet place to study",
"Borrow books and materials", "Attend a library workshop")
l <- data.frame(Reason = Reason2, Level = c("High","Low","Low","Medium","Medium","High","Low","Medium","Low","Medium","Low","High","High","Low"))
for (i in 1:3){
dtset[[i]] <- dtset[[i]] %>% left_join(l, by = "Reason")
}
## labeling; for grouping in graphing
dtset
## [[1]]
## Reason Freshman Sophomore
## 1 Find a quiet place to study 94 55
## 2 Print, photocopy, scan 76 50
## 3 Work on a class assignment/paper 76 44
## 4 Borrow books and materials 38 25
## 5 Get readings from Course Reserve 13 12
## 6 Use a group study room 18 12
## 7 Use a library computer 16 10
## 8 Meet up with friends 17 12
## 9 Hang out between classes 14 5
## 10 Get help from a librarian 7 7
## 11 Use specialized databases (e.g. Bloomberg, Wind) 4 3
## 12 Attend a library workshop 13 0
## 13 Watch video or listen audio 5 4
## 14 Other 2 1
## Junior Senior Study Away Graduate/Joint Program Total Level
## 1 11 45 13 3 218 High
## 2 10 52 10 4 198 High
## 3 9 28 8 2 165 High
## 4 9 34 0 1 106 High
## 5 3 15 2 0 45 Medium
## 6 3 7 1 0 41 Medium
## 7 3 7 4 1 40 Medium
## 8 1 4 1 1 35 Medium
## 9 2 2 2 2 25 Low
## 10 1 8 0 0 23 Low
## 11 2 8 0 0 17 Low
## 12 2 1 0 1 16 Low
## 13 0 1 1 0 11 Low
## 14 1 1 0 0 5 Low
##
## [[2]]
## Reason China U.S. Other Total
## 1 Find a quiet place to study 123 47 50 170
## 2 Print, photocopy, scan 104 45 52 149
## 3 Work on a class assignment/paper 89 46 31 135
## 4 Borrow books and materials 63 15 28 78
## 5 Use a group study room 32 4 5 36
## 6 Get readings from Course Reserve 30 6 8 36
## 7 Use a library computer 11 20 10 31
## 8 Meet up with friends 23 4 9 27
## 9 Hang out between classes 16 6 5 22
## 10 Use specialized databases (e.g. Bloomberg, Wind) 12 3 2 15
## 11 Get help from a librarian 13 2 7 15
## 12 Attend a library workshop 9 5 3 14
## 13 Watch video or listen audio 7 2 2 9
## 14 Other 2 2 1 4
## Level
## 1 High
## 2 High
## 3 High
## 4 High
## 5 Medium
## 6 Medium
## 7 Medium
## 8 Medium
## 9 Low
## 10 Low
## 11 Low
## 12 Low
## 13 Low
## 14 Low
##
## [[3]]
## Reason
## 1 Find a quiet place to study
## 2 Print, photocopy, scan
## 3 Work on a class assignment/paper
## 4 Borrow books and materials
## 5 Use a group study room
## 6 Use a library computer
## 7 Get readings from Course Reserve
## 8 Meet up with friends
## 9 Get help from a librarian
## 10 Use specialized databases (e.g. Bloomberg, Wind)
## 11 Hang out between classes
## 12 Attend a library workshop
## 13 Watch video or listen audio
## 14 Other
## Science, CS, Engineering, Mathematics Business, Finance, Economics
## 1 9 80
## 2 12 78
## 3 6 63
## 4 9 36
## 5 2 17
## 6 2 15
## 7 4 10
## 8 1 17
## 9 2 9
## 10 0 10
## 11 0 6
## 12 1 5
## 13 0 7
## 14 0 1
## Humanities, Social Sciences Data Science, Interactive Media Business
## 1 21 17
## 2 23 12
## 3 18 16
## 4 13 9
## 5 3 2
## 6 3 3
## 7 6 2
## 8 1 1
## 9 2 1
## 10 3 0
## 11 1 5
## 12 1 4
## 13 0 0
## 14 1 0
## Interactive Media Arts Total Level
## 1 10 127 High
## 2 10 125 High
## 3 9 103 High
## 4 8 67 High
## 5 5 24 Medium
## 6 5 23 Medium
## 7 4 22 Medium
## 8 2 20 Medium
## 9 3 14 Low
## 10 1 13 Low
## 11 1 12 Low
## 12 1 11 Low
## 13 0 7 Low
## 14 1 2 Low
survey <- survey %>% mutate(
mostdays_crowded = ifelse(is.na(Q2.5_0_15_RANK), Q2.5_0_1_RANK, NA),
mostdays_crowded = ifelse(is.na(Q2.5_0_1_RANK), Q2.5_0_15_RANK, mostdays_crowded),
mostdays_modpop = ifelse(is.na(Q2.5_0_16_RANK), Q2.5_0_9_RANK, NA),
mostdays_modpop = ifelse(is.na(Q2.5_0_9_RANK), Q2.5_0_16_RANK, mostdays_modpop),
mostdays_noisy = ifelse(is.na(Q2.5_0_17_RANK), Q2.5_0_3_RANK, NA),
mostdays_noisy = ifelse(is.na(Q2.5_0_3_RANK), Q2.5_0_17_RANK, mostdays_noisy),
mostdays_quiet = ifelse(is.na(Q2.5_0_18_RANK), Q2.5_0_4_RANK, NA),
mostdays_quiet = ifelse(is.na(Q2.5_0_4_RANK), Q2.5_0_18_RANK, mostdays_quiet),
mostdays_silent = ifelse(is.na(Q2.5_0_19_RANK), Q2.5_0_5_RANK, NA),
mostdays_silent = ifelse(is.na(Q2.5_0_5_RANK), Q2.5_0_19_RANK, mostdays_silent),
mostdays_relaxed = ifelse(is.na(Q2.5_0_20_RANK), Q2.5_0_6_RANK, NA),
mostdays_relaxed = ifelse(is.na(Q2.5_0_6_RANK), Q2.5_0_20_RANK, mostdays_relaxed),
mostdays_focused = ifelse(is.na(Q2.5_0_21_RANK), Q2.5_0_7_RANK, NA),
mostdays_focused = ifelse(is.na(Q2.5_0_7_RANK), Q2.5_0_21_RANK, mostdays_focused)
)
## keep only if one of the pair is missing and the other non-missing
## this is due to how the Qualtrics survey was designed; not important for our purpose here
days_status <-
data.frame(survey %>% group_by(status) %>% summarise(mostdays_crowded = round(mean(mostdays_crowded, na.rm =TRUE),2))) %>%
left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_modpop = round(mean(mostdays_modpop, na.rm =TRUE),2)))) %>%
left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_noisy = round(mean(mostdays_noisy, na.rm =TRUE),2)))) %>%
left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_quiet = round(mean(mostdays_quiet, na.rm =TRUE),2)))) %>%
left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_silent = round(mean(mostdays_silent, na.rm =TRUE),2)))) %>%
left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_relaxed = round(mean(mostdays_relaxed, na.rm =TRUE),2)))) %>%
left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_focused = round(mean(mostdays_focused, na.rm =TRUE),2)))) %>%
rename(`crowded` = mostdays_crowded, `moderately populated` = mostdays_modpop,
`noisy` = mostdays_noisy, `quiet (occasional whispers)` = mostdays_quiet,
`(close to) silent` = mostdays_silent, `relaxed, cozy or homey` = mostdays_relaxed,
`focused` = mostdays_focused, `G` = status) %>%
melt()
## same for creating the country and major matrices
## Q2.5_0_GROUP: most days
Place <- c("crowded", "moderately populated", "noisy", "quiet \\(occasional whispers\\)", "\\(close to\\) silent", "relaxed, cozy or homey", "focused")
p1 <- data.frame(survey$status, survey$country, survey$major2)
for (m in 1:length(Place)){
p1[,Place[m]] <- str_extract(survey$Q2.5_0_GROUP, Place[m])
}
dtset4 <- replicate(3, data.frame(Place), simplify = FALSE)
for (m in 1:length(dtset4)){
for (n in 1:length(g[[m]])){
subgroup4 <- p1[p1[,m] == g[[m]][n], (length(g)+1):(length(Place) + length(g))]
dtset4[[m]][,g[[m]][n]] <- apply(subgroup4, 2, function(x) length(which(!is.na(x))))
}
dtset4[[m]]$Total<- rowSums(dtset4[[m]][,2:n], na.rm = TRUE, dims = 1)
}
for (i in 1:3){
levels(dtset4[[i]]$Place)[levels(dtset4[[i]]$Place) == "quiet \\(occasional whispers\\)"] <- "quiet (occasional whispers)"
levels(dtset4[[i]]$Place)[levels(dtset4[[i]]$Place) == "\\(close to\\) silent"] <- "(close to) silent"
}
dtset4
## [[1]]
## Place Freshman Sophomore Junior Senior Study Away
## 1 crowded 19 18 1 13 3
## 2 moderately populated 44 28 10 36 7
## 3 noisy 15 15 0 12 2
## 4 quiet (occasional whispers) 84 43 12 49 11
## 5 (close to) silent 49 32 6 31 9
## 6 relaxed, cozy or homey 66 34 12 40 8
## 7 focused 45 36 12 35 7
## Graduate/Joint Program Total
## 1 1 54
## 2 2 125
## 3 1 44
## 4 3 199
## 5 3 127
## 6 4 160
## 7 2 135
##
## [[2]]
## Place China U.S. Other Total
## 1 crowded 32 12 10 44
## 2 moderately populated 59 37 31 96
## 3 noisy 27 10 7 37
## 4 quiet (occasional whispers) 111 46 44 157
## 5 (close to) silent 64 32 34 96
## 6 relaxed, cozy or homey 80 44 39 124
## 7 focused 70 32 34 102
##
## [[3]]
## Place Science, CS, Engineering, Mathematics
## 1 crowded 2
## 2 moderately populated 7
## 3 noisy 4
## 4 quiet (occasional whispers) 9
## 5 (close to) silent 6
## 6 relaxed, cozy or homey 6
## 7 focused 9
## Business, Finance, Economics Humanities, Social Sciences
## 1 23 3
## 2 50 12
## 3 18 3
## 4 73 24
## 5 46 14
## 6 56 19
## 7 47 14
## Data Science, Interactive Media Business Interactive Media Arts Total
## 1 4 3 32
## 2 13 8 82
## 3 2 3 27
## 4 18 12 124
## 5 10 6 76
## 6 16 12 97
## 7 9 9 79
## grand matrix - most days
days_status <- days_status %>%
left_join(rename(melt(dtset4[[1]]), `G` = variable, `variable` = Place, `count` = value ), by = c("G", "variable"))
## Warning: Column `G` joining factors with different levels, coercing to
## character vector
## Warning: Column `variable` joining factors with different levels, coercing
## to character vector
days_status$G <- factor(days_status$G, levels = c("Freshman", "Sophomore", "Junior", "Senior",
"Graduate/Joint Program", "Study Away"))
## same for creating the country and major matrices
lib <- unlist(strsplit(survey$Q2.3.y, ","))
lib <- str_trim(lib, side = "both")
lib <- lib[!lib %in% c("NULL")]
lib <- data.frame(table(lib)) %>% arrange(-Freq)
color1 <- rep(c(palette[[1]][4], palette[[2]][1], palette[[3]][1], palette[[2]][2], palette[[2]][3], palette[[3]][2]),
length.out=nrow(lib))
## for later use in wordcloud2 package
lib
## lib Freq
## 1 silent 77
## 2 407 58
## 3 windows 47
## 4 other 31
## 5 group study room 23
## 6 400 22
## 7 428 17
## 8 desk 14
## 9 sofa 11
## 10 large desk 7
## 11 corner 6
## 12 small desk 4
## 13 4F 2
## 14 table 2
## 15 outlet 1
## 16 privacy 1
## 17 reserve 1
## 18 talking 1