This page includes a small portion of the global.R script for running the App, but should be representative of many scenarios discussed in the series of posts.


Load all libraries that will be needed.

library(readxl)
library(dplyr)
library(stringr)
library(data.table)
library(ggplot2)

Load data. Define the color palette that will be used later in plotting.

load("survey")

# color palette
palette <- list(purple = c("#351F39", "#351C4D", "#6c1f55", "#765285", "#8a6899" ),
                turquoise = c("#709FB0", "#849974", "#A0C1B8"),
                golden = c("#D1A827", "#f3da4c"))

Summary table

set <- survey[c("Q7.3", "Q7.2", "Q7.4", "country")]
tb <- replicate(length(set), data.frame(), simplify=FALSE)
for (m in 1:length(set)){
  tb[m] <- list(data.frame(table(set[m])))
  tb[[m]]$`%` <- round(tb[[m]]$Freq/320 *100, 2)
  names(tb[[m]]) <- c("", "#", "%")
  tb[[m]] <- tb[[m]][order(tb[[m]]$`#`,decreasing = T),]
}
## replicate() creates a list of 4 df; each df for one summary table
## tb[m] <- list() adds one df by another
## tb[[m]] extracting df
## tb[[m]][order(tb[[m]]$`#`,decreasing = T),] orders rows by freq

for (i in 1:4){
  colnames(tb[[i]])[1] <- c("Student Status", "Country / Region", "Major", "country")[i]
}

tb[[2]][,2:3] <- NULL 

tb
## [[1]]
##                     Student Status   #     %
## 1                         Freshman 131 40.94
## 5                        Sophomore  80 25.00
## 4                           Senior  71 22.19
## 3                           Junior  19  5.94
## 6                       Study Away  14  4.38
## 2 Graduate Student / Joint Program   5  1.56
## 
## [[2]]
##                             Country / Region
## 10                                     China
## 46                  United States of America
## 26                                    Mexico
## 28                                  Pakistan
## 17                                   Hungary
## 30                               Philippines
## 33                        Russian Federation
## 45                                    Turkey
## 4                                 Bangladesh
## 6                     Bosnia and Herzegovina
## 20                                    Israel
## 31                                    Poland
## 32                         Republic of Korea
## 35                                 Singapore
## 37                               South Korea
## 42                                    Taiwan
## 47      Venezuela, Bolivarian Republic of...
## 1                                Afghanistan
## 2                                    Albania
## 3                                  Australia
## 5                                    Bolivia
## 7                                     Brazil
## 8                                   Bulgaria
## 9                                     Canada
## 11                                  Colombia
## 12                                Costa Rica
## 13                                   Ecuador
## 14                                  Ethiopia
## 15                                  Honduras
## 16                        Hong Kong (S.A.R.)
## 18                                 Indonesia
## 19              Iran, Islamic Republic of...
## 21                                     Italy
## 22                                     Japan
## 23                                Kazakhstan
## 24                                    Latvia
## 25                                  Malaysia
## 27                               New Zealand
## 29                                      Peru
## 34                                    Serbia
## 36                              South Africa
## 38                                     Spain
## 39                                    Sweden
## 40                               Switzerland
## 41                      Syrian Arab Republic
## 43 The former Yugoslav Republic of Macedonia
## 44                       Trinidad and Tobago
## 48                                  Viet Nam
## 
## [[3]]
##                                 Major  #     %
## 2                Business and Finance 71 22.19
## 3              Business and Marketing 24  7.50
## 5        Computer Systems Engineering 23  7.19
## 7                           Economics 23  7.19
## 13                        Mathematics 21  6.56
## 11             Interactive Media Arts 20  6.25
## 6                        Data Science 16  5.00
## 16                     Social Science 15  4.69
## 10                         Humanities 12  3.75
## 14                     Neural Science  9  2.81
## 12         Interactive Media Business  8  2.50
## 15                            Physics  6  1.88
## 9                Global China Studies  5  1.56
## 1                             Biology  4  1.25
## 8  Electrical and Systems Engineering  4  1.25
## 4                           Chemistry  2  0.62
## 
## [[4]]
##     country   #     %
## 1     China 178 55.62
## 3     Other  71 22.19
## 2      U.S.  69 21.56
## 4 Undefined   2  0.62

Service use

Define groups for questions of multiple choices.

g <- list(status = c("Freshman", "Sophomore", "Junior", "Senior", "Study Away", "Graduate/Joint Program") ,
          country = c("China", "U.S.", "Other"),
          major = c("Science, CS, Engineering, Mathematics", 
                    "Business, Finance, Economics", 
                    "Humanities, Social Sciences", 
                    "Data Science, Interactive Media Business", 
                    "Interactive Media Arts"))

Visiting Library

## Q2.2: My top reasons for visiting Library

Reason <- c("Work on a class assignment/paper", 
            "Watch video or listen audio", "Use specialized databases \\(e.g. Bloomberg, Wind\\)",
            "Use a library computer",  "Use a group study room", "Print, photocopy, scan",  "Other", 
            "Meet up with friends" , "Hang out between classes", 
            "Get readings from Course Reserve", "Get help from a librarian", "Find a quiet place to study",
            "Borrow books and materials", "Attend a library workshop")

r <- data.frame(survey$status, survey$country, survey$major2)
for (m in 1: length(Reason)){
  r[,Reason[m]] <- str_extract(survey$Q2.2, Reason[m])
}

dtset <- replicate(3, data.frame(Reason), simplify = FALSE)
for (i in 1:3){
  levels(dtset[[i]]$Reason)[levels(dtset[[i]]$Reason) == "Use specialized databases \\(e.g. Bloomberg, Wind\\)"] <- "Use specialized databases (e.g. Bloomberg, Wind)"
}

for (m in 1:length(dtset)){
  for (n in 1:length(g[[m]])){
    subgroup <- r[r[,m] == g[[m]][n], (length(g)+1):(length(Reason) + length(g))]
    dtset[[m]][,g[[m]][n]] <- apply(subgroup, 2, function(x) length(which(!is.na(x))))
  }
  dtset[[m]]$Total<- rowSums(dtset[[m]][,2:n], na.rm = TRUE, dims = 1)
  dtset[[m]] <- dtset[[m]][order(dtset[[m]]$Total,decreasing = T),]
}

## m denotes how many groups we have. We have 3 groups: country/status/major
## n denotes how many elements we have in each group. e.g. 3 elements in the group "country" (China, U.S., Other)

Reason2 <- c("Work on a class assignment/paper", "Watch video or listen audio", 
             "Use specialized databases (e.g. Bloomberg, Wind)",
             "Use a library computer",  "Use a group study room", "Print, photocopy, scan",  "Other", 
             "Meet up with friends", "Hang out between classes", "Get readings from Course Reserve", 
             "Get help from a librarian", "Find a quiet place to study",
             "Borrow books and materials", "Attend a library workshop")
l <- data.frame(Reason = Reason2, Level = c("High","Low","Low","Medium","Medium","High","Low","Medium","Low","Medium","Low","High","High","Low"))

for (i in 1:3){
  dtset[[i]] <- dtset[[i]] %>% left_join(l, by = "Reason")
}
## labeling; for grouping in graphing

dtset
## [[1]]
##                                              Reason Freshman Sophomore
## 1                       Find a quiet place to study       94        55
## 2                            Print, photocopy, scan       76        50
## 3                  Work on a class assignment/paper       76        44
## 4                        Borrow books and materials       38        25
## 5                  Get readings from Course Reserve       13        12
## 6                            Use a group study room       18        12
## 7                            Use a library computer       16        10
## 8                              Meet up with friends       17        12
## 9                          Hang out between classes       14         5
## 10                        Get help from a librarian        7         7
## 11 Use specialized databases (e.g. Bloomberg, Wind)        4         3
## 12                        Attend a library workshop       13         0
## 13                      Watch video or listen audio        5         4
## 14                                            Other        2         1
##    Junior Senior Study Away Graduate/Joint Program Total  Level
## 1      11     45         13                      3   218   High
## 2      10     52         10                      4   198   High
## 3       9     28          8                      2   165   High
## 4       9     34          0                      1   106   High
## 5       3     15          2                      0    45 Medium
## 6       3      7          1                      0    41 Medium
## 7       3      7          4                      1    40 Medium
## 8       1      4          1                      1    35 Medium
## 9       2      2          2                      2    25    Low
## 10      1      8          0                      0    23    Low
## 11      2      8          0                      0    17    Low
## 12      2      1          0                      1    16    Low
## 13      0      1          1                      0    11    Low
## 14      1      1          0                      0     5    Low
## 
## [[2]]
##                                              Reason China U.S. Other Total
## 1                       Find a quiet place to study   123   47    50   170
## 2                            Print, photocopy, scan   104   45    52   149
## 3                  Work on a class assignment/paper    89   46    31   135
## 4                        Borrow books and materials    63   15    28    78
## 5                            Use a group study room    32    4     5    36
## 6                  Get readings from Course Reserve    30    6     8    36
## 7                            Use a library computer    11   20    10    31
## 8                              Meet up with friends    23    4     9    27
## 9                          Hang out between classes    16    6     5    22
## 10 Use specialized databases (e.g. Bloomberg, Wind)    12    3     2    15
## 11                        Get help from a librarian    13    2     7    15
## 12                        Attend a library workshop     9    5     3    14
## 13                      Watch video or listen audio     7    2     2     9
## 14                                            Other     2    2     1     4
##     Level
## 1    High
## 2    High
## 3    High
## 4    High
## 5  Medium
## 6  Medium
## 7  Medium
## 8  Medium
## 9     Low
## 10    Low
## 11    Low
## 12    Low
## 13    Low
## 14    Low
## 
## [[3]]
##                                              Reason
## 1                       Find a quiet place to study
## 2                            Print, photocopy, scan
## 3                  Work on a class assignment/paper
## 4                        Borrow books and materials
## 5                            Use a group study room
## 6                            Use a library computer
## 7                  Get readings from Course Reserve
## 8                              Meet up with friends
## 9                         Get help from a librarian
## 10 Use specialized databases (e.g. Bloomberg, Wind)
## 11                         Hang out between classes
## 12                        Attend a library workshop
## 13                      Watch video or listen audio
## 14                                            Other
##    Science, CS, Engineering, Mathematics Business, Finance, Economics
## 1                                      9                           80
## 2                                     12                           78
## 3                                      6                           63
## 4                                      9                           36
## 5                                      2                           17
## 6                                      2                           15
## 7                                      4                           10
## 8                                      1                           17
## 9                                      2                            9
## 10                                     0                           10
## 11                                     0                            6
## 12                                     1                            5
## 13                                     0                            7
## 14                                     0                            1
##    Humanities, Social Sciences Data Science, Interactive Media Business
## 1                           21                                       17
## 2                           23                                       12
## 3                           18                                       16
## 4                           13                                        9
## 5                            3                                        2
## 6                            3                                        3
## 7                            6                                        2
## 8                            1                                        1
## 9                            2                                        1
## 10                           3                                        0
## 11                           1                                        5
## 12                           1                                        4
## 13                           0                                        0
## 14                           1                                        0
##    Interactive Media Arts Total  Level
## 1                      10   127   High
## 2                      10   125   High
## 3                       9   103   High
## 4                       8    67   High
## 5                       5    24 Medium
## 6                       5    23 Medium
## 7                       4    22 Medium
## 8                       2    20 Medium
## 9                       3    14    Low
## 10                      1    13    Low
## 11                      1    12    Low
## 12                      1    11    Low
## 13                      0     7    Low
## 14                      1     2    Low

Space and study habit

Space Preference: Most Days - ranking

survey <- survey %>% mutate(
  mostdays_crowded = ifelse(is.na(Q2.5_0_15_RANK), Q2.5_0_1_RANK, NA),
  mostdays_crowded = ifelse(is.na(Q2.5_0_1_RANK), Q2.5_0_15_RANK, mostdays_crowded),
  mostdays_modpop = ifelse(is.na(Q2.5_0_16_RANK), Q2.5_0_9_RANK, NA),
  mostdays_modpop = ifelse(is.na(Q2.5_0_9_RANK), Q2.5_0_16_RANK, mostdays_modpop),
  mostdays_noisy = ifelse(is.na(Q2.5_0_17_RANK), Q2.5_0_3_RANK, NA),
  mostdays_noisy = ifelse(is.na(Q2.5_0_3_RANK), Q2.5_0_17_RANK, mostdays_noisy),
  mostdays_quiet = ifelse(is.na(Q2.5_0_18_RANK), Q2.5_0_4_RANK, NA),
  mostdays_quiet = ifelse(is.na(Q2.5_0_4_RANK), Q2.5_0_18_RANK, mostdays_quiet),
  mostdays_silent = ifelse(is.na(Q2.5_0_19_RANK), Q2.5_0_5_RANK, NA),
  mostdays_silent = ifelse(is.na(Q2.5_0_5_RANK), Q2.5_0_19_RANK, mostdays_silent),
  mostdays_relaxed = ifelse(is.na(Q2.5_0_20_RANK), Q2.5_0_6_RANK, NA),
  mostdays_relaxed = ifelse(is.na(Q2.5_0_6_RANK), Q2.5_0_20_RANK, mostdays_relaxed),
  mostdays_focused = ifelse(is.na(Q2.5_0_21_RANK), Q2.5_0_7_RANK, NA),
  mostdays_focused = ifelse(is.na(Q2.5_0_7_RANK), Q2.5_0_21_RANK, mostdays_focused)
)
## keep only if one of the pair is missing and the other non-missing
## this is due to how the Qualtrics survey was designed; not important for our purpose here

days_status <- 
  data.frame(survey %>% group_by(status) %>% summarise(mostdays_crowded = round(mean(mostdays_crowded, na.rm =TRUE),2))) %>% 
  left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_modpop = round(mean(mostdays_modpop, na.rm =TRUE),2)))) %>%
  left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_noisy = round(mean(mostdays_noisy, na.rm =TRUE),2)))) %>%
  left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_quiet = round(mean(mostdays_quiet, na.rm =TRUE),2)))) %>%
  left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_silent = round(mean(mostdays_silent, na.rm =TRUE),2)))) %>%
  left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_relaxed = round(mean(mostdays_relaxed, na.rm =TRUE),2)))) %>%
  left_join(data.frame(survey %>% group_by(status) %>% summarise(mostdays_focused = round(mean(mostdays_focused, na.rm =TRUE),2)))) %>%
  rename(`crowded` = mostdays_crowded, `moderately populated` = mostdays_modpop, 
         `noisy` = mostdays_noisy, `quiet (occasional whispers)` = mostdays_quiet, 
         `(close to) silent` = mostdays_silent, `relaxed, cozy or homey` = mostdays_relaxed,
         `focused` = mostdays_focused, `G` = status) %>% 
  melt()

## same for creating the country and major matrices 

Space Preference: Most Days - multiple options

## Q2.5_0_GROUP: most days

Place <- c("crowded", "moderately populated", "noisy", "quiet \\(occasional whispers\\)", "\\(close to\\) silent", "relaxed, cozy or homey", "focused")

p1 <- data.frame(survey$status, survey$country, survey$major2)
for (m in 1:length(Place)){
  p1[,Place[m]] <- str_extract(survey$Q2.5_0_GROUP, Place[m])
}

dtset4 <- replicate(3, data.frame(Place), simplify = FALSE)
for (m in 1:length(dtset4)){
  for (n in 1:length(g[[m]])){
    subgroup4 <- p1[p1[,m] == g[[m]][n], (length(g)+1):(length(Place) + length(g))]
    dtset4[[m]][,g[[m]][n]] <- apply(subgroup4, 2, function(x) length(which(!is.na(x))))
  }
  dtset4[[m]]$Total<- rowSums(dtset4[[m]][,2:n], na.rm = TRUE, dims = 1)
}

for (i in 1:3){
  levels(dtset4[[i]]$Place)[levels(dtset4[[i]]$Place) == "quiet \\(occasional whispers\\)"] <- "quiet (occasional whispers)"
  levels(dtset4[[i]]$Place)[levels(dtset4[[i]]$Place) == "\\(close to\\) silent"] <- "(close to) silent"
}

dtset4
## [[1]]
##                         Place Freshman Sophomore Junior Senior Study Away
## 1                     crowded       19        18      1     13          3
## 2        moderately populated       44        28     10     36          7
## 3                       noisy       15        15      0     12          2
## 4 quiet (occasional whispers)       84        43     12     49         11
## 5           (close to) silent       49        32      6     31          9
## 6      relaxed, cozy or homey       66        34     12     40          8
## 7                     focused       45        36     12     35          7
##   Graduate/Joint Program Total
## 1                      1    54
## 2                      2   125
## 3                      1    44
## 4                      3   199
## 5                      3   127
## 6                      4   160
## 7                      2   135
## 
## [[2]]
##                         Place China U.S. Other Total
## 1                     crowded    32   12    10    44
## 2        moderately populated    59   37    31    96
## 3                       noisy    27   10     7    37
## 4 quiet (occasional whispers)   111   46    44   157
## 5           (close to) silent    64   32    34    96
## 6      relaxed, cozy or homey    80   44    39   124
## 7                     focused    70   32    34   102
## 
## [[3]]
##                         Place Science, CS, Engineering, Mathematics
## 1                     crowded                                     2
## 2        moderately populated                                     7
## 3                       noisy                                     4
## 4 quiet (occasional whispers)                                     9
## 5           (close to) silent                                     6
## 6      relaxed, cozy or homey                                     6
## 7                     focused                                     9
##   Business, Finance, Economics Humanities, Social Sciences
## 1                           23                           3
## 2                           50                          12
## 3                           18                           3
## 4                           73                          24
## 5                           46                          14
## 6                           56                          19
## 7                           47                          14
##   Data Science, Interactive Media Business Interactive Media Arts Total
## 1                                        4                      3    32
## 2                                       13                      8    82
## 3                                        2                      3    27
## 4                                       18                     12   124
## 5                                       10                      6    76
## 6                                       16                     12    97
## 7                                        9                      9    79
## grand matrix - most days
days_status <- days_status %>% 
  left_join(rename(melt(dtset4[[1]]), `G` = variable, `variable` = Place, `count` = value ), by = c("G", "variable")) 
## Warning: Column `G` joining factors with different levels, coercing to
## character vector
## Warning: Column `variable` joining factors with different levels, coercing
## to character vector
days_status$G <- factor(days_status$G, levels = c("Freshman", "Sophomore", "Junior", "Senior", 
                                                  "Graduate/Joint Program", "Study Away"))

## same for creating the country and major matrices 

Space Preference: Student Submissions

lib <- unlist(strsplit(survey$Q2.3.y, ","))
lib <- str_trim(lib, side = "both")
lib <- lib[!lib %in% c("NULL")]
lib <- data.frame(table(lib)) %>% arrange(-Freq)

color1 <- rep(c(palette[[1]][4], palette[[2]][1], palette[[3]][1], palette[[2]][2], palette[[2]][3], palette[[3]][2]),
              length.out=nrow(lib))

## for later use in wordcloud2 package

lib
##                 lib Freq
## 1            silent   77
## 2               407   58
## 3           windows   47
## 4             other   31
## 5  group study room   23
## 6               400   22
## 7               428   17
## 8              desk   14
## 9              sofa   11
## 10       large desk    7
## 11           corner    6
## 12       small desk    4
## 13               4F    2
## 14            table    2
## 15           outlet    1
## 16          privacy    1
## 17          reserve    1
## 18          talking    1