-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfunctions.R
657 lines (565 loc) · 36.4 KB
/
functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
# http://stackoverflow.com/questions/32136304/conditional-calculation-of-mean
# function calculates the mean only if there are 10 or more respondents to each individual question
f1 <- function(x) if(sum(!is.na(x))>9) mean(as.numeric(x), na.rm=TRUE) else NA_real_
#function calculates number of respondents (not NA)
f2 <- function(x) sum(!is.na(x))
table_course <- function(dataset, response){
temp <- as.data.frame(table(dataset$A.5.When.did.you.start.EM.Course._Response))
if(dim(response)[1] == 0){
names(temp) <- c("Year when respondent started the EMJMD", "Number of respondents to CQSS 2015")
return(temp)
}
temp2 <- as.data.frame(t(response))
temp2$year <- substr(row.names(temp2), 2, 6)
temp2 <- temp2[3:nrow(temp2), ]
temp <- left_join(temp, temp2, by = c("Var1" = "year"))
names(temp) <- c("Year when respondent started the EMJMD", "Number of respondents to CQSS 2015", "Self-reported number of students enrolled")
return(temp)
}
questionprint <- function(x, dataset = overall){
### function for printing out the likert plot about each individual section of a survey.
### x = name of the question to be printed
### dataset = which dataset should be used to extract data from. Default = overall.
z <- question_prepare(x, dataset)
question <- z[[1]]
name_of_the_question <- z[[2]]
### checking to see if question has more than 1 dimension with 10 or more respondents to proceed.
### Otherwise it doesn't make sense to calculate Cronbach's alpha and plot
if (!is.null(dim(question)[2])){
if (dim(question)[2] > 1) {
### calculating Cronbach's alpha. If there is an error it won't print out anything
#try(printing_alpha(x, question))
wrap_function <- wrap_format(85) #wrap-function to print question correctly
name_of_the_question <- wrap_function(name_of_the_question)
p <- plot_question(question, name_of_the_question)
return(p)
}
}
}
questionprint_grouping <- function(x, dataset = overall, grouping, grouping_levels){
### x = name of the question to be printed
### dataset = which dataset should be used to extract data from. Default = overall.
### grouping = grouping variable to be used to display the results. MUST be the same length as the dataset itself.
### grouping_levels = grouping levels to be used to display. Function automatically deletes rows with levels not present in grouping_levels variable.
### returns nothing, prints the plot directly to the window.
z <- question_prepare(x, dataset)
question <- z[[1]]
name_of_the_question <- z[[2]]
grouping_variable <- factor(grouping, levels = grouping_levels)
question <- question[complete.cases(grouping_variable), ]
grouping_variable <- grouping_variable[complete.cases(grouping_variable)]
questionl <- likert(question, grouping = grouping_variable)
plot(questionl) +
ggtitle(name_of_the_question)
}
question_prepare <- function(x, dataset){
# function to prepare a dataset for future use
# x = name of the question
# dataset = dataset that needs to be transformed
# returns a list with 2 elements:
# First element is a transformed dataset. All column names are readable. All columns are transformed into respective levels.
# Each column has at least 10 answers in them.
# Second element is the name of the question that will be used as a title for the plot.
likert_levels <- c("Very unsatisfied", "Somewhat unsatisfied", "Somewhat satisfied", "Very satisfied")
agree_levels <- c("Disagree", "Somewhat disagree", "Somewhat agree", "Agree")
question <- dataset[, substr(names(dataset), 1, nchar(x)) == x]
colnames(question) <- gsub("\\.", " ", colnames(question)) #making names of questions readable
name_of_the_question <- extract_name(x, dataset)
colnames(question) <- gsub("(.*?)_(.*)", "\\2", colnames(question)) #leaving just the dimension name
levels <- likert_levels # default is likert_levels
if (x == "L.6" || x == "L.5" || x == "L.4")
levels <- agree_levels # using agree levels only for relevant questions
### making sure that levels go in order they were meant to go
for(i in seq_along(question)) {
### this step will also reduce all other answers to NA's
question[,i] <- factor(question[,i], levels = levels)
}
### checking to see to have 10 or more answers in each column, otherwise delete it
question <- question[, colSums(!is.na(question)) >= 10]
output <- list(as.data.frame(question), name_of_the_question)
return(output)
}
extract_name <- function(x, dataset){
#extracts name of the question and returns it
### x = string containing the identifier of the question (e.g. "B.1.3")
question <- dataset[, substr(names(dataset), 1, nchar(x)) == x]
name_of_the_question <- gsub("(.*)_(.*)", "\\1", colnames(question)[1]) #storing the name of the section for title of the plot
name_of_the_question <- substring(name_of_the_question, nchar(x)+1)
name_of_the_question <- gsub("\\.", " ", name_of_the_question)
return(name_of_the_question)
}
printing_alpha <- function(x, question_dataset, saved = TRUE){
### Function takes in name of the quetions, dataset containing information about the question and prints out information about Cronbach alpha
### either to a specified folder "Question_statistics" or directly to a prompt. Later can be used to generate reports.
### x = name of the question
### question_dataset = dataset containing information only about given question
### saved = logical flag indicating whether to just save the tables in a folder, or print them
if (sum(complete.cases(question_dataset)) > 0){ # calculating Cronbach's alpha only for cases where complete cases exist
question_alpha <- psych::alpha(data.matrix(question_dataset))
question_alpha_head <- xtable(question_alpha$total, caption = sprintf("Statistics for %s question", x), digits = 2)
question_alpha_drop <- xtable(question_alpha$alpha.drop, caption = sprintf("What if each individual dimension is dropped for %s question", x), digits = 2)
question_alpha_stats <- xtable(question_alpha$item.stats, caption = sprintf("Summary statistics for %s question", x), digits = 2)
if(saved){
print.xtable(question_alpha_head, type = "html", file = sprintf("./Question_statistics/%s_alpha.html", x))
print.xtable(question_alpha_drop, type = "html", file = sprintf("./Question_statistics/%s_drop.html", x))
print.xtable(question_alpha_stats, type = "html", file = sprintf("./Question_statistics/%s_stats.html", x))
} else {
xtable(question_alpha$item.stats, caption = sprintf("Summary statistics for %s question", x), digits = 2, type = "html")
}
}
}
comparative_df <- function(x, course_dataset, dataset){
### function to create a dataframe to compare course with all other courses on a given question. Takes in a question and returns a data frame
### with summary statistics for both a course and overall for all courses.
### x = name of the question.
overall_dataset <- question_prepare(x, dataset)[[1]]
question_dataset <- question_prepare(x, course_dataset)[[1]]
# checking to see if there is any data for that question
if(dim(question_dataset)[2] < 2)
return (as.data.frame("Not enough data were collected on this question"))
#calculating means for a specific course
means_question <- question_dataset %>%
summarise_each(funs(f1, f2)) %>%
gather(variable, value) %>%
separate(variable, c("var", "stat"), sep = "\\_f") %>%
spread(var, value)%>%
t() %>%
as.data.frame()
#cleaning the means_question dataset to use it in a table
names(means_question) <- c("Mean", "Respondents")
means_question <- means_question[2:nrow(means_question),]
means_question$Mean <- as.numeric(as.character(means_question$Mean))
means_question$Respondents <- as.numeric(as.character(means_question$Respondents))
#calculating means for the entire dataset
means_overall <- overall_dataset %>%
summarise_each(funs(f1))%>%
t()%>%
as.data.frame()
names(means_overall) <- c("Mean for all courses")
#merging the
df <- merge(means_question, means_overall, by = 0, all = TRUE)
rownames(df) <- df$Row.names
df$Row.names <- NULL
####################################################################################################
### normally function between those two lines will be a separate function means_prepare, but because of NSE in dplyr I don't know how to make it work
### inside of Shiny. So that is something we need to think about.
#calculating means for each course and each question to create a quantile variable
overall_dataset$course <- dataset$A.2.name.of.Erasmus.Mundus.master.course.
means_overall_each <- overall_dataset %>%
group_by(course) %>%
summarise_each(funs(f1))
#storing the names of the courses for future use
rownames_store <- means_overall_each$course
means_overall_each$course <- NULL
#logical vector to find out rows with all NA's
vector <- !!rowSums(!is.na(means_overall_each))
means_overall_each <- means_overall_each[vector,] #deleting the rows with all NA's
rownames(means_overall_each) <- rownames_store[vector] #writing down the names of the courses
colnames(means_overall_each) <- gsub("\\.", " ", colnames(means_overall_each)) #making names of questions readable
colnames(means_overall_each) <- gsub("(.*?)_(.*)", "\\2", colnames(means_overall_each)) #leaving just the dimension name
####################################################################################################
### adding information about quartiles
quantiles <- as.data.frame(t(apply(X = means_overall_each, FUN = function(x) quantile(x, na.rm = TRUE), MARGIN = 2)))
quartile_info <- merge(quantiles, means_question, by = 0, all = TRUE)
rownames(quartile_info) <- quartile_info$Row.names
quartile_info$Row.names <- NULL
quartile_info$Mean <- quartile_info$Mean + 0.0001 #just to make sure that lowest means are also recognized by cut
quartile_info$quartile <- apply(quartile_info, 1, function(x) cut(x[6], x[1:5], include.lowest = TRUE, labels = FALSE))
quartile_info <- quartile_info["quartile"]
clean_quantiles <- quantiles #copying quantiles to a different data frame to create ranges
clean_quantiles$first <- apply(quantiles, 1, function(x) sprintf("%.2f - %.2f", x[1], x[2])) #creating ranges
clean_quantiles$second <- apply(quantiles, 1, function(x) sprintf("%.2f - %.2f", x[2] + 0.01, x[3])) #+0.01 to create non-overlapping range.
clean_quantiles$third <- apply(quantiles, 1, function(x) sprintf("%.2f - %.2f", x[3] + 0.01, x[4]))
clean_quantiles$fourth <- apply(quantiles, 1, function(x) sprintf("%.2f - %.2f", x[4] + 0.01, x[5]))
clean_quantiles <- clean_quantiles[c("first", "second", "third", "fourth")] #deleting unnecessary columns
df <- merge(df, clean_quantiles, by = 0, all = TRUE)
rownames(df) <- df$Row.names
df$Row.names <- NULL
df <- merge(df, quartile_info, by = 0, all = TRUE)
names(df) <- c("Dimension", "Course mean", "n", "EM mean", "0% - 25%", "25% - 50%", "50% - 75%", "75% - 100%", "quartile")
df <- df[complete.cases(df),] #deleting all rows with NA in the mean
#rounding mean and EM mean to 2 decimal places
df[,2] <- round(df[,2], 2)
df[,4] <- round(df[,4], 2)
df$quartile <- NULL
return(df[,c(1,3,2,4:ncol(df))])
}
means_prepare <- function(x, dataset){
### calculates means for the given question. Returns prepared means matrix.
### x = name of the question.
### dataset = overall dataset with data for all courses
overall_dataset <- question_prepare(x, dataset)[[1]]
#calculating means for each course and each question to create a quantile variable
overall_dataset$course <- dataset$A.2.name.of.Erasmus.Mundus.master.course.
means <- overall_dataset %>%
group_by(course) %>%
summarise_each(funs(f1))
#storing the names of the courses for future use
rownames_store <- means$course
means$course <- NULL
#logical vector to find out rows with all NA's
vector <- !!rowSums(!is.na(means))
means <- means[vector,] #deleting the rows with all NA's
rownames(means) <- rownames_store[vector] #writing down the names of the courses
colnames(means) <- gsub("\\.", " ", colnames(means)) #making names of questions readable
colnames(means) <- gsub("(.*?)_(.*)", "\\2", colnames(means)) #leaving just the dimension name
# wrap_function_x <- wrap_format(35)
# colnames(means) <- wrap_function_x(colnames(means))
# wrap_function_y <- wrap_format(50)
# rownames(means) <- wrap_function_y(rownames(means))
return(list(means, vector))
}
heatmap_printing <- function(means, vector, scaled = TRUE, saved = TRUE){
### function to print out a heatmap for any given question. Used in conjuction with means_prepare.
### means = data frame containing means for any given question. Comes from means_prepare function.
### vector = vector that is used to calculate the height and width of the resulting plot.
### scaled = logical flag to indicate whether heatmap should be scaled or not. Default is TRUE.
### saved = logical flag indicating whether to save or print the graph directly. Default is TRUE.
# different colors depending on scaled/not scaled
if (scaled){
mycolors <- brewer.pal(length(c(-Inf,-2:2,Inf)), "BrBG")
} else {
mycolors <- brewer.pal(length(seq(1,4, by = 0.5)), "BrBG")
}
# scaling the matrix or not
if (scaled){
means_matrix <- scale(data.matrix(means))
} else {
means_matrix <- data.matrix(means)
}
# melted matrix is easier to use with ggplot
means_matrix_melted <- melt(means_matrix)
### different ways to cut values depending if scaled/not scaled
if (scaled) {
means_matrix_melted$value1 <- cut(means_matrix_melted$value, c(-Inf,-2:2,Inf), right = FALSE)
} else {
means_matrix_melted$value1 <- cut(means_matrix_melted$value, seq(1,4, by = 0.5), right = FALSE)
}
#plotting the heatmap
p <- ggplot(means_matrix_melted, aes(x = X2, y = X1)) +
ggtitle(as.character(questions[i])) +
geom_tile(aes(fill = value1)) +
scale_fill_manual(name = levels(means_matrix_melted$value1),
values = mycolors,
na.value = "black", # default color for NA values
drop = FALSE) + # not dropping levels with 0 values in them
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
axis.title.x=element_blank(),
axis.title.y=element_blank())
if (saved){
if(scaled){
ggsave(filename = sprintf("./Heatmaps_scaled/%s.png", questions[i]), plot = p, units = "mm", width = 250, height = (70 + sum(vector)*4))
} else {
ggsave(filename = sprintf("./Heatmaps/%s.png", questions[i]), plot = p, units = "mm", width = 250, height = (70 + sum(vector)*4))
}
} else {print(p)}
}
report_question <- function(question, course_dataset){
## Function to print out the question in the individual report.
## question = string, containing the question to be printed.
## course_dataset = course dataset to produce graphs and tables.
switch(question,
B.1.1 = {
first_heading <- '###Consortia\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
B.1.3 = {
first_heading <- '###Consortia\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
B.2.1 = {
first_heading <- '###Consortia\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
B.2.2 = {
first_heading <- '###Consortia\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
C.1 = {
first_heading <- '###Consortia\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
L.2.a = {
first_heading <- '##Internship experience.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
L.3.a = {
first_heading <- '##Field experience.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
L.4 = {
first_heading <- '##First supervisor.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
L.5 = {
first_heading <- '##Second supervisor.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
L.6 = {
first_heading <- '##Personal development.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
N.1.1 = {
first_heading <- '###First university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
N.1.3 = {
first_heading <- '###First university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
N.2.1 = {
first_heading <- '###First university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
N.2.2 = {
first_heading <- '###First university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
N.3.1 = {
first_heading <- '###First university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
N.4.1 = {
first_heading <- '###First university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
O.1.1 = {
first_heading <- '###Second university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
O.1.3 = {
first_heading <- '###Second university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
O.2.1 = {
first_heading <- '###Second university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
O.2.2 = {
first_heading <- '###Second university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
O.3.1 = {
first_heading <- '###Second university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
O.4.1 = {
first_heading <- '###Second university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
P.1.1 = {
first_heading <- '###Third university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
P.1.3 = {
first_heading <- '###Third university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
P.2.1 = {
first_heading <- '###Third university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
P.2.2 = {
first_heading <- '###Third university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
P.3.1 = {
first_heading <- '###Third university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
P.4.1 = {
first_heading <- '###Third university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
Q.1.1 = {
first_heading <- '###Fourth university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
Q.1.3 = {
first_heading <- '###Fourth university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
Q.2.1 = {
first_heading <- '###Fourth university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
Q.2.2 = {
first_heading <- '###Fourth university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
Q.3.1 = {
first_heading <- '###Fourth university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
},
Q.4.1 = {
first_heading <- '###Fourth university.\n'
intro_text <- 'Some introductory text about this particular question. Likely to be the same for all courses.\n'
graph_text <- 'Some supporting text explaining the graph and highlighting some of the issues. Should be individual for each course.\n'
table_text <- 'Some supporting about the table and highlighting some of the issues. Should be individual for each course.\n'
}
)
#logical flag to see if there is any data on a given question.
#It will evaluate to TRUE if there is any error or will return the value otherwise.
try_flag <- tryCatch(comparative_df(question, course_dataset), error = function(err) return(TRUE))
if(!is.logical(try_flag)){ #checking if try_flag is logical. If it is, then do nothing. Otherwise print out the information about the question.
temp <- sprintf("\n%s%s%s\n\n", first_heading, "Question:", question)
cat(temp)
#cat(intro_text)
cat("\n")
#prtinting out the question
questionprint(question, dataset = course_dataset, save = FALSE)
#cat(graph_text)
cat("\n")
not_print <- c("N", "O", "P", "Q") #not printing comparative tables for questions on specific university
if (!(substr(question, 1, 1) %in% not_print)) {
#preparing and printing table
df <- comparative_df(question, course_dataset)
z <- xtable(df, caption = sprintf("Summary statistics"), digits = c(0,0,2,2,2,2,2,2), type = "html")
align(z) <- "|p{5cm}|cc|c|cccc|"
print(z, NA.string = "NA", sanitize.text.function = function(x) x)
}
cat("\n")
#cat(table_text)
}
}
means_printing <- function(x){
### function to print out a boxplot for any given question. Used in conjuction with means_prepare.
# getting the means table for a given question
means <- means_prepare(x)[[1]]
# melting the means table to use it in plotting
melted_means <- as.data.frame(melt(as.matrix(means)))
#getting name of the question
name_of_the_question <- extract_name(x)
#wrap statement to have new lines after a given character
wrap_function_title <- wrap_format(80)
name_of_the_question <- wrap_function_title(name_of_the_question)
#same for dimension names
wrap_function_label <- wrap_format(40)
#plotting
p <- ggplot(melted_means, aes(y = value, x = X2)) +
ggtitle(name_of_the_question) +
geom_boxplot() +
coord_flip() +
ylab("Distribution of means") +
#http://stackoverflow.com/questions/21878974/auto-wrapping-of-labels-via-labeller-label-wrap-in-ggplot2
scale_x_discrete(labels = function(x) wrap_function_label(x))+
theme(axis.title.y=element_blank())
#saving the resulting graph
ggsave(filename = sprintf("./Mean_plots/%s.png", x), plot = p, units = "mm", width = 250, height = (70 + length(levels(factor(melted_means$X2)))*10))
}
figure_height <- function(question, course_dataset){
# function to determine the height of the figure for a given question. Returns height if there is something to return, returns 0
# if comparative_df returns error. Function is needed since otherwise it would be impossible to determine the height and make sure
# that script runs properly when there is no data to print out
question_dataset <- question_prepare(question, course_dataset)[[1]]
#calculating means for a specific course
if(ncol(question_dataset) > 1) {
means_question <- question_dataset %>%
summarise_each(funs(f1, n())) %>%
gather(variable, value) %>%
separate(variable, c("var", "stat"), sep = "\\_") %>%
spread(var, value)%>%
t() %>%
as.data.frame()
means_question <- means_question[-1,]
means_question <- means_question[complete.cases(means_question),]
if(nrow(means_question) > 1){ #checking if try_flag is logical. If it is, then do nothing. Otherwise print out the information about the question.
return (50+50*nrow(means_question))
}
}
else return (1) #return 1 height in case there is nothing to print
}
plot_question <- function(question, name_of_the_question){
#function to create a graph of a question
#question - dataset containing prepared question data
#name_of_the_question - string, containing the name of the question to be used in title
questionl <- likert(question) #creating likert-type variable for plotting
### printing out the file
p <- plot(questionl,
plot.percents = TRUE, # displaying percents for each answer
plot.percent.low = FALSE, # displaying cummulative percents for negative answers
plot.percent.high = FALSE, # displaying cummulative percents for positive answers
centered = FALSE, # stretcthing the bar from left to right
wrap = 40, # wrap statement for dimension names
legend.position = "top",
legend = NULL,
group.order = sort(names(question))) +
ggtitle(name_of_the_question) + # title of the question
theme(text = element_text(size = 14, family = "Open Sans"), # setting the text size of the plot
plot.margin = unit(c(0.2, 0.8, 0.3, 0), "lines"), # decreasing white space around the plot
legend.margin = unit(0, "lines"), # deleting space around legend
legend.key.size = unit(0.5, "lines"), # decreasing size of legend elements
legend.background = element_rect(colour = "gray", fill = NA, size = 0.1), # adding a frame around the legend
axis.title.x=element_blank(), #deleting x-label
plot.title = element_text(size = 14)) + #size of the text in the title
geom_hline(yintercept=seq(25, 75, by=25), linetype = "dashed", size = 0.2) + # adding dashed lines at 25, 50, 75% to make it more clear
coord_fixed() +
coord_flip(ylim = c(4,96)) #reducing white space left to 0 and right to 100
return(p)
}