-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_DFE_tests.R
83 lines (67 loc) · 3.41 KB
/
run_DFE_tests.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# DFE_U_tests.R -- repeat Mann-Whitney U test and report stats from DFE studies
#
# this generates the data for Table 2. Just do this:
#
# > source( "run_DFE_tests.R" )
#
# this generates output files "DFE_U_tests.tab", which you can easily turn
# into a formatted table using the spreadsheet "DFE_results_formatted_table.xslx".
# Just paste the results into the "raw" sheet and they will appear formatted in
# the other sheet. Fisher's method is easier to implement in the spreadsheet.
#
# This also runs the meta-statistical tests and puts these in a separate table.
# get the full version of the Mann-Whitney from conditional inference package
library(coin)
source("DFE_tests/DFE_functions.R")
# get the data from all the studies (in separate file)
source( "DFE_tests/DFE_data.R" )
all_dfes <- list( firnberg, roscoe, hietpas, lind, domingo_calap, carrasco, peris, sanjuan, maclean, rokyta )
# utility function for std error of mean
sem <- function(x) sqrt(var(x)/length(x))
# set up results table (one row of NAs that we'll discard later)
result <- matrix(,nrow=1,ncol=8)
colnames(result) <- c("Study", "MeanTi", "StdTi", "CountTi", "MeanTv", "StdTv", "CountTv", "U test" )
# compute results
for (i in 1:length(all_dfes)) {
data <- all_dfes[[i]]
titv <- as.factor(c(rep("ti",length(data$ti)),rep("tv",length(data$tv))))
# "greater" indicates one-sided test with alt ::= x shifted to the right of y
# Apparently x refers to ti because it is the factor level with the lower numeric encoding
out <- wilcox_test(c(data$ti,data$tv) ~ titv, alternative="greater")
result <- rbind( result, c(data$name, mean(data$ti), sem(data$ti), length(data$ti), mean(data$tv), sem(data$tv), length(data$tv), pvalue(out) ) )
}
# first row in result is NA, so leave it out
result <- result[-1,]
write.table( result,"DFE_tests/DFE_U_tests.tab", sep="\t", row.names=FALSE )
# now let's do the other tests
# define another results table
meta_result <- matrix(,nrow=1,ncol=6)
colnames(meta_result) <- c("Group", "n", "Fisher_Xsq", "Fisher_P", "Stouffer_Z", "Stouffer_P" )
# define a test function
#
test_meta <- function( group, result, meta_result ) {
# set up
names <- lapply( group, function(x) { x$name })
test_name <- do.call("paste",c(names,sep="_"))
p_values <- unlist(lapply( names, function(x) { as.numeric(result[ result[,1]==x, 8 ]) }))
sizes <- unlist(lapply( names, function(x) { sum(as.numeric(result[ result[,1]==x, c(4,7) ])) }) )
# run tests
fisher <- Fisher.test(p_values)
stouffer <- Stouffer.test(p_values, sizes)
# output an updated meta table
rbind( meta_result, c(test_name, length(p_values), fisher["Xsq"], fisher["p.value"], stouffer["Z"], stouffer["p.value"] ) )
}
# run tests on groups (sorry didn't streamline this)
# studies that use competition and study the whole DFE
comp_subgroup <- list( lind, carrasco, domingo_calap, peris, sanjuan )
meta_result <- test_meta( comp_subgroup, result, meta_result )
# high-throughput studies that use a surrogate for fitness
ht_subgroup <- list( firnberg, roscoe, hietpas )
meta_result <- test_meta( ht_subgroup, result, meta_result )
# studies that use competition and are only looking at beneficial mutations
adaptive_subgroup <- list( maclean, rokyta )
meta_result <- test_meta( adaptive_subgroup, result, meta_result )
# all studies
meta_result <- test_meta( all_dfes, result, meta_result )
write.table( meta_result[-1,],"DFE_tests/DFE_meta_tests.tab", sep="\t", row.names=FALSE )
# end