-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbayesian_functions.R
130 lines (117 loc) · 7.21 KB
/
bayesian_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File: Bayesian_functions.R
# Date created: 12/6/19
# Last update: 12/17/19 by Adriano Schneider
# Authors: Adriano Schneider, Reilly Hostager & Simon Dellicour
library(knitr) #required for calculateBF and lograteextract
library(tidyverse) #required for lograteextract and filterrates
library(RColorBrewer) #required for plotmigrationevents
#### rateextract is a function to extract the phylogeography rates and multiply them by the indicators from BEAST phylogeography logfiles ####
rateextract <- function(logfile,burninpercentage,locations,traitname) {
print("Loading data...")
locations = as.data.frame(locations)
burnin = as.numeric(burninpercentage)
log = read.table(logfile, header=T) #AS_done
print("Removing burn-in...")
log = log[(round((dim(log)[1]/100)*burnin)+1):dim(log)[1],] # to remove the burn-in
print("Creating empty matrix...")
df2 <- data.frame(matrix(NA, nrow = dim(log)[1], ncol = 1))
df2 <- as_tibble(df2)
print("Extracting rates and indicators from logfile...") #can replace "rates" with "indicator" for extracting "indicator"
for (i in 1:dim(locations)[1]) { #for all locations in list of locations
for (j in 1:dim(locations)[1]) { #and all locations in list of locations
if (i != j) { #if location of i is different from location of j, extract rates from i to j from logfile and add column to matrix
ratelabel = combine_words(c(traitname,".rates.",as.character(locations[i,]),".",as.character(locations[j,])), sep="", and="")
index1 = match(ratelabel,names(log)) # has to be the column index of the transition rate from location i to j
rateindex <- as_tibble(log[index1]) #transforms the current column from a list into a tibble
indicatorslabel = combine_words(c(traitname,".indicators.",as.character(locations[i,]),".",as.character(locations[j,])), sep="", and="")
index2 = match(indicatorslabel,names(log)) # has to be the column index of the transition rate from location i to j
indicatorsindex <- as_tibble(log[index2]) #transforms the current column from a list into a tibble
currentindex <- mapply('*',indicatorsindex,rateindex) #multiplies rates with indicators
currentindex <- as_tibble(currentindex)
df2 <- cbind(df2,currentindex) #joins the columnn
}}}
df <- subset(df2, select = -1) #removes empty column from list
colnames(df)<-gsub(".indicators.","",colnames(df)) #remove indicators from column labels
colnames(df)<-gsub(traitname,"",colnames(df)) #remove traitname from column labels
print("Storing the product of rates and indicators...")
write.csv(df,file = combine_words(c(traitname,".product.indicator.rates.csv"), sep="", and=""),row.names=FALSE) #print column from list
print("Rates successfully extracted and saved on a csv file in your working folder.")
}
########################
#### calculateBF is a function to calculate the Bayes Factor Approximation of BEAST BSSVS logfiles ####
calculateBF <- function(logfile,burninpercentage,locations,traitname) {
print("Loading data...")
N_traits = as.numeric(length(locations))
locations = as.data.frame(locations)
burnin = as.numeric(burninpercentage)
log = read.table(logfile, header=T)
print("Removing burn-in...")
log = log[(round((dim(log)[1]/100)*burnin)+1):dim(log)[1],] # to remove the burn-in
print("Creating empty matrix...")
df <- tibble("Transition" = character(),"BayesFactor" = numeric()) #creates empty BF tibble
print("Calculating Bayes Factor...")
for (i in 1:dim(locations)[1])
{
for (j in 1:dim(locations)[1])
{
if (i != j)
{
indicatorlabel = combine_words(c(traitname,".indicators.",as.character(locations[i,]),".",as.character(locations[j,])), sep="", and="")
index1 = match(indicatorlabel,names(log)) # has to be the column index of the indicator for transition from location i to j
p = sum(log[,index1]==1)/dim(log)[1]
K = N_traits # K should be divided by 2 if "symetric" case
q = (log(2)+K-1)/(K*(K-1))
BF = (p/(1-p))/(q/(1-q))
label <- as.character(combine_words(c(as.character(locations[i,]),".",as.character(locations[j,])),sep="",and=""))
df <- add_row(df,Transition = label, BayesFactor = BF)
}
}
}
write.csv(df,file = combine_words(c(traitname,".BFs.csv"), sep="", and=""),row.names=FALSE)
print("Bayes Factor approximation calculated and saved on a csv file in your working folder.")
}
########################
#### filterrates function takes a Bayes Factor list from calculateBFonly function and the rates from the rateextract function and filters the rates dataset based on BF threshold ####
filterrates <- function(BFlist,rates,BFthreshold) {
print("Importing input files...")
threshold = as.numeric(BFthreshold)
rates = as_tibble(read.csv(rates, header = TRUE))
BF = as_tibble(read.csv(BFlist, header = TRUE))
print("Reducing BF matrix to selected threshold...")
filteredBF = BF %>% filter(BayesFactor >= threshold)
filteredBF = droplevels(filteredBF)
print("Creating empty matrix...")
df <- data.frame(matrix(NA, nrow = dim(rates)[1], ncol = 1))
df <- as_tibble(df)
print("Filtering rates of transitions to selected threshold...")
for (i in 1:dim(filteredBF)[1]){
test <- as.character(filteredBF$Transition[i])
index = match(test,names(rates))
filteredrate <- rates[,index]
df <- cbind(df,filteredrate)
}
df <- subset(df, select = -1) #removes empty column from list
write.csv(df,file = combine_words(c(traitname,".filtered.product.indicator.rates.csv"), sep="", and=""),row.names=FALSE) #print column from list
print("Rates successfully filtered and saved on a csv file in your working folder.")
}
########################
#### plotmigrationevents plots the filtered rates from filterrates in density plots and export them as individual PDFs ####
### Note: Probability that the number of migration events is zero is shown as a bar at the 0 on the X-axis.
plotmigrationevents <- function(plotList,colorscheme) {
for (plotName in plotList){
cooPlot<-read.csv(plotName,sep=',')
cList <- colorRampPalette(brewer.pal(dim(cooPlot)[2],colorscheme))(dim(cooPlot)[2])
for (p in 1:dim(cooPlot)[2]) {
d <- density(cooPlot[[p]][cooPlot[[p]]>0]) #Creates variable with density for all values above zero for each column
z <- length(cooPlot[[p]][cooPlot[[p]]==0])/dim(cooPlot)[1] #
plot(d,main=names(cooPlot[p]),xlab="Migration events",ylab="Density",xaxt="n",yaxt="n",xlim=c(0,max(d$x)),ylim=c(0,max(d$y,z)), cex.lab=1.25, cex.main=1.25)
#plot(d,main=names(cooPlot[p]),xlab="Migration events",ylab="Density",xaxt="n",yaxt="n",xlim=c(0,12),cex.lab=1.25, cex.main=1.25) #option to replace max(cooPlot) with actual value for x axis limit so all plots look the same.
polygon(d, col=cList[p], border=cList[p]) #Plots polygon with density values above zero, optional col for coloring and border for line coloring can be used
rect(-0.1,0,0.1,z,col=cList[p]) #optional border=NA to remove border and have rectangle behind polygon(curve)
axis(1,at=c(0,2,4,6,8,10,12),labels=c("0","2","4","6","8","10","12"))
axis(2,at=c(0.0,0.2,0.4,0.6,0.8,1.0),labels=c("0.0","0.2","0.4","0.6","0.8","1.0"))
dev.copy(pdf,combine_words(c(names(cooPlot[p]),".pdf"),sep="",and=""))
dev.off()
}
}
}