-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStarling2022.r
296 lines (228 loc) · 12.8 KB
/
Starling2022.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# Package load
library(vegan)
library(MASS)
library(ggplot2)
library(plyr)
library(dplyr)
library(magrittr)
library(scales)
library(grid)
library(reshape2)
library(phyloseq)
library(randomForest)
library(knitr)
library(ape)
library(ggpubr)
#####
#Functions
#####
table_for_plotting<-function(Physeq, Level, Variable1,Variable2=NULL){
#####
# Inputs
#####
# Required
# Physeq: A phyloseq object of abundance combined at the desired level (using tax_glom)
# Level: The taxonomic level at which you want the rel. abu. summarized
# This is the same level used in tax_glom()
# Variable1: The first variable to summarize by
# Optional
# Variable2: A second variable to summarize by
# Example syntax: table_for_plotting(PhylumLevel,"Phylum","Region","sample_Species")
# Note: If you are using 'Species' as a metadata variable, phyloseq will
# rename it sample_Species to avoid conflicts
#####
# Required packages
#####
library(plyr)
library(phyloseq)
df <- psmelt(Physeq) # Convert phyloseq object to a dataframe
df$Abundance=df$Abundance*100 # Multiply by 100 so scale is 0-100%
# The section below will run if only one variable to summarize by is present
if(is.null(Variable2)==T) {
# Summarize the abundance based on desired level (phylum,family,etc) and variable of interest (variable 1)
Trtdata <- ddply(df, c(Level, Variable1), summarise,
N = length(Abundance),
mean = mean(Abundance),
sd = sd(Abundance),
se = sd / sqrt(N))
# Create a new table with the sum relative of all taxa not filtered out
Summarized<-ddply(Trtdata,c(Variable1),summarize,sum= sum(mean),N=mean(N))
# Subtract sum of all other taxa to get the 'Other' category rel abu
Summarized$mean<-100-Summarized$sum
# Modified the summarized object to match the Trtdata format
TableForCombining<-data.frame(Variable1=Summarized[,1],N=Summarized$N,Level="Other",mean=Summarized$mean,sd="NA",se="NA")
# Rename columns based on variable name and level
colnames(TableForCombining)[1]<-Variable1
colnames(TableForCombining)[3]<-Level
# Combine the other category with the already summarized data
TableWithOther<-rbind(Trtdata,TableForCombining)
TableWithOther[Level] = factor(TableWithOther[,Level], levels = unique(TableWithOther[,Level])) # Moves other to the bottom of taxa list
}
# The below section will run if two variables to summarize by are present
else {Trtdata <- ddply(df, c(Level, Variable1,Variable2), summarise,
N = length(Abundance),
mean = mean(Abundance),
sd = sd(Abundance),
se = sd / sqrt(N))
Summarized<-ddply(Trtdata,c(Variable1,Variable2),summarize,sum= sum(mean),N=mean(N))
Summarized$mean<-100-Summarized$sum
TableForCombining<-data.frame(Variable1=Summarized[,1],Variable2=Summarized[,2],N=Summarized$N,Level="Other",mean=Summarized$mean,sd="NA",se="NA")
colnames(TableForCombining)[1]<-Variable1
colnames(TableForCombining)[2]<-Variable2
colnames(TableForCombining)[4]<-Level
TableWithOther<-rbind(Trtdata,TableForCombining)}
TableWithOther[Level] = factor(TableWithOther[,Level], levels = unique(TableWithOther[,Level])) # Moves other to the bottom of taxa list
return(TableWithOther)
}
#####
#Import
#####
setwd("~/Documents/Microbiome/2022/Starling/starlingdecomp")
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#000000","#CC79A7")
theme_set(theme_bw(base_size = 18)+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()))
biom=import_biom("table-with-taxonomy500.biom",parseFunction= parse_taxonomy_greengenes)
meta=read.csv("StarlingMeta5.13.22.csv",header=TRUE)
meta=sample_data(meta)
sample_names(meta)=meta$SampleID
physeq=merge_phyloseq(biom,meta)
physeq
physeq=subset_samples(physeq, Sample_Type != "Headgut_Swab" & Sample_Type != "Small_Intestine_Contents") #Remove headgut swabs
physeq
#######
#Taxonomic Filtering
########
GPr = transform_sample_counts(physeq, function(x) x / sum(x) ) #transform samples based on relative abundance
GPrPhylum=tax_glom(GPr,"Phylum")
PhylumLevel = filter_taxa(GPrPhylum, function(x) mean(x) > 1e-3, TRUE) #filter out any taxa lower tha 0.1%
#PhylumLevel = transform_sample_counts(PhylumLevel, function(x) x / sum(x) )
GPrFamily=tax_glom(GPr,"Family")
FamilyLevel = filter_taxa(GPrFamily, function(x) mean(x) > 1e-2, TRUE) #filter out any taxa lower tha 1%
#FamilyLevel = transform_sample_counts(FamilyLevel, function(x) x / sum(x) )
GPrGenus=tax_glom(GPr,"Genus")
GenusLevel = filter_taxa(GPrGenus, function(x) mean(x) > 1e-2, TRUE) #filter out any taxa lower tha 1%
######
#Figure 1A
######
Antimortem=subset_samples(physeq, Anti_Post== "Perimortem")
GPr = transform_sample_counts(Antimortem, function(x) x / sum(x) ) #transform samples based on relative abundance
GPrPhylum=tax_glom(GPr,"Phylum")
PhylumLevel = filter_taxa(GPrPhylum, function(x) mean(x) > 1e-2, TRUE) #filter out any taxa lower tha 1%
TrtdataWOther<-table_for_plotting(PhylumLevel,"Phylum","Sample_Type")
TrtdataWOther$Phylum = factor(TrtdataWOther$Phylum, levels = c("Acidobacteria","Actinobacteria","Bacteroidetes","Firmicutes","Proteobacteria","Tenericutes","Other"))
TrtdataWOther$Sample_Type = factor(TrtdataWOther$Sample_Type, levels = c("Small Intestine","Ceca", "Large Intestine","Cloaca"))
theme_set(theme_bw(base_size = 10)+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()))
PhylumAntePlot2A=ggplot(TrtdataWOther, aes(x=Sample_Type,y=mean,color=Phylum))+geom_bar(aes(fill = Phylum),colour="black", stat="identity")+xlab("Sample Type")+ylab("Relative Abundance (> 1%)") + theme(axis.text.x = element_text(angle = 0, hjust = 0.5))+theme(axis.title.x=element_blank())+
theme(axis.text.x = element_text(angle = 0,size=12),legend.position = "bottom",legend.title= element_blank())+ scale_fill_manual(values=cbPalette)+ scale_x_discrete(labels=c("Ceca" = "Ceca", "Cloaca" = "Cloaca","Large Intestine"="Large\n Intestine","Small Intestine"="Small\n Intestine"))
PhylumAntePlot2A
#####
# Figure 1B
#####
set.seed(546)
sample_data(physeq)$Sample_Type = factor(sample_data(physeq)$Sample_Type, levels = c("Small Intestine","Ceca", "Large Intestine","Cloaca"))
Antimortem=subset_samples(physeq, Anti_Post== "Perimortem")
Antimortem
sample_data(Antimortem)
ord=ordinate(Antimortem,"PCoA", "jaccard") #other common methods besides for weighted unifrac include jaccard, bray, jsd
ordplot=plot_ordination(Antimortem, ord,"samples", color="Sample_Type")+geom_point(size=4)+scale_colour_manual(name="Sample Type",values=cbPalette)+scale_fill_manual(values=cbPalette)
ordplot2B<-ordplot+ theme(legend.position=c(0.7,0.75),legend.title = element_blank(),legend.background = element_rect(color="black"))
#Remove legend title+facet_wrap(~Anti_Post)+ stat_ellipse(type= "norm",geom = "polygon", alpha = 1/4, aes(fill = Anti_Post))
ordplot2B
AntimortemOrd=phyloseq::distance(Antimortem, "jaccard") #Tree will not be a submultiple as some of the taxa are subset out
adonis2(AntimortemOrd ~ Sample_Type, as(sample_data(Antimortem), "data.frame"))
AntimortemNoCloaca<-subset_samples(Antimortem,Sample_Type!="Cloaca")
GPdist=phyloseq::distance(AntimortemNoCloaca, "jaccard")
adonis2(GPdist ~ Sample_Type, as(sample_data(AntimortemNoCloaca), "data.frame"))
########
# Figure 3A
########
GPr = transform_sample_counts(physeq, function(x) x / sum(x) ) #transform samples based on relative abundance
GPrPhylum=tax_glom(GPr,"Phylum")
PhylumLevel = filter_taxa(GPrPhylum, function(x) mean(x) > 1e-2, TRUE) #filter out any taxa lower tha 0.1%
TrtdataOther<-table_for_plotting(PhylumLevel,"Phylum","Sample_Type","Hours")
TrtdataOther$Sample_Type = factor(TrtdataOther$Sample_Type, levels = c("Small Intestine","Ceca", "Large Intestine","Cloaca"))
TrtdataOther$Hours <- as.character(TrtdataOther$Hours)
Figure3A=ggplot(TrtdataOther, aes(x=Hours,y=mean))+geom_bar(aes(fill = Phylum),colour="black", stat="identity")+ylab("Relative Abundance (> 1%)") +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5,size=12), legend.position = "bottom",legend.title= element_blank(),strip.text.x = element_text(size = 15))+
scale_fill_manual(values=cbPalette)+facet_wrap(~Sample_Type)+xlab("Time (hrs)")+scale_x_discrete(labels=c("0_hrs" = "0","24_hrs"="24","48_hrs"="48","72_hrs"="72"))
Figure3A<-Figure3A+scale_fill_manual(values=c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#000000","#CC79A7")
)
#Figure3B####
#Small Intestine
SmallIntestine=subset_samples(physeq,Sample_Type=="Small Intestine")
sample_data(SmallIntestine)
SmallIntestineOrd=phyloseq::distance(SmallIntestine, "jaccard") #Tree will not be a submultiple as some of the taxa are subset out
adonis2(SmallIntestineOrd ~ Anti_Post, as(sample_data(SmallIntestine), "data.frame"))
#F1,12 = 1.87, P = 0.007
#Ceca
Ceca=subset_samples(physeq,Sample_Type=="Ceca")
sample_data(Ceca)
CecaOrd=phyloseq::distance(Ceca, "jaccard")
adonis2(CecaOrd ~ Anti_Post, as(sample_data(Ceca), "data.frame"))
#F1,12 =1.79, P = 0.016
#Large Intestine
LargeIntestine=subset_samples(physeq,Sample_Type=="Large Intestine")
sample_data(LargeIntestine)
LargeIntestineOrd=phyloseq::distance(LargeIntestine, "jaccard")
adonis2(LargeIntestineOrd ~ Anti_Post, as(sample_data(LargeIntestine), "data.frame"))
#F1,15 = 2.49, P < 0.001
#Cloaca Not run as Cloaca only has 2 antemortem samples
# Cloaca=subset_samples(physeq,Sample.Type=="Cloaca")
# sample_data(Cloaca)
# CloacaOrd=phyloseq::distance(Cloaca, "jaccard")
PERMANOVAText<-c("F = 6.06,P = 0.033","F = 2.43, P = 0.007","F = 7.70, P < 0.001"," ")
PERMANOVAText
sample_data(physeq)$Sample.Type = factor(sample_data(physeq)$Sample.Type, levels = c("Small Intestine","Ceca", "Large Intestine","Cloaca"))
theme_set(theme_bw(base_size = 12)+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()))
ord=ordinate(physeq,"NMDS", "jaccard") #other common methods besides for weighted unifrac include jaccard, bray, jsd
ordplot=plot_ordination(physeq, ord,"samples", color="Anti_Post",shape="Anti_Post")+geom_point(size=2)+scale_colour_manual(values=cbPalette)+scale_fill_manual(values=cbPalette)+
facet_wrap(~Sample_Type,scales = "free")+ stat_ellipse(type= "norm",geom = "polygon", alpha = 1/4, aes(fill = Anti_Post))+
theme(legend.title = element_blank(),legend.text = element_text(size = 10),legend.position = "bottom",strip.text.x = element_text(size = 15))#+ annotate("text", label = PERMANOVAText, size = 2, x = 0, y = c(-0.15))
#legend.position=c(0.73,0.14)
ordplot
Figure3B <-ordplot
Figure3B
dev.off()
tiff("Figures/Figure3B.tiff", width = 3.3, height = 3.3, units = 'in', res = 300)
Figure3B
dev.off()
######
# Phylum level plot with error bars
#####
Perimortem=subset_samples(physeq, Anti_Post =="Perimortem")
GPr = transform_sample_counts(Perimortem, function(x) x / sum(x) ) #transform samples based on relative abundance
GPrPhylum=tax_glom(GPr,"Phylum")
PhylumLevelPerimortem = filter_taxa(GPrPhylum, function(x) mean(x) > 1e-2, TRUE) #filter out any taxa lower tha 0.1%
df <- psmelt(PhylumLevelPerimortem) #FamilyLevel
df$Abundance=df$Abundance*100
Trtdata <- ddply(df, c("Phylum", "Sample_Type"), summarise,
N = length(Abundance),
mean = mean(Abundance),
sd = sd(Abundance),
se = sd / sqrt(N)
)
write.csv(Trtdata,"TrtdataWErrorBars.csv")
Trtdata2<-read.csv("TrtdataWErrorBarsCloacaErrorRemoved.csv",header=T)
Trtdata2$Sample_Type = factor(Trtdata2$Sample_Type, levels = c("Small Intestine","Ceca", "Large Intestine","Cloaca"))
Trtdata2<-subset(Trtdata2,Phylum=="Tenericutes"|Phylum=="Bacteroidetes")
PhylumFacetPlotByType=ggplot(Trtdata2, aes(x=Sample_Type,y=mean))+geom_bar(aes(fill = Phylum),colour="black", stat="identity")+ylab("Relative Abundance (> 1%, SEM)") +
theme(axis.title.x = element_blank(),legend.position = "none",axis.text.x = element_text(angle = 0, hjust = 0.5,size=11),strip.text.x = element_text(size = 15))+geom_errorbar(aes(ymin=mean-se,ymax=mean+se))+
scale_x_discrete(labels=c("Ceca" = "Ceca", "Cloaca" = "Cloaca","Large Intestine"="Large\n Intestine","Small Intestine"="Small\n Intestine"))+
scale_fill_manual(values=c("#56B4E9", "#0072B2", "#D55E00", "#000000","#CC79A7")
)+facet_wrap(~Phylum)+xlab("Time (hrs)")
PhylumFacetPlotByType
#####
# Combine figure
#####
PhylumAntePlot2A
PhylumFacetPlotByType
Figure3A # Across Time
# Perimortem vs Antimortem
Figure3B # NMDS
#
# dev.off()
# tiff("UpdatedFigMay2022.tiff", width = 200, height = 200, units = 'mm', res = 600)
theme_set(theme_bw(base_size = 10)+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()))
ggarrange(PhylumAntePlot2A, PhylumFacetPlotByType,Figure3A,Figure3B,
labels = c("a","b","c","d"),
ncol = 2, nrow = 2)
dev.off()