-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdiffbind_hdl_db.R
212 lines (169 loc) · 6.86 KB
/
diffbind_hdl_db.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
setwd("/data/2023_DAPseq/HDL_DAP-seq/10diffbind/")
library(DiffBind)
library(ggplot2)
browseVignettes("DiffBind")
hdl <- dba.analyze("meta/hdl_dap.csv")
hdl.DB <- dba.report(hdl)
View(hdl.DB)
## with 3 replicates
# Reading in of peakset
samples_hdl <- read.csv("meta/hdl_dap.csv")
names(samples_hdl)
samples_hdl
hdl <- dba(sampleSheet="meta/hdl_dap.csv", minOverlap=0.33, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.05))
hdl <- dba(sampleSheet=samples_hdl)
hdl
#correlation plot
plot(hdl)
# Counting reads
hdlcount <- dba.count(hdl)
hdlcount
# number of reads that overlap a consensus peak
info <- dba.show(hdlcount)
libsizes <- cbind(LibReads=info$Reads, FRiP=info$FRiP, PeakReads=round(info$Reads * info$FRiP))
rownames(libsizes) <- info$ID
libsizes
plot(hdlcount)
#if summits is equal to a number greater than zero, you can not retrieve the original consensus peaks as
#they were before they are re-centered (and possibly merged).
##If you want to see them, you can call dba.count() twice, first with summits=TRUE and then setting summits to the desired value after retrieving the consensus peaks
hdlcount <- dba.count(hdl, summits = TRUE)
hdlcount
consensus <- dba.peakset(hdlcount, bRetrieve = TRUE)
write.csv(consensus, file = "consensus_peakset.csv")
#no use
hdlcount_s <- dba.count(hdl, summits = 200)
hdlcount_s
consensus_s <- dba.peakset(hdlcount_s, bRetrieve = TRUE)
write.csv(consensus_s, file = "consensus_peak_summit.csv")
hdlcount_nos <- dba.count(hdl, summits = FALSE)
hdlcount_nos
consensus_nos <- dba.peakset(hdlcount_nos, bRetrieve = TRUE)
write.csv(consensus_nos, file = "consensus_peakset_nos.csv")
#By default, the data are normalized based on sequencing depth
hdlcount_norm <- dba.normalize(hdlcount)
#Establishing a model design and contrast
hdlcount_contrast <- dba.contrast(hdlcount_norm, reorderMeta=list(Condition="M"))
hdlcount_contrast
#The main differential analysis function is invoked as follows:
hdlcount_analyse <- dba.analyze(hdlcount_contrast, bBlacklist=FALSE, bGreylist=FALSE)
dba.show(hdlcount_analyse, bContrasts=TRUE)
#The final step is to retrieve the differentially bound sites as follows:
hdlcount_analyse.DB <- dba.report(hdlcount_analyse)
hdlcount_analyse.DB
write.csv(hdlcount_analyse.DB, file = "hdl_diffbound.csv")
dba.plotVenn(hdlcount_analyse, contrast=1)
##consensus-of-consensus approach, where you first make a consensus peakset for each sample group, then combine them in an overall consensus you use for counting
condition_cons <- dba.peakset(hdl, consensus = DBA_CONDITION)
write.csv(condition_cons, file = "consensus_peakset_cons.csv")
myDBAcons <- dba(condition_cons, mask = condition_cons$masks$Consensus)
consensus.peaks <- dba.peakset(myDBAcons, bRetrieve = TRUE)
write.csv(consensus.peaks, file = "consensus_condition.csv")
myDBA.counts <- dba.count(hdl, peaks = consensus.peaks)
### older analysis
hdl <- dba.analyze("meta/hdl_dap.csv")
hdl.DB <- dba.report(hdl)
View(hdl.DB)
hdl_3r <- dba.analyze("meta/hdl_dap3r.csv")
hdl_3r.DB <- dba.report(hdl_3r)
View(hdl_3r.DB)
hdl_2r <- dba.analyze("meta/hdl_dap2r.csv")
hdl_2r.DB <- dba.report(hdl_2r)
View(hdl_2r.DB)
## with 4 replicates
# Reading in of peakset
samples_hdl <- read.csv("meta/hdl_dap.csv")
names(samples_hdl)
samples_hdl
hdl <- dba(sampleSheet="meta/hdl_dap.csv", minOverlap=0.5, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.99))
## with 3 replicates and default parameters
# Reading in of peakset
samples_hdl3rd <- read.csv("meta/hdl_dap3r.csv")
names(samples_hdl3rd)
samples_hdl3rd
hdl3rd <- dba(sampleSheet="meta/hdl_dap3r.csv")
## with 3 replicates
# Reading in of peakset
samples_hdl3r <- read.csv("meta/hdl_dap3r.csv")
names(samples_hdl3r)
samples_hdl3r
hdl3r <- dba(sampleSheet="meta/hdl_dap3r.csv", minOverlap=0.5, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.99))
# Alternatively, the previously read-in sample sheet could be used directly to create the DBA object
# hdl <- dba(sampleSheet=samples_hdl)
## with 2 replicates and default parameters
# Reading in of peakset
samples_hdl2rd <- read.csv("meta/hdl_dap2r.csv")
names(samples_hdl2rd)
samples_hdl2rd
hdl2rd <- dba(sampleSheet="meta/hdl_dap2r.csv", minOverlap=0.5, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.99))
## with 2 replicates
# Reading in of peakset
samples_hdl2r <- read.csv("meta/hdl_dap2r.csv")
names(samples_hdl2r)
samples_hdl2r
hdl2r <- dba(sampleSheet="meta/hdl_dap2r.csv", minOverlap=0.5, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.99))
# metadata associated with this object can be displayed simply as
hdl
hdl3rd
hdl3r
hdl2rd
hdl2r
plot(hdl)
plot(hdl3rd)
plot(hdl2rd)
# Using the data from the peak calls, a correlation heatmap can be generated
# plot(hdl)
## can also be generated by
# dba.plotHeatmap(hdl)
#Consensus peaks
consensus <- dba(hdl, minOverlap=2)
consensus.peaks <- dba.peakset(consensus, bRetrieve=TRUE)
counts <- dba.count(hdl, peaks=consensus.peaks)
counts
plot(counts)
consensus.peaks
cons_peak<-data.frame("consensus.peaks")
write.csv("cons_peak")
# Counting reads
hdl_count <- dba.count(hdl, minOverlap=0.5)
# After the dba.count call, the DBA object can be examined:
hdl_count
# number of reads that overlap a consensus peak
info <- dba.show(hdl_count)
libsizes <- cbind(LibReads=info$Reads, FRiP=info$FRiP, PeakReads=round(info$Reads * info$FRiP))
rownames(libsizes) <- info$ID
libsizes
# new correlation heatmap based on the count scores (using default normalization parameters)
plot(hdl_count)
dba.plotPCA(hdl, label = c("ID"), transpose=T)
dba.plotPCA(hdl3rd, label = c("ID"))
dba.plotPCA(hdl2rd, label = c("ID"))
dba.plotMA(hdl, contrast=c("Condition"), label = c("ID"))
# Normalizing the data
## By default, the data are normalized based on sequencing depth.
hdl_count_norm <- dba.normalize(hdl_count)
## The details of the normalization can be examined
norm <- dba.normalize(hdl_count_norm, bRetrieve=TRUE)
norm
## The default library-size based methods results in all the library sizes being normalized to be the same (the mean library size)
normlibs <- cbind(FullLibSize=norm$lib.sizes, NormFacs=norm$norm.factors, NormLibSize=round(norm$lib.sizes/norm$norm.factors))
rownames(normlibs) <- info$ID
normlibs
# Before running the differential analysis, establishing a model design and contrast
hdl_count_norm_condition <- dba.contrast(hdl_count_norm, reorderMeta=list(Condition="WT"))
hdl_count_norm_condition
hdl_count_condition <- dba.contrast(hdl_count, reorderMeta=list(Condition="WT"))
hdl_count_condition
# Performing the differential analysis
hdl_db <- dba.analyze(hdl_count_norm_condition)
dba.show(hdl_db, bContrasts=TRUE, th=0.99)
hdl_db2 <- dba.analyze(hdl_count)
dba.show(hdl_db2, bContrasts=TRUE, th=0.99)
hdl_db3 <- dba.analyze(hdl_count_condition)
dba.show(hdl_db3, bContrasts=TRUE, th=0.99)
# plotting MAplot
dba.plotMA(hdl_db, bNormalized=FALSE)
dba.peakset(hdl_db, bRetrieve=TRUE)
dba.peakset(hdl_db, bRetrieve=FALSE)
# Compute binding site overlaps (occupancy analysis)
dba.overlap(hdl, mode=DBA_OLAP_ALL, )