diffbind_hdl_db.R

setwd("/data/2023_DAPseq/HDL_DAP-seq/10diffbind/")

library(DiffBind)
library(ggplot2)
browseVignettes("DiffBind")

hdl <- dba.analyze("meta/hdl_dap.csv")
hdl.DB <- dba.report(hdl)
View(hdl.DB)

## with 3 replicates
# Reading in of peakset
samples_hdl <- read.csv("meta/hdl_dap.csv")
names(samples_hdl)
samples_hdl
hdl <- dba(sampleSheet="meta/hdl_dap.csv", minOverlap=0.33, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.05))
hdl <- dba(sampleSheet=samples_hdl)
hdl

#correlation plot
plot(hdl)

# Counting reads
hdlcount <- dba.count(hdl)
hdlcount

# number of reads that overlap a consensus peak
info <- dba.show(hdlcount)
libsizes <- cbind(LibReads=info$Reads, FRiP=info$FRiP, PeakReads=round(info$Reads * info$FRiP))
rownames(libsizes) <- info$ID
libsizes
plot(hdlcount)

#if summits is equal to a number greater than zero, you can not retrieve the original consensus peaks as 
#they were before they are re-centered (and possibly merged).
##If you want to see them, you can call dba.count() twice, first with summits=TRUE and then setting summits to the desired value after retrieving the consensus peaks
hdlcount <- dba.count(hdl, summits = TRUE)
hdlcount
consensus <- dba.peakset(hdlcount, bRetrieve = TRUE)
write.csv(consensus, file = "consensus_peakset.csv")
#no use
hdlcount_s <- dba.count(hdl, summits = 200)
hdlcount_s
consensus_s <- dba.peakset(hdlcount_s, bRetrieve = TRUE)
write.csv(consensus_s, file = "consensus_peak_summit.csv")
hdlcount_nos <- dba.count(hdl, summits = FALSE)
hdlcount_nos
consensus_nos <- dba.peakset(hdlcount_nos, bRetrieve = TRUE)
write.csv(consensus_nos, file = "consensus_peakset_nos.csv")

#By default, the data are normalized based on sequencing depth
hdlcount_norm <- dba.normalize(hdlcount)

#Establishing a model design and contrast
hdlcount_contrast <- dba.contrast(hdlcount_norm, reorderMeta=list(Condition="M"))
hdlcount_contrast

#The main differential analysis function is invoked as follows:
hdlcount_analyse <- dba.analyze(hdlcount_contrast, bBlacklist=FALSE, bGreylist=FALSE)
dba.show(hdlcount_analyse, bContrasts=TRUE)

#The final step is to retrieve the differentially bound sites as follows:
hdlcount_analyse.DB <- dba.report(hdlcount_analyse)
hdlcount_analyse.DB
write.csv(hdlcount_analyse.DB, file = "hdl_diffbound.csv")

dba.plotVenn(hdlcount_analyse, contrast=1)


##consensus-of-consensus approach, where you first make a consensus peakset for each sample group, then combine them in an overall consensus you use for counting
condition_cons <- dba.peakset(hdl, consensus = DBA_CONDITION)
write.csv(condition_cons, file = "consensus_peakset_cons.csv")
myDBAcons <- dba(condition_cons, mask = condition_cons$masks$Consensus)
consensus.peaks <- dba.peakset(myDBAcons, bRetrieve = TRUE)
write.csv(consensus.peaks, file = "consensus_condition.csv")
myDBA.counts <- dba.count(hdl, peaks = consensus.peaks)

### older analysis
hdl <- dba.analyze("meta/hdl_dap.csv")
hdl.DB <- dba.report(hdl)
View(hdl.DB)

hdl_3r <- dba.analyze("meta/hdl_dap3r.csv")
hdl_3r.DB <- dba.report(hdl_3r)
View(hdl_3r.DB)

hdl_2r <- dba.analyze("meta/hdl_dap2r.csv")
hdl_2r.DB <- dba.report(hdl_2r)
View(hdl_2r.DB)


## with 4 replicates
# Reading in of peakset
samples_hdl <- read.csv("meta/hdl_dap.csv")
names(samples_hdl)
samples_hdl
hdl <- dba(sampleSheet="meta/hdl_dap.csv", minOverlap=0.5, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.99))

## with 3 replicates and default parameters
# Reading in of peakset
samples_hdl3rd <- read.csv("meta/hdl_dap3r.csv")
names(samples_hdl3rd)
samples_hdl3rd
hdl3rd <- dba(sampleSheet="meta/hdl_dap3r.csv")

## with 3 replicates
# Reading in of peakset
samples_hdl3r <- read.csv("meta/hdl_dap3r.csv")
names(samples_hdl3r)
samples_hdl3r
hdl3r <- dba(sampleSheet="meta/hdl_dap3r.csv", minOverlap=0.5, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.99))

# Alternatively, the previously read-in sample sheet could be used directly to create the DBA object
# hdl <- dba(sampleSheet=samples_hdl)

## with 2 replicates and default parameters
# Reading in of peakset
samples_hdl2rd <- read.csv("meta/hdl_dap2r.csv")
names(samples_hdl2rd)
samples_hdl2rd
hdl2rd <- dba(sampleSheet="meta/hdl_dap2r.csv", minOverlap=0.5, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.99))

## with 2 replicates
# Reading in of peakset
samples_hdl2r <- read.csv("meta/hdl_dap2r.csv")
names(samples_hdl2r)
samples_hdl2r
hdl2r <- dba(sampleSheet="meta/hdl_dap2r.csv", minOverlap=0.5, config=data.frame(AnalysisMethod=DBA_DESEQ2,th=0.99))

# metadata associated with this object can be displayed simply as
hdl
hdl3rd
hdl3r
hdl2rd
hdl2r

plot(hdl)
plot(hdl3rd)
plot(hdl2rd)

# Using the data from the peak calls, a correlation heatmap can be generated
# plot(hdl)
## can also be generated by
# dba.plotHeatmap(hdl)

#Consensus peaks
consensus <- dba(hdl, minOverlap=2)
consensus.peaks <- dba.peakset(consensus, bRetrieve=TRUE)
counts <- dba.count(hdl, peaks=consensus.peaks)
counts
plot(counts)
consensus.peaks
cons_peak<-data.frame("consensus.peaks")
write.csv("cons_peak")

# Counting reads
hdl_count <- dba.count(hdl, minOverlap=0.5)

# After the dba.count call, the DBA object can be examined:
hdl_count

# number of reads that overlap a consensus peak
info <- dba.show(hdl_count)
libsizes <- cbind(LibReads=info$Reads, FRiP=info$FRiP, PeakReads=round(info$Reads * info$FRiP))
rownames(libsizes) <- info$ID
libsizes

# new correlation heatmap based on the count scores (using default normalization parameters)
plot(hdl_count)
dba.plotPCA(hdl, label = c("ID"), transpose=T)
dba.plotPCA(hdl3rd, label = c("ID"))
dba.plotPCA(hdl2rd, label = c("ID"))
dba.plotMA(hdl, contrast=c("Condition"), label = c("ID"))

# Normalizing the data
## By default, the data are normalized based on sequencing depth.
hdl_count_norm <- dba.normalize(hdl_count)
## The details of the normalization can be examined
norm <- dba.normalize(hdl_count_norm, bRetrieve=TRUE)
norm
## The default library-size based methods results in all the library sizes being normalized to be the same (the mean library size)
normlibs <- cbind(FullLibSize=norm$lib.sizes, NormFacs=norm$norm.factors, NormLibSize=round(norm$lib.sizes/norm$norm.factors))
rownames(normlibs) <- info$ID
normlibs

# Before running the differential analysis, establishing a model design and contrast
hdl_count_norm_condition <- dba.contrast(hdl_count_norm, reorderMeta=list(Condition="WT"))
hdl_count_norm_condition

hdl_count_condition <- dba.contrast(hdl_count, reorderMeta=list(Condition="WT"))
hdl_count_condition

# Performing the differential analysis
hdl_db <- dba.analyze(hdl_count_norm_condition)
dba.show(hdl_db, bContrasts=TRUE, th=0.99)

hdl_db2 <- dba.analyze(hdl_count)
dba.show(hdl_db2, bContrasts=TRUE, th=0.99)

hdl_db3 <- dba.analyze(hdl_count_condition)
dba.show(hdl_db3, bContrasts=TRUE, th=0.99)

# plotting MAplot
dba.plotMA(hdl_db, bNormalized=FALSE)

dba.peakset(hdl_db, bRetrieve=TRUE)
dba.peakset(hdl_db, bRetrieve=FALSE)

# Compute binding site overlaps (occupancy analysis)
dba.overlap(hdl, mode=DBA_OLAP_ALL, )