-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvolcano.ma.R
97 lines (91 loc) · 5.72 KB
/
volcano.ma.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
volcano.ma <- function(Data, PlotType = "ma", HighlightEnsemblIDs = NA, GeneNameColumnName = "gene_name", EnsemblIDColumnName = "ensembl_gene_id", log2FoldChangeColumnName = "log2FoldChange", abslog2FoldChangeThreshold = 1, abslog2FoldChangeLimit = 3, baseMeanColumnName = "baseMean", log2baseMeanLowerLimit = 0, log2baseMeanUpperLimit = NA, AdjustedPValueColumnName = "padj", SignificanceThreshold = 0.01, negativelog10AdjustedPValueLimit = 15, LineWidth = 0.25, Alpha = 1, NSAlpha = 0.1, UpColor = "#FFD300", DownColor = "#0087BD", HighlightColor = "#C40233", HighlightSize = 2.5, log2FoldChangeLabel = bquote(log[2](Escape/Diapause)), log2FoldChangeTickDistance = 1, log10AdjustedPValueTickDistance = 5) {
suppressPackageStartupMessages(library("ggplot2"))
# Categorize each gene based on its log2FoldChange and AdjustedPValue and assemble the data frame
Category = rep("ns", nrow(Data))
for (i in 1 : nrow(Data)) {
if ((Data[i, log2FoldChangeColumnName] >= abslog2FoldChangeThreshold) &
(!is.na(Data[i, AdjustedPValueColumnName])) &
(Data[i, AdjustedPValueColumnName] < SignificanceThreshold)) {
Category[i] = "up"
}
if ((Data[i, log2FoldChangeColumnName] <= -abslog2FoldChangeThreshold) &
(!is.na(Data[i, AdjustedPValueColumnName])) &
(Data[i, AdjustedPValueColumnName] < SignificanceThreshold)) {
Category[i] = "down"
}
}
negativelog10AdjustedPValue <- -log10(Data[, AdjustedPValueColumnName])
log2baseMean <- log2(Data[, baseMeanColumnName])
colnames(negativelog10AdjustedPValue) <- "negativelog10AdjustedPValue"
colnames(log2baseMean) <- "log2baseMean"
Data <- cbind(as.data.frame(Data), Category, negativelog10AdjustedPValue, log2baseMean)
names(Data)[names(Data) == log2FoldChangeColumnName] <- "log2FoldChange"
names(Data)[names(Data) == GeneNameColumnName] <- "gene_name"
# Preprocessing
Data <- Data[!is.na(Data[, AdjustedPValueColumnName]),]
if (!is.na(negativelog10AdjustedPValueLimit)) {
for (i in 1 : nrow(Data)) {
if (Data[i, "negativelog10AdjustedPValue"] > negativelog10AdjustedPValueLimit) {
Data[i, "negativelog10AdjustedPValue"] <- negativelog10AdjustedPValueLimit
}
}
}
if (!is.na(abslog2FoldChangeLimit)) {
for (i in 1 : nrow(Data)) {
if (Data[i, "log2FoldChange"] > abslog2FoldChangeLimit) {
Data[i, "log2FoldChange"] <- abslog2FoldChangeLimit
}
if (Data[i, "log2FoldChange"] < -abslog2FoldChangeLimit) {
Data[i, "log2FoldChange"] <- -abslog2FoldChangeLimit
}
}
}
if (!is.na(log2baseMeanUpperLimit)) {
for (i in 1 : nrow(Data)) {
if (Data[i, "log2baseMean"] > log2baseMeanUpperLimit) {
Data[i, "log2baseMean"] <- log2baseMeanUpperLimit
}
}
}
if (!is.na(log2baseMeanLowerLimit)) {
for (i in 1 : nrow(Data)) {
if (Data[i, "log2baseMean"] < log2baseMeanLowerLimit) {
Data[i, "log2baseMean"] <- log2baseMeanLowerLimit
}
}
}
# Volcano plot
if (PlotType == "volcano") {
Plot <- ggplot(data = Data, aes(x = log2FoldChange, y = negativelog10AdjustedPValue, GeneName = gene_name)) +
geom_point(aes(color = Category, alpha = Category), stroke = 0) +
scale_color_manual(values = c("up" = UpColor, "down" = DownColor, "ns" = "black")) +
scale_alpha_manual(values = c("up" = Alpha, "down" = Alpha, "ns" = NSAlpha)) +
geom_hline(yintercept = -log10(SignificanceThreshold), linetype = "dashed", linewidth = LineWidth) +
geom_vline(xintercept = c(-abslog2FoldChangeThreshold, abslog2FoldChangeThreshold), linetype = "dashed", linewidth = LineWidth) +
xlab(log2FoldChangeLabel) +
ylab(bquote(-log[10](italic(p)[adj]))) +
theme_bw() +
theme(legend.position = "none", panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +
coord_cartesian(expand = FALSE, clip = "off") +
scale_x_continuous(breaks = c(seq(from = -abslog2FoldChangeLimit, by = log2FoldChangeTickDistance, to = abslog2FoldChangeLimit), -abslog2FoldChangeThreshold, abslog2FoldChangeThreshold), limits = c(-abslog2FoldChangeLimit, abslog2FoldChangeLimit)) +
scale_y_continuous(breaks = c(seq(from = 0, by = log10AdjustedPValueTickDistance, to = negativelog10AdjustedPValueLimit), -log10(SignificanceThreshold)), limits = c(0, negativelog10AdjustedPValueLimit))
}
# MA plot
if (PlotType == "ma") {
Plot <- ggplot(data = Data, aes(x = log2baseMean, y = log2FoldChange, GeneName = gene_name)) +
geom_point(aes(color = Category, alpha = Category), stroke = 0) +
scale_color_manual(values = c("up" = UpColor, "down" = DownColor, "ns" = "black")) +
scale_alpha_manual(values = c("up" = Alpha, "down" = Alpha, "ns" = NSAlpha)) +
geom_hline(yintercept = c(-abslog2FoldChangeThreshold, abslog2FoldChangeThreshold), linetype = "dashed", linewidth = LineWidth) +
ylab(log2FoldChangeLabel) +
xlab(bquote(log[2](base~mean))) +
theme_bw() +
theme(legend.position = "none", panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +
coord_cartesian(expand = FALSE, clip = "off") +
scale_x_continuous(limits = c(log2baseMeanLowerLimit, log2baseMeanUpperLimit)) +
scale_y_continuous(breaks = c(seq(from = -abslog2FoldChangeLimit, by = log2FoldChangeTickDistance, to = abslog2FoldChangeLimit), -abslog2FoldChangeThreshold, abslog2FoldChangeThreshold), limits = c(-abslog2FoldChangeLimit, abslog2FoldChangeLimit))
}
# Hightlight certain genes
Plot <- Plot + geom_point(data = Data[Data[, EnsemblIDColumnName] %in% HighlightEnsemblIDs,], color = HighlightColor, stroke = 0, size = HighlightSize)
return(Plot)
}