-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCorrelateOmics.R
60 lines (53 loc) · 3.3 KB
/
CorrelateOmics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
CorrelateOmics <- function(ProteomicsDataFilePath = "LFQ_intensities.xlsx",
UniProtIDColumnName = "Protein IDs",
To = "Ensembl",
GeneNameColumnName = "Gene name",
ProteomicsColumnsToCalculateMean = 2 : 5,
TranscriptomicsDataFilePath = "Hu2020Rerun_group_non_diap_vs_diap.results.xlsx",
EnsemblIDColumnName = "ensembl_gene_id",
TranscriptomicsColumnsToCalculateMean = 3 : 7,
RefreshGeneNames = TRUE) {
suppressPackageStartupMessages(library("readxl"))
suppressPackageStartupMessages(library("retry"))
suppressPackageStartupMessages(library("biomaRt"))
biomartCacheClear()
ProteomicsData <- as.data.frame(read_xlsx(ProteomicsDataFilePath))
TranscriptomicsData <- as.data.frame(read_xlsx(TranscriptomicsDataFilePath))
DataMatrix <- as.data.frame(matrix(data = NA, nrow = nrow(ProteomicsData), ncol = 5))
colnames(DataMatrix) <- c("logTranscriptomicsMean", "logTranscriptomicsStdev", "logProteomicsMean", "logProteomicsStdev", "GeneName")
All.UniProtKB.Entries <- c()
for (i in 1 : nrow(ProteomicsData)) {
All.UniProtKB.Entries <- c(All.UniProtKB.Entries, strsplit(ProteomicsData[i, UniProtIDColumnName], split = ";")[[1]])
}
Table <- UniProtKBAC2EnsemblID(paste(All.UniProtKB.Entries, collapse = ","), To = To)
for (i in 1 : nrow(ProteomicsData)) {
DataMatrix[i, "logProteomicsMean"] <- Alt.ln(mean(2 ^ (as.numeric(ProteomicsData[i, ProteomicsColumnsToCalculateMean]))))
DataMatrix[i, "logProteomicsStdev"] <- Alt.ln(sd(2 ^ (as.numeric(ProteomicsData[i, ProteomicsColumnsToCalculateMean]))))
DataMatrix[i, "GeneName"] <- ProteomicsData[i, GeneNameColumnName]
UniProtKB.Entries <- strsplit(ProteomicsData[i, UniProtIDColumnName], split = ";")[[1]]
EnsemblMapping <- Table[(Table[, "uniprotsptrembl"] %in% UniProtKB.Entries), "ensembl_gene_id"]
if ((length(unique(EnsemblMapping)) == 1) && !is.na(unique(EnsemblMapping)) && (unique(EnsemblMapping) != "")) {
EnsemblID <- unique(EnsemblMapping)
rownames(DataMatrix)[i] <- EnsemblID
if (EnsemblID %in% TranscriptomicsData[, EnsemblIDColumnName]) {
DataMatrix[i, "logTranscriptomicsMean"] <- Alt.ln(mean(as.numeric(TranscriptomicsData[(TranscriptomicsData[, EnsemblIDColumnName] == EnsemblID), TranscriptomicsColumnsToCalculateMean])))
DataMatrix[i, "logTranscriptomicsStdev"] <- Alt.ln(sd(as.numeric(TranscriptomicsData[(TranscriptomicsData[, EnsemblIDColumnName] == EnsemblID), TranscriptomicsColumnsToCalculateMean])))
}
}
}
DataMatrix <- DataMatrix[((!is.na(DataMatrix[, "logProteomicsMean"])) & (!is.na(DataMatrix[, "logTranscriptomicsMean"]))),]
Counts <- as.data.frame(table(rownames(DataMatrix)))
ToBeRemovedEnsemblIDs <- as.character(Counts[Counts$Freq > 1, 1])
DataMatrix <- DataMatrix[!(rownames(DataMatrix) %in% ToBeRemovedEnsemblIDs),]
if (RefreshGeneNames) {
EnsemblIDs <- rownames(DataMatrix)
DataMatrix$CurrentEntrezGeneName <- rep(NA, nrow(DataMatrix))
for (i in 1 : nrow(DataMatrix)) {
DataMatrix[i, "CurrentEntrezGeneName"] <- EnsemblID2Entrez(EnsemblIDs[i], Output = "Name")
}
}
return(DataMatrix)
}
Alt.ln <- function(x) {
return(log(x + 1))
}