# 200707_lvl4new_mixall.R
library(cmapR)
library(ranger)
library(pbapply)
# Prepping data ----
# Make sure lvl4new_data is in the workspace: reuse it when it already exists,
# otherwise load the cached .RData file, and if that file is missing run the
# upstream script (presumably the one that builds lvl4new_data -- confirm).
if (!exists("lvl4new_data")) {
  if (file.exists("~/Dropbox/GDB_archive/CMapCorr_files/lvl4new.RData")) {
    load("~/Dropbox/GDB_archive/CMapCorr_files/lvl4new.RData")
  } else {
    source("200706_ZscoreFromAssayed.R")
  }
}

# load() returns the names of the objects it restored. Keep only ct14 and
# lig16 from that file; remove every other restored name and temp itself.
temp <- load("~/Dropbox/GDB_archive/CMapCorr_files/lvl5_inputs.RData")
rm(list = c("temp", setdiff(temp, c("ct14", "lig16"))))
# Data saturation test ----
# Build train/test splits of increasing size. For every per-ligand sample
# count N from 1 up to one less than the smallest ligand group, N random IDs
# per ligand go to training and that ligand's remaining IDs go to testing.

# Row names of @cdesc whose pert_iname matches each entry of lig16, returned
# as a list (simplify = FALSE keeps it a list whatever the group sizes are).
# %in% is used instead of == so an NA pert_iname can never produce an NA ID.
temp_lig_id <- sapply(
  lig16,
  function(L) {
    rownames(lvl4new_data@cdesc)[lvl4new_data@cdesc$pert_iname %in% L]
  },
  simplify = FALSE
)

# Each ligand needs at least 2 IDs so that one can train and one can test.
# Fail with a clear message instead of an obscure seq() error.
if (length(temp_lig_id) == 0L || min(lengths(temp_lig_id)) < 2L) {
  stop(
    "Every ligand in lig16 needs at least 2 samples in lvl4new_data ",
    "to build a train/test split.",
    call. = FALSE
  )
}

# trainIDs[[N]]: per-ligand list of N randomly sampled IDs.
trainIDs <- lapply(
  seq_len(min(lengths(temp_lig_id)) - 1L),
  function(N) lapply(temp_lig_id, function(X) sample(X, N))
)

# testIDs[[N]]: per-ligand IDs not drawn for training at size N.
# SIMPLIFY = FALSE stops mapply from returning a matrix when every ligand
# happens to have the same number of held-out IDs.
testIDs <- lapply(
  trainIDs,
  function(X) mapply(setdiff, temp_lig_id, X, SIMPLIFY = FALSE)
)

# Flatten the per-ligand lists into one character vector per training size.
# lapply (not sapply) guarantees a list even when there is only one size, so
# seq_along() / [[N]] downstream always address whole ID sets.
trainIDs <- lapply(trainIDs, unlist, use.names = FALSE)
testIDs <- lapply(testIDs, unlist, use.names = FALSE)
# ^ training ----
# Fit one ranger model per entry of trainIDs, with a progress bar.
# The result is a plain list with one fitted model per training-set size.
rfmodel <- pblapply(seq_along(trainIDs), function(i) {
  ids <- trainIDs[[i]]
  ranger(
    # Transpose so the selected columns of @mat become the rows of x.
    x = t(lvl4new_data@mat[, ids]),
    # Response: pert_iname of the same IDs, as a factor.
    y = as.factor(lvl4new_data@cdesc[ids, "pert_iname"]),
    num.threads = 8,
    verbose = FALSE
  )
})
# ^ testing ----
# Predict with each fitted model on the held-out IDs of the same index.
rfresults <- pblapply(seq_along(rfmodel), function(i) {
  # Same orientation as in training: selected @mat columns become rows.
  held_out <- t(lvl4new_data@mat[, testIDs[[i]]])
  predict(rfmodel[[i]], held_out)
})

# Persist the models, predictions and both ID splits together.
save(
  list = c("rfmodel", "rfresults", "trainIDs", "testIDs"),
  file = "~/Dropbox/GDB_archive/CMapCorr_files/200707_lvl4new_mixall_balanced_saturated.RData"
)