-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy path_model_iter.r
61 lines (51 loc) · 1.85 KB
/
_model_iter.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# point of this file:
# - use the zscores to create a linear model
library(dplyr)
b = import('base')
io = import('io')
ar = import('array')
st = import('stats')
INFILE = commandArgs(TRUE)[1] %or% "../data/zscores.RData"
OUTFILE = commandArgs(TRUE)[2] %or% "model_iter.RData"
# load speed data, index
zobj = io$load('../data/zscores.RData')
zscores = zobj$scores
index = zobj$index
inh = index$effect=="inhibiting"
zscores[,inh] = -zscores[,inh]
zscores = t(zscores)
# load dscores
dobj = io$load('../data/dscores.RData')
dscores = dobj$scores
dscores[,inh] = -dscores[,inh]
dscores = t(dscores)
iterate = function(zscores, dscores, index) {
# fit model to pathway perturbations
data = c(as.list(index), list(zscores=zscores))
mod = st$lm(zscores ~ 0 + pathway, data=data) %>%
transmute(gene = zscores,
pathway = sub("^pathway", "", term),
zscore = estimate,
p.value = p.value) %>%
group_by(gene) %>%
mutate(p.adj = p.adjust(p.value, method="fdr")) %>%
ungroup()
zfit = ar$construct(zscore ~ gene + pathway, data=mod)
pval = ar$construct(p.adj ~ gene + pathway, data=mod)
# filter zfit to only include top 100 genes per pathway
zfit[apply(pval, 2, p -> !b$min_mask(p, 100))] = 0
# make sure scores are highest in perturbed pathway, discard otherwise
scores = dscores[,rownames(zfit)] %*% zfit
bscores = ar$map(scores, along=2, x -> x==max(x))
bscores[,"EGFR"] = bscores[,"MAPK"] = bscores[,"EGFR"] | bscores[,"MAPK"]
keep = as.logical(rowSums(bscores & ar$mask(index$pathway)))
print(sum(keep))
if (all(keep))
zfit
else
iterate(zscores[keep,], dscores[keep,], index[keep,])
}
# build linear model iteratively discarding bad arrays
zfit = iterate(zscores, dscores, index)
# save resulting object
save(zfit, file=OUTFILE)