-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathassignExprGroup
executable file
·78 lines (65 loc) · 1.98 KB
/
assignExprGroup
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/sh
desc="assign expression level (given percentile interval) groups from genes_fpkm.tracking (cufflinks/cuffdiff)"
usage="usage: `basename $0` [-i 10/20/25/50] [-o outputdir] [-c column-name-of-gene] genes_fpkm.tracking"
colname="gene_id"
interval=10
while getopts hi:o:c: opt
do
case ${opt} in
h)
echo ${desc}
echo ${usage}
exit 1;;
i)
interval=${OPTARG};;
c)
colname=${OPTARG};;
o)
outdir=${OPTARG};;
\?)
echo ${usage}
exit 1;;
esac
done
if [ $interval -ne 10 ] && [ $interval -ne 20 ] && [ $interval -ne 25 ] && [ $interval -ne 50 ]
then
echo "[error] the interval should be 10, 20, 25 or 50"
exit 1
fi
shift `expr $OPTIND - 1`
outdir="./assignedPer$interval"
mkdir -p $outdir
Rscript - $* <<EOS
read.genes_fpkm <- function(file, genename='gene_id')
{
X <- read.table(file,head=TRUE)
g <- strsplit(as.character(X[[genename]]),',')
fpkm <- as.matrix(X[,grep('_?FPKM$',colnames(X))])
fpkm <- fpkm[rep.int(seq_along(g),sapply(g,length)),,drop=FALSE]
colnames(fpkm) <- sub('_?FPKM$','',colnames(fpkm))
g <- unlist(g)
apply(fpkm,2,function(x) tapply(x,unlist(g),mean))
}
assignExprGroup<- function(fpkm,interval=10)
{
glabel <- paste('q',seq(0,100-interval,interval),'to',seq(interval,100,interval),'th',sep='')
egroup <- apply(fpkm,2,function(x)
cut(x,breaks=quantile(x[x>0],seq(0,1,interval/100)),include.lowest=TRUE,label=glabel)
)
egroup[is.na(egroup)] <- 'zero'
rownames(egroup) <- rownames(fpkm)
data.frame(lapply(data.frame(egroup),function(x){
factor(x,c('zero',glabel),ordered=TRUE)
}))
}
args <- commandArgs(TRUE)
fpkm <- read.genes_fpkm(args[1],"$colname")
tbl <- assignExprGroup(fpkm,$interval)
g <- rownames(tbl)
invisible(mapply(function(x,xname){
ord <- order(x)
filename <- paste("${outdir}/",xname,"_ePer$interval.txt",sep='')
res <- data.frame(g[ord],x[ord])
write.table(res,file=filename,quote=FALSE,row.names=FALSE,col.names=FALSE,sep="\t")
}, tbl, colnames(tbl)))
EOS