-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpairLDboxplottable.R
151 lines (94 loc) · 3.88 KB
/
pairLDboxplottable.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/Rscript
#script to output a table in which each LD r2 value is categorized in size groups
#to read options
library(optparse)
####################### Parsing command line arguments
cat("\n\n#################################################\n\nStarting: Reading command line arguments...\n\n")
#> set interface
option_list <- list(
make_option(c("-f", "--flag"), action="store_true", default=FALSE, help="Flag [default: %default]"),
make_option(c("-r", "--ld"), type="character", default=FALSE, help="Plink LD analysis output.ld; analysis done for all pairs per scaffold. Required"),
make_option(c("-b", "--bin"), type="integer", default=30, help="Number of distance bins in which to clasify the R-squared values [default: %default]")
)
parser <- OptionParser(usage="%prog [options]\n\nUse this script to print summary tables from Plink LD analysis.\n\nIf you get an \'X11 display\' error it means you need to set a virtual framebuffer X11 server.\nCheck \"Initiate and declare a virtual graphical environment\" in the beginning of the script.\n", option_list=option_list)
args <- parse_args(parser, positional_arguments = 0)
opt <- args$options
file <- args$args
####################### LIBRARIES
cat("\n\n>Setting virtual display environment and loading packages.\n")
###### Initiate and declare a virtual graphical environment #########
# The line below is very important, if there is not display environment for X11 you'll need a virtual framebuffer X11 server: Xvfb
#Uncomment the line below the first time you run it, and don't worry if you get a "Fatal server error: Server is already active for display 0"
#Unix is very melodramatic but that just means the command was not needed. Line below for virtual framebuffer X11 server:
#system("Xvfb :0 -ac -screen 0 1960x2000x24 &") #<-- This line
Sys.setenv("DISPLAY"=":0")
############################################################################
#### Script, you know... don't touch anything unless... bla bla bla
###########################################################################
#< set interface
options(scipen = 999)
inputfile <- opt$ld
nbin <- opt$bin
#open and check
inputname <-gsub('^(.*).ld$', '\\1', inputfile)
cat("\n\nReading ", inputname, "\n\n", sep="")
dataset = read.table(inputfile, header=TRUE)
#calculate pairwise bp distances
dataset$DIST = abs(dataset$BP_B - dataset$BP_A)
cat(capture.output(head(dataset)), sep="\n")
#############################################
r = pretty(max(dataset$DIST), n=1)[2]
r
#select only distance and r2
str(dataset)
subdata = dataset[,8:7]
str(subdata)
names(subdata) <-c("binsize", "R2")
#length(subdata$DIST)
#str(subdata)
cat("\n\nNow clasifying all distances in ", nbin, " bins\n\n", sep="")
#define size of the bins
binfreq=r/nbin
cat("\nEach bin every ", binfreq, "bp\n", sep="")
myseq = seq(0, r, by=binfreq) #save the seq of limits between bins
#myseq
numbins = length(myseq)-1
#numbins
myrg = c(1:numbins)
#myrg
#b=2 ####debug
maxdata=nrow(subdata)
wholedata=c(1:maxdata)
eachval =1 #debug
for (eachval in wholedata) {
cat(eachval, " of ", maxdata, "\n", sep="")
mydist = subdata[eachval, 1]
mydist
b=1 #debug
#calculate the middle value
for (b in myrg) {
minor = myseq[b]
minor
major = myseq[b+1]
major
midval=mean(c(minor, major))
midval
label=round(midval/1000)
label
if (mydist > minor && mydist < major) {
subdata[eachval, 3] <- label
liminf = round((minor/1000),0)
limsup = round((major/1000),0)
interv=paste(liminf,"-",limsup, "kb", sep="")
interv
subdata[eachval, 1] <- interv
}
}
}
boxeddata <- subdata[,c(1, 3, 2)]
names(boxeddata)<-c("interval", "midvalue", "R2" )
head(boxeddata)
str(boxeddata)
forboxplots=paste(inputname, "allsize-r2.txt", sep="")
write.table(boxeddata, file=forboxplots, quote=FALSE, sep="\t", row.names=FALSE)
cat("\n\n\nAll done!!\n\n\n")