-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathbam_len_hist.py
executable file
·64 lines (50 loc) · 1.93 KB
/
bam_len_hist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
from optparse import OptionParser
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
import rpy2.robjects.lib.ggplot2 as ggplot2
import pdb
import pysam
grdevices = importr('grDevices')
################################################################################
# bam_len_hist.py
#
# Plot a histogram of the length of alignments in a BAM file.
################################################################################
################################################################################
# main
################################################################################
def main():
usage = 'usage: %prog [options] arg'
parser = OptionParser(usage)
#parser.add_option()
(options,args) = parser.parse_args()
if len(args) != 1:
parser.error('Must provide BAM file')
else:
bam_file = args[0]
align_lengths = {}
for aligned_read in pysam.Samfile(bam_file, 'rb'):
align_lengths[aligned_read.qlen] = align_lengths.get(aligned_read.qlen,0) + 1
min_len = min(align_lengths.keys())
max_len = max(align_lengths.keys())
# construct data frame
len_r = ro.IntVector(range(min_len,max_len+1))
counts_r = ro.IntVector([align_lengths.get(l,0) for l in range(min_len,max_len+1)])
df = ro.DataFrame({'length':len_r, 'counts':counts_r})
# construct full plot
gp = ggplot2.ggplot(df) + \
ggplot2.aes_string(x='length', y='counts') + \
ggplot2.geom_bar(stat='identity') + \
ggplot2.scale_x_continuous('Alignment length') + \
ggplot2.scale_y_continuous('')
# plot to file
grdevices.pdf(file='align_lengths.pdf')
gp.plot()
grdevices.dev_off()
################################################################################
# __main__
################################################################################
if __name__ == '__main__':
main()
#pdb.runcall(main)