-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathextract_chr22_to_fastq.sh
executable file
·85 lines (64 loc) · 1.75 KB
/
extract_chr22_to_fastq.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/bin/bash
# extract_chr22_to_fastq.sh
# extract chr22 bam records to new bam
# extract chr22 reads to new fastq files
# takes a BAM file from $1 and extracts paired chr22 reads to fastq's
if [ $# -lt 1 ]
then
echo "Usage: ${0##*/} <BAM file>"
exit
fi
# edit here
maxram="2G"
# get BAM from argument#1
bam=$1
# set names
name=$(basename ${bam} ".bam")
outbam=chr22_${name}
mkdir -p ${outbam}
outfastq=chr22_${name}_fastq
mkdir -p ${outfastq}
echo "# processing ${bam}"
# add suffix
sbam=${bam//.bam/_s.bam}
# sorted version required for region extraction
cmd="java -Xmx${maxram} -jar $PICARD/picard.jar SortSam \
I=${bam} \
O=${sbam} \
SO=coordinate"
echo "# ${cmd}"
eval ${cmd}
# index the obtained data before extraction
# same as: samtools index ${sbam}
cmd="java -Xmx${maxram} -jar $PICARD/picard.jar BuildBamIndex \
I=${sbam} \
O=${sbam}.bai"
echo "# ${cmd}"
eval ${cmd}
# extract chr22 mappings and index results
# only mapped and properly paired reads (tag=0x2)
cmd="samtools view -b -f 0x2 ${sbam} chr22 \
> ${outbam}/${outbam}.bam && \
samtools index ${outbam}/${outbam}.bam"
echo "# ${cmd}"
eval ${cmd}
# re-sort by read names to group reads in pairs
cmd="java -Xmx${maxram} -jar $PICARD/picard.jar SortSam \
I=${outbam}/${outbam}.bam \
O=${outbam}/${outbam}-n.bam \
SO=queryname"
echo "# ${cmd}"
eval ${cmd}
# convert back to fastq using picard
# be LENIENT for unpaired reads
cmd="java -Xmx${maxram} -jar $PICARD/picard.jar SamToFastq \
I=${outbam}/${outbam}-n.bam \
F=>(bgzip -c > ${outfastq}/${outfastq}_1.fastq.gz) \
F2=>(bgzip -c > ${outfastq}/${outfastq}_2.fastq.gz) \
RE_REVERSE=TRUE \
VALIDATION_STRINGENCY=LENIENT"
echo "# ${cmd}"
eval ${cmd}
# delete sorted files to save space
rm ${sbam} ${sbam}.bai
rm ${outbam}/${outbam}-n.bam