-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathPcirc_aligment.py
52 lines (42 loc) · 1.71 KB
/
Pcirc_aligment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import argparse
import os
parser = argparse.ArgumentParser(description='Sequence Aligment \n '
'usage: \n '
'[options] <reads1[,reads2,...]>')
parser.add_argument("-g",
"--genome",
help='reference genome files are Fasta')
parser.add_argument("-G",
"--gtf",
help='reference gtf files')
parser.add_argument("-p",
"--num_threads",
default=4)
parser.add_argument("-o",
"--output_dir")
parser.add_argument("RNA_seq",
nargs='*',
help='data of RNA_seq, the input format is <reads1[,reads2,...]>')
args = parser.parse_args()
inputs = args.RNA_seq
threads = int(args.num_threads)
output_dir = args.output_dir
gtf_filename = args.gtf
genome_filename = args.genome
genome_path = os.path.realpath(genome_filename)
genome_index = genome_path[:genome_path.rfind('/')] + '/genome_index'
print('='*20 + 'Bowtie begin' + '='*20)
os.system("bowtie2-build %s %s" %
(genome_filename, genome_index))
print('='*20 + 'Bowtie end' + '='*20)
print('='*20 + 'Tophat begin' + '='*20)
num_fq = len(inputs)
if num_fq == 1:
os.system("tophat2 -p %d -o %s -G %s %s %s" %
(threads, output_dir, gtf_filename, genome_index, inputs[0]))
elif num_fq == 2:
os.system("tophat2 -p %d -o %s -G %s %s %s %s" %
(threads, output_dir, gtf_filename, genome_index, inputs[0], inputs[1]))
print('='*20 + 'Tophat end' + '='*20)
os.system(r'''samtools view %s/unmapped.bam | awk '{OFS="\t"; print ">"$1"\n"$10}' - > %s/unmapped.fasta''' %
(output_dir, output_dir))