From d6bca17c97900fc3b6f1b96f0b6557a404009ca2 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Tue, 9 May 2017 13:19:04 +0200 Subject: [PATCH 1/5] Whitespace fixes --- taxMaps | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/taxMaps b/taxMaps index c6d16f4..effc162 100755 --- a/taxMaps +++ b/taxMaps @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python ################################################################################ ### COPYRIGHT ################################################################## @@ -34,12 +34,12 @@ import sys, os, errno, commands, uuid ### FUNCTIONS ################################################################## def mkdir_p(path): - try: - os.makedirs(path) - except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(path): - pass - else: raise + try: + os.makedirs(path) + except OSError as exc: + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: raise def rp(path,start): return os.path.relpath(path, start) @@ -389,7 +389,7 @@ if n_cpus != n_indexes and n_indexes: if n_cpus == 1 : warning_list.append('WARNING: Setting the number of CPUs for all indexes [-c ' + ','.join([opt.m_cpus]*n_indexes) + ']') else: - error_list.append('ERROR: ' + str(n_cpus) + ' CPU specifications [-c ' + opt.m_cpus + '], incompatible with ' + str(n_indexes) + ' indexes [-d ' + opt.m_indices + ']') + error_list.append('ERROR: ' + str(n_cpus) + ' CPU specifications [-c ' + opt.m_cpus + '], incompatible with ' + str(n_indexes) + ' indexes [-d ' + opt.m_indices + ']') # taxonomy @@ -404,11 +404,11 @@ if opt.t_mode not in ['s', 'p', 'P']: error_list.append('ERROR: Invalid mode [-m ' + opt.t_mode + ']') -# misc +# misc if opt.m_queue: if opt.m_queue not in commands.getoutput('qstat -g c | egrep "CLUSTER|\--" -v | awk \'{print $1}\'').split('\n'): error_list.append('ERROR: Specified queue does not exist [-q ' + opt.m_queue + ']') - ecounter += 1 + ecounter += 1 sys.stderr.write('\n'.join(warning_list) + '\n') @@ -416,7 +416,7 @@ if error_list: sys.stderr.write('\n'.join(error_list) + '\n\n') parser.print_help() exit(-1) - + ######################################################### /ARGUMENTS,OPTIONS ### ################################################################################ @@ -490,8 +490,8 @@ if __name__ == '__main__': q_offset = opt.p_qoff # mapping - index_files = opt.m_indices.split(',') - len_files = ['.'.join(f.split('.')[:-1]) + '.len' for f in index_files] + index_files = opt.m_indices.split(',') + len_files = ['.'.join(f.split('.')[:-1]) + '.len' for f in index_files] index_names = ['.'.join(os.path.basename(f).split('.')[:-1]) for f in index_files] @@ -519,8 +519,8 @@ if __name__ == '__main__': det_mode = opt.t_mode - comp_coverage = opt.r_cov - exc_taxa = opt.r_exc + comp_coverage = opt.r_cov + exc_taxa = opt.r_exc # report rep_cutoff = opt.r_cutoff @@ -548,7 +548,7 @@ if __name__ == '__main__': input_command = comm_str elif opt.i_fq: if opt.i_fq[-3:] == '.gz': - input_command = 'zcat ' + ' '.join([rp(fq_lnk, run_dir) for fq_lnk in fq_lnks]) + input_command = 'zcat ' + ' '.join([rp(fq_lnk, run_dir) for fq_lnk in fq_lnks]) else: input_command = 'cat ' + ' '.join([rp(fq_lnk, run_dir) for fq_lnk in fq_lnks]) elif opt.i_bam: @@ -617,9 +617,9 @@ if __name__ == '__main__': map_out = rp(map_dir + '/' + prefix + '.' + index_names[i] + '.map', run_dir) out_files.append(map_out) map_str = gem_command + ' '.join([' -I', map_index, '-m', map_dist, '-e', map_dist, '-T', map_cpus, '2>', map_log, ' \\\n\t| txM_rescore \\\n\t| ']) - map_str += 'txM_mapout 2> ' + map_out + map_str += 'txM_mapout 2> ' + map_out map_commands.append(map_str) - map_lnker = ' \\\n\t| ' + 'txM_gem2fq \\\n\t| ' + map_lnker = ' \\\n\t| ' + 'txM_gem2fq \\\n\t| ' map_command = map_lnker.join(map_commands) map_out = rp(map_dir + '/' + prefix + '.unmapped.map', run_dir) out_files.append(map_out) @@ -636,8 +636,8 @@ if __name__ == '__main__': else: tax_command = 'txM_mergintlv ' tax_command += ' '.join(out_files) - tax_command += ' 2> /dev/null \\\n\t| ' - tax_command += 'txM_lca ' + tax_command += ' 2> /dev/null \\\n\t| ' + tax_command += 'txM_lca ' tax_command += '-t ' + rp(tax_lnk, run_dir) + ' ' tax_command += '-m ' + det_mode + ' ' tax_command += '2> ' + rp(map_dir + '/' + prefix + '.merged.map.lca', run_dir) @@ -647,7 +647,7 @@ if __name__ == '__main__': tax_command += '-e ' + max_edit + ' ' if comp_coverage: tax_command += '-c ' - tax_command += '-l ' + ','.join([rp(x, run_dir) for x in len_lnks]) + ' ' + tax_command += '-l ' + ','.join([rp(x, run_dir) for x in len_lnks]) + ' ' if exc_taxa: tax_command += '-x ' + exc_taxa + ' ' tax_command += '2> /dev/null > ' + rp(out_dir + '/' + prefix + '.merged.map.lca.summary', run_dir) @@ -667,7 +667,7 @@ if __name__ == '__main__': sh_file.write(tax_command + '\n\n') sh_file.write('cd ' + rp(out_dir, run_dir) +'\n') sh_file.write(rep_command + '\n') - sh_file.close() + sh_file.close() if sge_queue: pipe_file = open(pipe_filename, 'w') From 2c28ca56f4fd711fda0ff57eb27f381d50dbcfb7 Mon Sep 17 00:00:00 2001 From: Philipp Angerer Date: Tue, 9 May 2017 13:27:41 +0200 Subject: [PATCH 2/5] use subprocess instead of deprecated commands --- taxMaps | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taxMaps b/taxMaps index effc162..0be5f8b 100755 --- a/taxMaps +++ b/taxMaps @@ -23,7 +23,7 @@ ### MODULES #################################################################### from optparse import OptionParser, OptionGroup -import sys, os, errno, commands, uuid +import sys, os, errno, subprocess, uuid ################################################################### /MODULES ### ################################################################################ @@ -406,7 +406,7 @@ if opt.t_mode not in ['s', 'p', 'P']: # misc if opt.m_queue: - if opt.m_queue not in commands.getoutput('qstat -g c | egrep "CLUSTER|\--" -v | awk \'{print $1}\'').split('\n'): + if opt.m_queue not in subprocess.check_output('qstat -g c | egrep "CLUSTER|\--" -v | awk \'{print $1}\'', shell=True).split('\n'): error_list.append('ERROR: Specified queue does not exist [-q ' + opt.m_queue + ']') ecounter += 1 From 17b4bbed3d8bfde0c39098205c3f0d8ef118b0cd Mon Sep 17 00:00:00 2001 From: Philipp Angerer Date: Tue, 9 May 2017 13:37:34 +0200 Subject: [PATCH 3/5] wrapped it all into functions --- taxMaps | 156 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 84 insertions(+), 72 deletions(-) diff --git a/taxMaps b/taxMaps index 0be5f8b..31adfde 100755 --- a/taxMaps +++ b/taxMaps @@ -332,90 +332,92 @@ parser_misc.add_option( parser.add_option_group(parser_misc) -(opt, args) = parser.parse_args() +def parse_args(args=None): + if args is None: + args = sys.argv + opt, args = parser.parse_args(args) -### check options -error_list = [] -warning_list = [] + ### check options + error_list = [] + warning_list = [] -# input -input_list = [] -if not opt.i_comm and not opt.i_fq and not opt.i_bam and (not opt.i_fq_1 or not opt.i_fq_2): - error_list.append('ERROR: Missing valid input [-i | -b | -f | (-1 & -2)]') -if opt.i_comm: - input_list.append('-i') -if opt.i_fq: - if missing_files(opt.i_fq): - error_list.append('ERROR: File(s) not found in [-f ' + opt.i_fq + ']') - else: - input_list.append('-f') -if opt.i_bam: - if missing_files(opt.i_bam): - error_list.append('ERROR: File(s) not found in [-b ' + opt.i_bam + ']') - else: - input_list.append('-b') -if opt.i_fq_1 and opt.i_fq_2: - if len(opt.i_fq_1.split(',')) != len(opt.i_fq_2.split(',')): - error_list.append('ERROR: Different number of R1 an R2 files [-1 ' + opt.i_fq_1 + ' -2 ' + opt.i_fq_2 + ']') - else: - if missing_files(opt.i_fq_1) or missing_files(opt.i_fq_2): - error_list.append('ERROR: File(s) not found in [-1 ' + opt.i_fq_1 + ' -2 ' + opt.i_fq_2 + ']') + # input + input_list = [] + if not opt.i_comm and not opt.i_fq and not opt.i_bam and (not opt.i_fq_1 or not opt.i_fq_2): + error_list.append('ERROR: Missing valid input [-i | -b | -f | (-1 & -2)]') + if opt.i_comm: + input_list.append('-i') + if opt.i_fq: + if missing_files(opt.i_fq): + error_list.append('ERROR: File(s) not found in [-f ' + opt.i_fq + ']') else: - input_list.append('(-1 & -2)') -if len(input_list) > 1: - error_list.append('ERROR: Too many inputs [' + ' & '.join(input_list) + ']') + input_list.append('-f') + if opt.i_bam: + if missing_files(opt.i_bam): + error_list.append('ERROR: File(s) not found in [-b ' + opt.i_bam + ']') + else: + input_list.append('-b') + if opt.i_fq_1 and opt.i_fq_2: + if len(opt.i_fq_1.split(',')) != len(opt.i_fq_2.split(',')): + error_list.append('ERROR: Different number of R1 an R2 files [-1 ' + opt.i_fq_1 + ' -2 ' + opt.i_fq_2 + ']') + else: + if missing_files(opt.i_fq_1) or missing_files(opt.i_fq_2): + error_list.append('ERROR: File(s) not found in [-1 ' + opt.i_fq_1 + ' -2 ' + opt.i_fq_2 + ']') + else: + input_list.append('(-1 & -2)') + if len(input_list) > 1: + error_list.append('ERROR: Too many inputs [' + ' & '.join(input_list) + ']') + - -# mapping -n_indexes = 0 -if opt.m_indices: - if missing_files(opt.m_indices): - error_list.append('ERROR: File(s) not found in [-d ' + opt.m_indices + ']') - else: - n_indexes = len(opt.m_indices.split(',')) -if not n_indexes: - error_list.append('ERROR: Missing valid index(es) [-d]') - -n_dist = len(opt.m_edits.split(',')) -if n_dist != n_indexes and n_indexes: - if n_dist == 1 : - warning_list.append('WARNING: Setting the maximum edit distance for all indexes [-e ' + ','.join([opt.m_edits]*n_indexes) + ']') - else: - error_list.append('ERROR: ' + str(n_dist) + ' distances specified [-e ' + opt.m_edits + '], incompatible with ' + str(n_indexes) + ' indexes [-d ' + opt.m_indices + ']') + # mapping + n_indexes = 0 + if opt.m_indices: + if missing_files(opt.m_indices): + error_list.append('ERROR: File(s) not found in [-d ' + opt.m_indices + ']') + else: + n_indexes = len(opt.m_indices.split(',')) + if not n_indexes: + error_list.append('ERROR: Missing valid index(es) [-d]') + + n_dist = len(opt.m_edits.split(',')) + if n_dist != n_indexes and n_indexes: + if n_dist == 1 : + warning_list.append('WARNING: Setting the maximum edit distance for all indexes [-e ' + ','.join([opt.m_edits]*n_indexes) + ']') + else: + error_list.append('ERROR: ' + str(n_dist) + ' distances specified [-e ' + opt.m_edits + '], incompatible with ' + str(n_indexes) + ' indexes [-d ' + opt.m_indices + ']') -n_cpus = len(opt.m_cpus.split(',')) -if n_cpus != n_indexes and n_indexes: - if n_cpus == 1 : - warning_list.append('WARNING: Setting the number of CPUs for all indexes [-c ' + ','.join([opt.m_cpus]*n_indexes) + ']') - else: - error_list.append('ERROR: ' + str(n_cpus) + ' CPU specifications [-c ' + opt.m_cpus + '], incompatible with ' + str(n_indexes) + ' indexes [-d ' + opt.m_indices + ']') + n_cpus = len(opt.m_cpus.split(',')) + if n_cpus != n_indexes and n_indexes: + if n_cpus == 1 : + warning_list.append('WARNING: Setting the number of CPUs for all indexes [-c ' + ','.join([opt.m_cpus]*n_indexes) + ']') + else: + error_list.append('ERROR: ' + str(n_cpus) + ' CPU specifications [-c ' + opt.m_cpus + '], incompatible with ' + str(n_indexes) + ' indexes [-d ' + opt.m_indices + ']') -# taxonomy -if opt.t_table: - if missing_files(opt.t_table): - error_list.append('ERROR: File not found [-t ' + opt.t_table + ']') -else: - error_list.append('ERROR: Missing taxonomic table [-t]') + # taxonomy + if opt.t_table: + if missing_files(opt.t_table): + error_list.append('ERROR: File not found [-t ' + opt.t_table + ']') + else: + error_list.append('ERROR: Missing taxonomic table [-t]') -if opt.t_mode not in ['s', 'p', 'P']: - error_list.append('ERROR: Invalid mode [-m ' + opt.t_mode + ']') + if opt.t_mode not in ['s', 'p', 'P']: + error_list.append('ERROR: Invalid mode [-m ' + opt.t_mode + ']') - -# misc -if opt.m_queue: - if opt.m_queue not in subprocess.check_output('qstat -g c | egrep "CLUSTER|\--" -v | awk \'{print $1}\'', shell=True).split('\n'): - error_list.append('ERROR: Specified queue does not exist [-q ' + opt.m_queue + ']') - ecounter += 1 + + # misc + if opt.m_queue: + if opt.m_queue not in subprocess.check_output('qstat -g c | egrep "CLUSTER|\--" -v | awk \'{print $1}\'', shell=True).split('\n'): + error_list.append('ERROR: Specified queue does not exist [-q ' + opt.m_queue + ']') + ecounter += 1 + sys.stderr.write('\n'.join(warning_list) + '\n') + if error_list: + raise Exception('\n'.join(error_list) + '\n\n') -sys.stderr.write('\n'.join(warning_list) + '\n') -if error_list: - sys.stderr.write('\n'.join(error_list) + '\n\n') - parser.print_help() - exit(-1) + return opt, args ######################################################### /ARGUMENTS,OPTIONS ### ################################################################################ @@ -433,7 +435,14 @@ if error_list: ################################################################################ ### MAIN ####################################################################### -if __name__ == '__main__': +def main(args=None): + try: + opt, args = parse_args(args) + except Exception as e: + sys.stderr.write(str(e)) + parser.print_help() + sys.exit(-1) + # base dirs module_dir = os.path.abspath(os.path.dirname(__file__)) start_dir = os.getcwd() @@ -685,3 +694,6 @@ if __name__ == '__main__': ###################################################################### /MAIN ### ################################################################################ + +if __name__ == '__main__': + main() From 60067d5182e0b96726de2d843912a6ed103a56ab Mon Sep 17 00:00:00 2001 From: Philipp Angerer Date: Tue, 9 May 2017 13:42:34 +0200 Subject: [PATCH 4/5] allow importing --- taxMaps | 2 +- taxmaps/__init__.py | 1 + taxmaps/index.py | 1 + taxmaps/taxtbl.py | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) create mode 120000 taxmaps/__init__.py create mode 120000 taxmaps/index.py create mode 120000 taxmaps/taxtbl.py diff --git a/taxMaps b/taxMaps index 31adfde..aa712a6 100755 --- a/taxMaps +++ b/taxMaps @@ -444,7 +444,7 @@ def main(args=None): sys.exit(-1) # base dirs - module_dir = os.path.abspath(os.path.dirname(__file__)) + module_dir = os.path.realpath(os.path.dirname(__file__)) start_dir = os.getcwd() prefix = opt.m_prefix diff --git a/taxmaps/__init__.py b/taxmaps/__init__.py new file mode 120000 index 0000000..ff4874d --- /dev/null +++ b/taxmaps/__init__.py @@ -0,0 +1 @@ +../taxMaps \ No newline at end of file diff --git a/taxmaps/index.py b/taxmaps/index.py new file mode 120000 index 0000000..904a62c --- /dev/null +++ b/taxmaps/index.py @@ -0,0 +1 @@ +../taxMaps-index \ No newline at end of file diff --git a/taxmaps/taxtbl.py b/taxmaps/taxtbl.py new file mode 120000 index 0000000..2c9e791 --- /dev/null +++ b/taxmaps/taxtbl.py @@ -0,0 +1 @@ +../taxMaps-taxtbl \ No newline at end of file From 92d4780e9a7783203db49b14d962d98ca075ad28 Mon Sep 17 00:00:00 2001 From: Philipp Angerer Date: Tue, 9 May 2017 14:09:15 +0200 Subject: [PATCH 5/5] also converted index and taxtbl --- taxMaps-index | 19 ++++++++++++------- taxMaps-taxtbl | 27 +++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/taxMaps-index b/taxMaps-index index d7b6460..47db26c 100755 --- a/taxMaps-index +++ b/taxMaps-index @@ -115,11 +115,6 @@ parser.add_option( help = "Dry run (default = False)" ) -(opt, args) = parser.parse_args() -if not opt.fasta_file or not opt.corr_file or not opt.tax_file: - parser.print_help() - sys.exit(-1) - ######################################################### /ARGUMENTS,OPTIONS ### ################################################################################ @@ -137,7 +132,15 @@ if not opt.fasta_file or not opt.corr_file or not opt.tax_file: ################################################################################ ### MAIN ####################################################################### -if __name__ == '__main__': +def main(args=None): + if args is None: + args = sys.argv + + opt, args = parser.parse_args(args) + if not opt.fasta_file or not opt.corr_file or not opt.tax_file: + parser.print_help() + sys.exit(-1) + module_dir = os.path.abspath(os.path.dirname(__file__)) fasta_f = opt.fasta_file @@ -160,7 +163,7 @@ if __name__ == '__main__': if sge_queue: sge_f = prefix + '.sge' - dry_run = opt.dry + dry_run = opt.dry gitax_str = ' '.join(['txM_gitax', '-i', fasta_f, '-c', gitax_f,'-t', tax_f, '2>', out_f, '>', in_f]) len_str = ' '.join(['txM_fastalen', '-i', in_f, '>', len_f]) @@ -187,3 +190,5 @@ if __name__ == '__main__': ###################################################################### /MAIN ### ################################################################################ +if __name__ == '__main__': + main() diff --git a/taxMaps-taxtbl b/taxMaps-taxtbl index 684a2af..ae6e637 100755 --- a/taxMaps-taxtbl +++ b/taxMaps-taxtbl @@ -67,11 +67,6 @@ parser.add_option( help = "NCBI Taxonomy nodes.dmp file (Mandatory)" ) -(opt, args) = parser.parse_args() - -if not opt.names_file or not opt.nodes_file: - parser.print_help() - exit(-1) ######################################################### /ARGUMENTS,OPTIONS ### ################################################################################ @@ -89,26 +84,35 @@ if not opt.names_file or not opt.nodes_file: ################################################################################ ### MAIN ####################################################################### -if __name__ == '__main__': +def main(args=None): + if args is None: + args = sys.argv + + opt, args = parser.parse_args(args) + + if not opt.names_file or not opt.nodes_file: + parser.print_help() + sys.exit(-1) + node_dict = {} nodes_file = open(opt.nodes_file, 'r') for line in nodes_file: - la = line.strip().split('\t') + la = line.strip().split('\t') node = la[0] parent = la[2] rank = la[4] node_dict[node] = [parent, rank] - nodes_file.close() + nodes_file.close() names_file = open(opt.names_file, 'r') for line in names_file: - la = line.strip().split('\t') + la = line.strip().split('\t') if la[6] == 'scientific name': node = la[0] sci_name = la[2] node_dict[node].append(sci_name) - names_file.close() + names_file.close() for node in node_dict: node_list = [node] + node_dict[node][1:] + [':'.join(find_path(node, node_dict))] @@ -116,3 +120,6 @@ if __name__ == '__main__': ###################################################################### /MAIN ### ################################################################################ + +if __name__ == '__main__': + main()