tb-profiler

#! /usr/bin/env python3
import sys
import pathogenprofiler as pp
import argparse
import json
from pathogenprofiler.utils import debug
import tbprofiler as tbp
import os
import csv
from datetime import datetime
from uuid import uuid4
import glob
import atexit

__softwarename__ = 'tbprofiler'

@atexit.register
def cleanup():
    """Remove temporary files and write an error report after a failed run.

    Registered with ``atexit``. The function does nothing unless
    ``sys.last_traceback`` is present, which the interpreter sets when it
    reports an uncaught exception. In that case it:

    1. deletes every file matching ``<files_prefix>*`` unless ``no_clean``
       was requested,
    2. writes a Markdown error report to ``<prefix>.errlog``,
       ``<vcf basename>.errlog`` or ``<uuid>.errlog.txt`` (first that applies),
    3. prints a message pointing the user to that report via ``pp.errlog``.
    """
    if "last_traceback" not in vars(sys):
        return

    import traceback

    # ``args`` is bound at module level only once argument parsing has
    # finished; look it up defensively so that an earlier failure does not
    # turn into a NameError inside the exit handler.
    parsed_args = globals().get("args")
    settings = vars(parsed_args) if parsed_args is not None else {}

    files_prefix = settings.get("files_prefix")
    if files_prefix and not settings.get("no_clean"):
        sys.stderr.write("Cleaning up after failed run\n")
        for leftover in glob.glob(files_prefix+"*"):
            try:
                os.remove(leftover)
            except OSError as err:
                # Best effort: one undeletable path must not prevent the
                # error report below from being written.
                sys.stderr.write("Could not remove %s: %s\n" % (leftover, err))

    if "prefix" in settings:
        outfile = "%s.errlog" % settings["prefix"]
    elif settings.get("vcf"):
        outfile = "%s.errlog" % settings["vcf"].split("/")[-1]
    else:
        outfile = "%s.errlog.txt" % uuid4()

    with open(outfile, "w") as O:
        O.write("# tb-profiler error report\n\n")
        O.write("* OS: %s\n" % sys.platform)
        O.write("* Program version: %s\n" % tbp.__version__)
        if "conf" in settings:
            O.write("* Database version: %s\n" % settings["conf"]["version"]["commit"])
        O.write("* Program call:\n")
        O.write("```\n")
        # Wrap in a tuple so the dict is formatted as a value, not used as a
        # mapping for named placeholders.
        O.write("%s\n" % (settings,))
        O.write("```\n")

        O.write("## Traceback:\n")
        O.write("```\n")
        traceback.print_tb(sys.last_traceback,file=O)
        O.write("```\n")

        O.write("## Value:\n")
        O.write("```\n")
        # Terminate the line so the closing fence starts on its own line.
        O.write("%s\n" % (sys.last_value,))
        O.write("```\n")
    pp.errlog("""\n
################################# ERROR #######################################

This run has failed. Please check all arguments and make sure all input files
exist. If no solution is found, please open up an issue at
https://github.com/jodyphelan/TBProfiler/issues/new and paste or attach the
contents of the error log (%s)

###############################################################################
""" % (outfile))


# Make sure sys.base_prefix exists: it is used below to locate the files
# installed under <prefix>/share/tbprofiler. When the attribute is missing,
# fall back to sys.real_prefix (presumably set by legacy virtualenv - confirm)
# and finally to sys.prefix.
if not hasattr(sys, "base_prefix"):
    sys.base_prefix = getattr(sys, "real_prefix", sys.prefix)

def main_reprofile(args):
    """Re-run the prediction step on an existing result file with the database in ``args.conf``.

    The ``dr_variants`` and ``other_variants`` entries of the old result are
    converted into a single ``variants`` list (each variant carrying a
    ``consequences`` list), passed through ``pp.db_compare`` and
    ``tbp.reformat`` and written to ``<prefix>.results.json``.

    A variant whose locus tag is absent from the fourth column of the
    database bed file is skipped, unless the ``--extra_consequences`` JSON
    provides a replacement annotation under the key
    ``<locus_tag>_<nucleotide_change>``.

    Args:
        args: Parsed command-line namespace. Reads ``conf``, ``json``,
            ``extra_consequences``, ``reporting_af``,
            ``add_mutation_metadata`` and ``prefix``.
    """
    # Example of a converted variant:
    #{'chrom': 'Chromosome', 'genome_pos': 6140, 'ref': 'G', 'alt': 'T', 'freq': 1.0, 'consequences': [{'gene_name': 'gyrB', 'gene_id': 'Rv0005', 'feature_id': 'CCP42727', 'type': 'missense_variant', 'nucleotide_change': 'c.901G>T', 'protein_change': 'p.Val301Leu'}]}

    # A set gives constant-time membership tests; blank lines are ignored.
    with open(args.conf["bed"]) as bed:
        new_locus_tags = {line.split()[3] for line in bed if line.strip()}

    # old result key -> key used inside a consequence record
    columns = {
        "gene":"gene_name",
        "locus_tag":"gene_id",
        "protein_change":"protein_change",
        "nucleotide_change":"nucleotide_change",
        "type":"type",
        "feature_id":"feature_id"
    }
    with open(args.json) as json_in:
        old_results = json.load(json_in)
    new_results = old_results.copy()
    new_results["variants"] = []

    extra_csq_data = {}
    if args.extra_consequences:
        with open(args.extra_consequences) as csq_in:
            extra_csq_data = json.load(csq_in)

    for var in old_results["dr_variants"]+old_results["other_variants"]:
        if var["locus_tag"] not in new_locus_tags:
            csq_key = f"{var['locus_tag']}_{var['nucleotide_change']}"
            if csq_key in extra_csq_data:
                # Overwrite the annotation with the user-supplied consequence
                for old_key,csq_field in columns.items():
                    var[old_key] = extra_csq_data[csq_key][csq_field]
            else:
                pp.infolog(f"Skipping {var['gene']} {var['change']} because the gene isn't in the new db!")
                continue
        if "drugs" in var:
            del var["drugs"]
        if "gene_associated_drugs" in var:
            del var["gene_associated_drugs"]
        del var["change"]

        # Move the flat annotation fields into their own consequence record
        tmp_csq = {}
        for old_key,csq_field in columns.items():
            tmp_csq[csq_field] = var[old_key]
            del var[old_key]

        var["consequences"] = var["alternate_consequences"]
        var["consequences"].append(tmp_csq)
        del var["alternate_consequences"]
        if "annotation" in var:
            del var["annotation"]
        new_results["variants"].append(var)

    del new_results["other_variants"]
    del new_results["dr_variants"]
    del new_results["qc"]["gene_coverage"]

    new_results = pp.db_compare(db_file=args.conf["json_db"],mutations=new_results)
    new_results = tbp.reformat(new_results, args.conf, reporting_af=args.reporting_af, mutation_metadata=args.add_mutation_metadata)

    new_results["db_version"] = args.conf["version"]
    # Close the handle explicitly so the JSON is flushed to disk
    with open("%s.results.json"%args.prefix,"w") as json_out:
        json.dump(new_results,json_out)

def main_profile(args):
    """Run the whole profiling pipeline for one sample and store its outputs.

    Steps, in order: create the storage directory, apply platform defaults
    (``pp.set_platform_params``), run ``pp.run_profiler``, optionally add the
    spoligotype, reformat the results with ``tbp.reformat``, write the
    reports with ``tbp.write_outputs``, move the bam/vcf files from the
    temporary prefix into ``<dir>/bam`` and ``<dir>/vcf`` and finally delete
    whatever is left under the temporary prefix.

    Args:
        args: Parsed command-line namespace of the ``profile`` sub-command,
            extended with ``conf`` and ``files_prefix`` by the dispatcher.
    """
    ### Create folders for results if they don't exist ###
    if pp.nofolder(args.dir):
        os.mkdir(args.dir)

    args = pp.set_platform_params(args)

    # NOTE(review): this is a truthiness test, so an explicit
    # "--missing_cov_threshold 0" is treated as "not given" - confirm intended.
    if not args.missing_cov_threshold:
        args.missing_cov_threshold = args.min_depth
    else:
        pp.warninglog("\nWARNING: The --missing_cov_threshold argument is deprecated and will be removed in future releases. This parameter can now be set with --min_depth.\n")

    if args.no_lineage:
        del args.conf["barcode"]
    results = {
        "id":args.prefix,
        "tbprofiler_version": tbp.__version__
        }

    results.update(pp.run_profiler(args))
    if args.spoligotype:
        if args.vcf:
            pp.warninglog("Spoligotyping not available with vcf format... skipping!")
        else:
            results["spoligotype"] = tbp.spoligotype(args)

    ### Reformat the results to TB-Profiler style ###
    results = tbp.reformat(results, args.conf, reporting_af=args.reporting_af, mutation_metadata=args.add_mutation_metadata)
    results["id"] = args.prefix

    # Record the mapping *program* (previously the read file name was stored
    # here); no mapping is performed when a BAM file is supplied.
    results["pipeline"] = [
        {"Analysis":"Mapping","Program":args.mapper if not args.bam else "N/A"},
        {"Analysis":"Variant calling","Program":args.caller}
    ]

    for x in ["bam","vcf","results"]:
        if pp.nofolder(args.dir+"/"+x):
            os.mkdir(args.dir+"/"+x)

    tbp.write_outputs(args,results)

    ### Move files to respective directories ###
    result_files = {
        "%s.delly.bcf" % args.files_prefix: "%s/vcf/%s.delly.bcf" % (args.dir,args.prefix),
        "%s.targets.csq.vcf.gz" % args.files_prefix: "%s/vcf/%s.targets.csq.vcf.gz" % (args.dir,args.prefix),
        "%s.vcf.gz" % args.files_prefix: "%s/vcf/%s.vcf.gz" % (args.dir,args.prefix),
        "%s.bam" % args.files_prefix: "%s/bam/%s.bam" % (args.dir,args.prefix),
        "%s.bam.bai" % args.files_prefix: "%s/bam/%s.bam.bai" % (args.dir,args.prefix),
    }
    for file,target in result_files.items():
        # Not every input type produces every file, so move only what exists
        if os.path.isfile(file):
            os.rename(file,target)

    # Remove the remaining temporary files of this run
    pp.run_cmd("rm %s*" % args.files_prefix)

    pp.successlog("\nProfiling finished sucessfully!")

def main_update_tbdb(args):
    """Fetch the tbdb repository, switch to the requested branch and rebuild the database.

    Clones the repository into ``./tbdb`` when that folder is missing, checks
    out ``args.branch``, pulls, and then runs ``tb-profiler create_db --load``
    inside it, forwarding ``--match_ref`` when it was given.
    """
    repo_missing = pp.nofolder("tbdb")
    if repo_missing:
        pp.run_cmd("git clone https://github.com/jodyphelan/tbdb.git")

    os.chdir("tbdb")
    pp.run_cmd("git checkout %s" % args.branch)
    pp.run_cmd("git pull")

    # Optional flag forwarded to create_db; empty when not requested
    if args.match_ref:
        match_ref_option = "--match_ref %s" % args.match_ref
    else:
        match_ref_option = ""
    pp.run_cmd(f"tb-profiler create_db {match_ref_option} --load")

    os.chdir("../")
    pp.successlog("Sucessfully updated TBDB")

def main_create_db(args):
    """Build the database through ``pp.create_db``.

    The barcode file is handed over as an extra file when ``args.barcode`` is
    set; otherwise no extra files are passed.
    """
    extra_files = {"barcode": args.barcode} if args.barcode else None
    pp.create_db(args, extra_files=extra_files)


def main_lineage(args):
    """Determine the lineage of a BAM file and write it to ``<prefix>.lineage.<outfmt>``.

    Genotypes at the barcode positions are obtained with ``bam.get_bed_gt``,
    turned into a barcode with ``pp.barcode`` and converted to a lineage by
    ``tbp.barcode2lineage``. With ``--spoligotype`` the spoligotype is added
    to the results; with ``--snps`` the path ``<prefix>.lineage.snps.txt`` is
    passed to ``pp.barcode`` as ``snps_file``.

    Args:
        args: Parsed command-line namespace of the ``lineage`` sub-command.
    """
    pp.filecheck(args.bam)
    bam = pp.bam(args.bam,args.bam,args.conf["ref"])
    mutations = bam.get_bed_gt(args.conf["barcode"],args.conf["ref"],args.caller,args.platform)

    results = {}
    snps_file = args.prefix+".lineage.snps.txt" if args.snps else None

    if args.spoligotype:
        results["spoligotype"] = tbp.bam2spoligotype(args.bam,args.files_prefix,args.conf)

    results["barcode"] = pp.barcode(mutations,args.conf["barcode"],snps_file=snps_file)
    tbp.barcode2lineage(results)
    outfile = "%s.lineage.%s" % (args.prefix,args.outfmt)
    # Context manager closes the file even if serialisation fails
    with open(outfile,"w") as O:
        if args.outfmt=="json":
            json.dump(results,O)
        elif args.outfmt=="txt":
            O.write(tbp.text.lineagejson2text(results["lineage"]))

def main_spoligotype(args):
    """Spoligotype a BAM file and write the result into ``args.dir``.

    Always writes ``<prefix>.spoligotype.json``; the tab- and comma-separated
    reports are added with ``--txt`` and ``--csv``. The temporary files under
    ``args.files_prefix`` are removed afterwards.

    Args:
        args: Parsed command-line namespace of the ``spoligotype`` sub-command.
    """
    results = tbp.bam2spoligotype(args.bam,args.files_prefix,args.conf)

    json_output = args.dir+"/"+args.prefix+".spoligotype.json"
    text_output = args.dir+"/"+args.prefix+".spoligotype.txt"
    csv_output = args.dir+"/"+args.prefix+".spoligotype.csv"

    # Close the handle explicitly so the JSON is flushed to disk
    with open(json_output,"w") as json_out:
        json.dump(results,json_out)
    if args.txt:
        tbp.write_spoligotype_report(results,args.conf,text_output,sep="\t")
    if args.csv:
        tbp.write_spoligotype_report(results,args.conf,csv_output,sep=",")

    pp.run_cmd("rm %s*" % args.files_prefix)
    pp.successlog("\nSpoligotyping finished sucessfully!")

def main_collate(args):
    """Hand the collate options over to ``tbp.collate_results``."""
    collate_options = {
        "sample_file": args.samples,
        "result_dir": args.dir,
        "reporting_af": args.reporting_af,
        "mark_missing": args.mark_missing,
    }
    tbp.collate_results(args.prefix, args.conf, **collate_options)

def main_version(args):
    """Print the TBProfiler version to standard output."""
    banner = "\nTBProfiler version %s\n" % tbp.__version__
    sys.stdout.write(banner)

def main_reformat(args):
    """Convert a JSON result file into text and/or CSV reports.

    The sample prefix is taken from the ``id`` field of the JSON file; the
    reports are written to ``<id>.results.txt`` (``--txt``) and
    ``<id>.results.csv`` (``--csv``).

    Args:
        args: Parsed command-line namespace of the ``reformat`` sub-command.
            ``args.prefix`` is overwritten with the sample id.
    """
    # Read inside a context manager so the handle is closed right away
    with open(args.json) as json_in:
        results = json.load(json_in)
    args.prefix = results["id"]
    csv_output = args.prefix+".results.csv"
    text_output = args.prefix+".results.txt"
    if args.txt:
        tbp.write_text(results,args.conf,text_output)
    if args.csv:
        tbp.write_text(results,args.conf,csv_output,sep=",")


def main_fasta_profile(args):
    """Profile a fasta file (deprecated in favour of ``profile --fasta``).

    Creates the ``bam``, ``vcf`` and ``results`` sub-folders of ``args.dir``,
    profiles the assembly with ``pp.fasta_profiler``, reformats and annotates
    the results, writes the reports and moves the generated ``*.vcf.gz*``
    files into ``<dir>/vcf/``.
    """
    pp.warninglog("*Deprecated, please use 'tb-profiler profile --fasta <fasta_file>'. This function will be removed in future releases*")

    for subdir in ("bam", "vcf", "results"):
        subdir_path = f"{args.dir}/{subdir}"
        if pp.nofolder(subdir_path):
            os.mkdir(subdir_path)

    raw_results = pp.fasta_profiler(args.conf, args.prefix, args.fasta)
    results = tbp.reformat(raw_results, args.conf, reporting_af=0, use_suspect=args.suspect)

    # Metadata describing how these results were produced
    results["id"] = args.prefix
    results["tbprofiler_version"] = tbp.__version__
    results["input_data_source"] = "fasta"
    results["pipeline"] = [
        {"Analysis": "Variant_calling", "Program": "paftools.js"},
    ]

    # This sub-command has no --reporting_af option, so the attribute is set
    # here before the namespace is handed to write_outputs.
    args.reporting_af = 0.7
    tbp.write_outputs(args, results)

    pp.run_cmd(f"mv -f {args.dir}/{args.prefix}*.vcf.gz* {args.dir}/vcf/")


def main_vcf_profile(args):
    """Profile every sample of a VCF file (deprecated in favour of ``profile --vcf``).

    When ``--lofreq_sample_name`` is given the VCF is first rewritten: GT and
    AD FORMAT header lines are injected, a FORMAT and a sample column are
    appended to the ``#CHROM`` line, and every record receives a ``GT:AD``
    value derived from its ``DP4`` INFO field. The rewritten file is bgzipped
    and used as input, then deleted at the end.

    Each sample is then extracted with bcftools into a temporary VCF,
    profiled with ``pp.vcf_profiler``, reformatted and written out.

    Args:
        args: Parsed command-line namespace of the ``vcf_profile``
            sub-command. ``sample_name``, ``prefix``, ``tmp_vcf`` and
            possibly ``vcf`` are overwritten while running.

    Raises:
        ValueError: if a record of a lofreq VCF has no ``DP4`` field.
    """
    pp.warninglog("*Deprecated, please use 'tb-profiler profile --vcf <vcf_file>'. This function will be removed in future releases.*")
    for x in ["bam","vcf","results"]:
        if pp.nofolder(args.dir+"/"+x):
            os.mkdir(args.dir+"/"+x)

    if args.lofreq_sample_name:
        import re
        # Raw string ("\d" is an invalid escape in a plain literal), compiled
        # once instead of once per record.
        dp4_pattern = re.compile(r"DP4=(\d+),(\d+),(\d+),(\d+)")
        modified_vcf = f"{args.tmp_prefix}.lofreq_modified.vcf"
        format_header_written = False
        with open(modified_vcf,"w") as O:
            for l in pp.cmd_out("bcftools view %(vcf)s" % vars(args)):
                row = l.strip().split()
                if "INFO=<ID=DP" in l:
                    # The test above also matches the DP4 header line, so the
                    # FORMAT definitions are injected only on the first hit.
                    if not format_header_written:
                        O.write("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n")
                        O.write("##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths (high-quality bases)\">\n")
                        format_header_written = True

                    O.write(l+"\n")
                elif l[:2]=="##":
                    O.write(l+"\n")
                elif row[0]=="#CHROM":
                    row = row + ["FORMAT", args.lofreq_sample_name]
                    O.write("%s\n" % "\t".join(row))
                else:
                    re_obj = dp4_pattern.search(l)
                    if re_obj is None:
                        raise ValueError("No DP4 field found in VCF record: %s" % l.strip())
                    dp4_1, dp4_2, dp4_3, dp4_4 = (int(x) for x in re_obj.groups())
                    # First two DP4 counts form the first AD value, last two
                    # the second (presumably ref and alt depth - confirm).
                    ad = "%s,%s" % ( (dp4_1+dp4_2), (dp4_3+dp4_4))
                    row = row + ["GT:AD", "0/1:%s" %ad]
                    O.write("%s\n" % "\t".join(row))
        pp.run_cmd("bgzip -f %s" % modified_vcf)
        args.vcf = modified_vcf+".gz"

    vcf_obj = pp.vcf(args.vcf)

    for sample_name in vcf_obj.samples:
        args.sample_name = sample_name
        args.prefix = sample_name
        files_prefix = args.dir+"/"+sample_name
        args.tmp_vcf = "%s.vcf.gz" % uuid4()
        if args.reporting_af>0:
            # Additionally drop records without any allelic depth
            pp.run_cmd("bcftools view -s %(sample_name)s -ac 1 %(vcf)s | bcftools view -i 'sum(AD)>0' | bcftools +fixploidy -Oz -o %(tmp_vcf)s " % vars(args))
        else:
            pp.run_cmd("bcftools view -s %(sample_name)s -ac 1 %(vcf)s | bcftools +fixploidy -Oz -o %(tmp_vcf)s " % vars(args))
        results = pp.vcf_profiler(args.conf, files_prefix, sample_name, args.tmp_vcf,args.delly_vcf)
        results = tbp.reformat(results,args.conf,reporting_af=args.reporting_af,use_suspect=args.suspect)
        results["id"] = sample_name
        results["input_data_source"] = "vcf"
        results["tbprofiler_version"] = tbp.__version__
        results["pipeline"] = []

        tbp.write_outputs(args,results)
        pp.run_cmd("rm %(tmp_vcf)s*" % vars(args))

    if args.lofreq_sample_name:
        # Remove the rewritten lofreq VCF created above
        pp.run_cmd("rm %(vcf)s*" % vars(args))


def main_test(args):
    """Run the ``profile`` sub-command on the test reads under ``<base_prefix>/share/tbprofiler``."""
    test_reads = sys.base_prefix+"/share/tbprofiler/tbprofiler.test.fq.gz"
    command = "tb-profiler profile -1 %s" % test_reads
    pp.run_cmd(command, verbose=2)


#### Argument Parsing ####

# Top-level parser; every task is registered as a sub-command below and
# stores its handler in the ``func`` default.
parser = argparse.ArgumentParser(description='TBProfiler pipeline',formatter_class=argparse.ArgumentDefaultsHelpFormatter,add_help=False)
subparsers = parser.add_subparsers(help="Task to perform")

# Profile #
parser_sub = subparsers.add_parser('profile', help='Run whole profiling pipeline', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# Named input_group so the builtin input() is not shadowed at module level
input_group=parser_sub.add_argument_group("Input options")
# Exactly one of --read1 / --bam / --fasta / --vcf must be supplied
group = input_group.add_mutually_exclusive_group(required=True)
group.add_argument('--read1','-1',help='First read file')
input_group.add_argument('--read2','-2',help='Second read file')
group.add_argument('--bam','-a',help='BAM file. Make sure it has been generated using the H37Rv genome (GCA_000195955.2)')
group.add_argument('--fasta','-f',help='Fasta file')
group.add_argument('--vcf','-v',help='VCF file')
input_group.add_argument('--platform','-m',choices=["illumina","nanopore"],default="illumina",help='NGS Platform used to generate data')
input_group.add_argument('--db',default='tbdb',help='Mutation panel name')
input_group.add_argument('--external_db',type=str,help='Path to db files custom')

output=parser_sub.add_argument_group("Output options")
output.add_argument('--prefix','-p',default="tbprofiler",help='Sample prefix for all results generated')
output.add_argument('--csv',action="store_true",help="Add CSV output")
output.add_argument('--txt',action="store_true",help="Add text output")
output.add_argument('--pdf',action="store_true",help="Add PDF output. This requires pdflatex to be installed")
output.add_argument('--add_columns',default=None,type=str,help="Add additional columns found in the mutation database to the text and csv results")
output.add_argument('--add_mutation_metadata',action="store_true",help=argparse.SUPPRESS)
output.add_argument('--call_whole_genome',action="store_true",help="Call whole genome")
output.add_argument('--dir','-d',default=".",help='Storage directory')

algorithm=parser_sub.add_argument_group("Algorithm options")
algorithm.add_argument('--mapper',default="bwa", choices=["bwa","minimap2","bowtie2","bwa-mem2"],help="Mapping tool to use. If you are using nanopore data it will default to minimap2",type=str)
algorithm.add_argument('--caller',default="freebayes", choices=["bcftools","gatk","freebayes","pilon","lofreq"],help="Variant calling tool to use.",type=str)
algorithm.add_argument('--calling_params',type=str,help='Override default parameters for variant calling')
algorithm.add_argument('--min_depth',default=10,type=int,help='Minimum depth required to call variant. Bases with depth below this cutoff will be marked as missing')
algorithm.add_argument('--af',default=0.1,type=float,help='Minimum allele frequency to call variants')
algorithm.add_argument('--reporting_af',default=0.1,type=float,help='Minimum allele frequency to use variants for prediction')
algorithm.add_argument('--coverage_fraction_threshold',default=0,type=int,help='Cutoff used to calculate fraction of region covered by <= this value')
algorithm.add_argument('--missing_cov_threshold',type=int,help='Cutoff used to positions/codons in genes which are missing (this argument has now been merged with --min_depth argument and will be deprecated in future releases)')
algorithm.add_argument('--suspect',action="store_true",help="Use the suspect suite of tools to add ML predictions")
algorithm.add_argument('--spoligotype',action="store_true",help="Perform in-silico spoligotyping (experimental feature)")
algorithm.add_argument('--no_trim',action="store_true",help="Don't trim files using trimmomatic")
algorithm.add_argument('--no_flagstat',action="store_true",help="Don't collect flagstats")
# NOTE(review): unlike the other --no_* switches this one is store_false, so
# args.no_clip is True by default and False when the flag is passed - confirm
# that this matches how the pipeline consumes the value.
algorithm.add_argument('--no_clip',action="store_false",help="Don't clip reads")
algorithm.add_argument('--no_delly',action="store_true",help="Don't run delly")
algorithm.add_argument('--no_lineage',action="store_true",help=argparse.SUPPRESS)
algorithm.add_argument('--add_variant_annotations',action="store_true",help=argparse.SUPPRESS)
algorithm.add_argument('--threads','-t',default=1,help='Threads to use',type=int)

other=parser_sub.add_argument_group("Other options")
other.add_argument('--verbose',default=0, choices=[0,1,2],help="Verbosity increases from 0 to 2",type=int)
other.add_argument('--delly_vcf',help=argparse.SUPPRESS)
other.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
other.add_argument('--version', action='version', version="TBProfiler version %s" % tbp.__version__)

parser_sub.set_defaults(func=main_profile)


# VCF profile #
# Deprecated sub-command: profile an existing VCF file.
parser_sub = subparsers.add_parser(
    "vcf_profile",
    help="Run profiling pipeline on VCF file. Warning: this assumes that you have good coverage across the genome",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser_sub.add_argument("vcf", help="VCF file")
parser_sub.add_argument("--db", default="tbdb", help="Mutation panel name")
parser_sub.add_argument("--external_db", type=str, help='Path to db files prefix (overrides "--db" parameter)')
parser_sub.add_argument("--reporting_af", type=float, default=0.1, help="Minimum allele frequency to call variants")
parser_sub.add_argument("--lofreq_sample_name", help="Sample name to use if VCF has been generated by lofreq")
parser_sub.add_argument("--suspect", action="store_true", help="Use the suspect suite of tools to add ML predictions")
parser_sub.add_argument("--txt", action="store_true", help="Add text output")
parser_sub.add_argument("--csv", action="store_true", help="Add CSV output")
parser_sub.add_argument(
    "--add_columns",
    type=str,
    default=None,
    help="Add additional columns found in the mutation database to the text and csv results",
)
parser_sub.add_argument("--verbose", type=int, default=0, choices=[0, 1, 2], help="Verbosity increases from 0 to 2")
parser_sub.add_argument("--delly_vcf", help=argparse.SUPPRESS)
parser_sub.add_argument("--dir", "-d", default=".", help="Storage directory")
parser_sub.add_argument("--no_clean", action="store_true", help=argparse.SUPPRESS)
parser_sub.add_argument("--version", action="version", version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_vcf_profile)


# Fasta profile #
# Deprecated sub-command: profile an assembled genome in fasta format.
parser_sub = subparsers.add_parser('fasta_profile', help='Run profiling pipeline on Fasta file. Warning: this assumes that this is a good quality assembly which coveres all drug resistance loci', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# Help texts corrected: both options were described as "VCF file"
parser_sub.add_argument('--fasta','-f',help='Fasta file',required=True)
parser_sub.add_argument('--prefix','-p',help='Sample prefix for all results generated',required=True)
parser_sub.add_argument('--db',default='tbdb',help='Mutation panel name')
parser_sub.add_argument('--external_db',type=str,help='Path to db files prefix (overrides "--db" parameter)')
parser_sub.add_argument('--suspect',action="store_true",help="Use the suspect suite of tools to add ML predictions")
parser_sub.add_argument('--txt',action="store_true",help="Add text output")
parser_sub.add_argument('--csv',action="store_true",help="Add CSV output")
parser_sub.add_argument('--add_columns',default=None,type=str,help="Add additional columns found in the mutation database to the text and csv results")
parser_sub.add_argument('--verbose',default=0, choices=[0,1,2],help="Verbosity increases from 0 to 2",type=int)
parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--version', action='version', version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_fasta_profile)


# Lineage #
parser_sub = subparsers.add_parser('lineage', help='Profile only lineage', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser_sub.add_argument('--bam','-a',required=True, help='BAM file. Make sure it has been generated using the H37Rv genome (GCA_000195955.2)')
parser_sub.add_argument('--prefix','-p',default="tbprofiler",help='Sample prefix')
parser_sub.add_argument('--outfmt',default='json',choices=["json","txt"],type=str,help="Output format")
# Help text corrected (was a copy of the --prefix help): main_lineage passes
# <prefix>.lineage.snps.txt to the barcoding step when this flag is set.
parser_sub.add_argument('--snps',action="store_true",help='Write the lineage barcode SNPs to <prefix>.lineage.snps.txt')
parser_sub.add_argument('--caller',default='freebayes',choices=["bcftools","freebayes","gatk"],type=str,help="Variant caller")
parser_sub.add_argument('--platform','-m',choices=["illumina","nanopore"],default="illumina",help='NGS Platform used to generate data')
parser_sub.add_argument('--db',default='tbdb',help='Mutation panel name')
parser_sub.add_argument('--spoligotype',action="store_true",help="Perform in-silico spoligotyping (experimental feature)")
parser_sub.add_argument('--external_db',type=str,help='Path to db files prefix (overrides "--db" parameter)')
parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--version', action='version', version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_lineage)

# Spoligotype #
parser_sub = subparsers.add_parser(
    "spoligotype",
    help="Profile spoligotype (experimental feature)",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser_sub.add_argument(
    "--bam", "-a",
    required=True,
    help="BAM file. Make sure it has been generated using the H37Rv genome (GCA_000195955.2)",
)
parser_sub.add_argument("--prefix", "-p", default="tbprofiler", help="Sample prefix")
parser_sub.add_argument("--txt", action="store_true", help="Add text output")
parser_sub.add_argument("--csv", action="store_true", help="Add CSV output")
parser_sub.add_argument(
    "--platform", "-m",
    choices=["illumina", "nanopore"],
    default="illumina",
    help="NGS Platform used to generate data",
)
parser_sub.add_argument("--db", default="tbdb", help="Mutation panel name")
parser_sub.add_argument("--external_db", type=str, help='Path to db files prefix (overrides "--db" parameter)')
parser_sub.add_argument("--dir", "-d", default=".", help="Storage directory")
parser_sub.add_argument("--no_clean", action="store_true", help=argparse.SUPPRESS)
parser_sub.add_argument("--version", action="version", version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_spoligotype)

# Collate #
parser_sub = subparsers.add_parser(
    "collate",
    help="Collate results form multiple samples together",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser_sub.add_argument("--prefix", "-p", default="tbprofiler", help="Sample prefix")
parser_sub.add_argument("--samples", help="File with samples (one per line)")
parser_sub.add_argument("--full", action="store_true", help="Output mutations in main result file")
parser_sub.add_argument("--all_variants", action="store_true", help="Output all variants in variant matrix")
parser_sub.add_argument(
    "--mark_missing",
    action="store_true",
    help="An asteriks will be use to mark predictions which are affected by missing data at a drug resistance position",
)
parser_sub.add_argument("--reporting_af", type=float, default=0.1, help="Minimum allele frequency to call variants")
parser_sub.add_argument("--db", default="tbdb", help="Full path to mutation database json file to use")
parser_sub.add_argument("--external_db", type=str, help='Path to db files prefix (overrides "--db" parameter)')
parser_sub.add_argument("--dir", "-d", default="results", help="Storage directory")
parser_sub.add_argument("--no_clean", action="store_true", help=argparse.SUPPRESS)
parser_sub.add_argument("--version", action="version", version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_collate)


# Reprofile #
parser_sub = subparsers.add_parser(
    "reprofile",
    help="Reprofile previous results using a new library. The new library must have same targets and the old one.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser_sub.add_argument("json", help="JSON output file")
parser_sub.add_argument("--prefix", "-p", default="tbprofiler", help="Sample prefix")
parser_sub.add_argument("--db", default="tbdb", help="Mutation panel name")
parser_sub.add_argument("--external_db", type=str, help='Path to db files prefix (overrides "--db" parameter)')
parser_sub.add_argument(
    "--reporting_af",
    type=float,
    default=0.1,
    help="Minimum allele frequency to use variants for prediction",
)
# Hidden options (suppressed from the help output)
parser_sub.add_argument("--add_mutation_metadata", action="store_true", help=argparse.SUPPRESS)
parser_sub.add_argument("--extra_consequences", help=argparse.SUPPRESS)
parser_sub.add_argument("--dir", "-d", default=".", help="Storage directory")
parser_sub.add_argument("--no_clean", action="store_true", help=argparse.SUPPRESS)
parser_sub.add_argument("--version", action="version", version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_reprofile)


# Reformat #
parser_sub = subparsers.add_parser('reformat', help='Reformat json results into text or csv', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# Required positional: a default could never take effect, so it was dropped;
# the help text now describes the argument (it is opened as the JSON input).
parser_sub.add_argument('json',help='JSON results file to reformat')
parser_sub.add_argument('--txt',action="store_true",help="Add text output")
parser_sub.add_argument('--csv',action="store_true",help="Add CSV output")
parser_sub.add_argument('--db',default='tbdb',help='Mutation panel name')
parser_sub.add_argument('--external_db',type=str,help='Path to db files prefix (overrides "--db" parameter)')
parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--version', action='version', version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_reformat)

# Create db #
parser_sub = subparsers.add_parser('create_db', help='Generate the files required to run TBProfiler', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# The help texts of --prefix and --csv were swapped; each now describes its own option
parser_sub.add_argument('--prefix','-p',default="tbdb",type=str,help='The prefix for all output files')
parser_sub.add_argument('--csv','-c',default="tbdb.csv",type=str,help='The input CSV file containing the mutations')
parser_sub.add_argument('--watchlist','-w',default="",type=str,help='A csv file containing genes to profile but without any specific associated mutations')
parser_sub.add_argument('--spoligotypes',default="",type=str,help='A file containing a list of spoligotype spacers')
parser_sub.add_argument('--barcode',default="barcode.bed",type=str,help='A bed file containing lineage barcode SNPs')
parser_sub.add_argument('--amplicon_primers',type=str,help='A file containing a list of amplicon primers')
# NOTE(review): this help text duplicates the output-prefix description and
# looks copy-pasted - confirm what --match_ref expects and update it.
parser_sub.add_argument('--match_ref',type=str,help='The prefix for all output files')
parser_sub.add_argument('--other_annotations',default="",type=str,help="A CSV containing gene, mutation, drug and confidence columns")
parser_sub.add_argument('--custom',action="store_true",help='Tells the script this is a custom database, this is used to alter the generation of the version file')
parser_sub.add_argument('--db_name',help='Overrides the name of the database in the version file')
parser_sub.add_argument('--db_commit',help='Overrides the commit string of the database in the version file')
parser_sub.add_argument('--db_author',help='Overrides the author of the database in the version file')
parser_sub.add_argument('--db_date',help='Overrides the date of the database in the version file')
parser_sub.add_argument('--include_original_mutation',action="store_true", help='Include the original mutation (before reformatting) as part of the variant annotaion')
parser_sub.add_argument('--load',action="store_true", help='Automaticaly load database')
parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--version', action='version', version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_create_db)

# Update tbdb #
parser_sub = subparsers.add_parser('update_tbdb', help='Pull the latest tbdb library and load', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# Help text corrected (was "Storage directory"): the value is handed to "git checkout"
parser_sub.add_argument('--branch','-b',default="master",help='Branch of the tbdb repository to check out')
# NOTE(review): this help text looks copy-pasted; the value is forwarded to
# "tb-profiler create_db --match_ref" - confirm what it expects and update it.
parser_sub.add_argument('--match_ref',type=str,help='The prefix for all output files')
parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
parser_sub.add_argument('--no_clean', action='store_true',help=argparse.SUPPRESS)
parser_sub.add_argument('--version', action='version', version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_update_tbdb)

# Version #
parser_sub = subparsers.add_parser(
    "version",
    help="Output program version and exit",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser_sub.add_argument("--dir", "-d", default=".", help="Storage directory")
parser_sub.add_argument("--no_clean", action="store_true", help=argparse.SUPPRESS)
parser_sub.add_argument("--version", action="version", version="TBProfiler version %s" % tbp.__version__)
parser_sub.set_defaults(func=main_version)


# Parse the command line and run the selected sub-command. Without a
# sub-command no ``func`` default is set, so the help text is shown instead.
args = parser.parse_args()
if not hasattr(args, "func"):
    parser.print_help(sys.stderr)
else:
    args.software_name = __softwarename__
    # Unique prefix for the temporary files of this run
    args.tmp_prefix = str(uuid4())
    args.files_prefix = f"{args.dir}/{args.tmp_prefix}"

    # Only sub-commands that define --db need a database configuration
    if hasattr(args, "db"):
        default_db_missing = (
            args.db == "tbdb"
            and not args.external_db
            and pp.nofile(sys.base_prefix + "/share/tbprofiler/tbdb.fasta")
        )
        if default_db_missing:
            pp.errlog("Can't find the tbdb file at %s. Please run 'tb-profiler update_tbdb' to load the default library or specify another using the '--external_db' flag" % sys.base_prefix, ext=True)

        # --external_db takes precedence over --db when it is given
        db_location = args.external_db or args.db
        args.conf = pp.get_db(args.software_name, db_location, args.db)

    args.func(args)