Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop #101

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
32 changes: 32 additions & 0 deletions hivtrace/hivtrace.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,12 @@ def hivtrace(id,
fraction,
strip_drams_flag=False,
filter_edges="no",
filter_cycles=False,
handle_contaminants="remove",
skip_alignment=False,
save_intermediate=True,
cycle_report_fn='',
attributes_file=None,
prior=None
):
"""
Expand Down Expand Up @@ -559,6 +562,12 @@ def hivtrace(id,
hivnetworkcsv_process.extend(
['-C', handle_contaminants, '-F', CONTAMINANT_ID_LIST])


if filter_cycles:
hivnetworkcsv_process.extend(['-l'])

if cycle_report_fn:
hivnetworkcsv_process.extend(['--cycle-report-file', cycle_report_fn])
if prior:
hivnetworkcsv_process.extend(
['--prior', prior])
Expand Down Expand Up @@ -714,25 +723,36 @@ def hivtrace(id,
def main():

parser = argparse.ArgumentParser(description='HIV TRACE')

parser.add_argument('-i', '--input', help='FASTA file', required=True)

parser.add_argument(
'-a',
'--ambiguities',
help='handle ambiguous nucleotides using the specified strategy',
required=True)

parser.add_argument(
'-r', '--reference', help='reference to align to', required=True)

parser.add_argument(
'-t',
'--threshold',
help='Only count edges where the distance is less than this threshold',
required=True)

parser.add_argument(
'-m', '--minoverlap', help='Minimum Overlap', required=True)

parser.add_argument('-g', '--fraction', help='Fraction', required=True)

parser.add_argument('-u', '--curate', help='Filter contaminants')

parser.add_argument(
'-f', '--filter', help='Edge filtering option', default="no", type=str)

parser.add_argument('--filter-cycles', help='Filters cycles', action='store_true')

parser.add_argument(
'-s',
'--strip_drams',
Expand All @@ -741,6 +761,7 @@ def main():
with these sites removed. It requires input/output file names along with the list of \
DRAM sites to remove: 'lewis' or 'wheeler'."
)

parser.add_argument(
'-c',
'--compare',
Expand All @@ -754,11 +775,16 @@ def main():

parser.add_argument(
'--skip-alignment', help='Skip alignment', action='store_true')

parser.add_argument('--attributes-file', help='Annotate with attributes')

parser.add_argument('--log', help='Write logs to specified directory')

parser.add_argument('-o', '--output', help='Specify output filename')
parser.add_argument('-p', '--prior', help='Prior network configuration')

parser.add_argument('--cycle-report-fn', help='cycle report output')

args = parser.parse_args()

if args.log:
Expand All @@ -779,6 +805,7 @@ def main():
COMPARE_TO_LANL = args.compare
FRACTION = args.fraction
STRIP_DRAMS = args.strip_drams
CYCLE_REPORT_FN = None
PRIOR = None

if(args.prior):
Expand All @@ -787,6 +814,9 @@ def main():
if args.output:
OUTPUT_FN = args.output

if args.cycle_report_fn:
CYCLE_REPORT_FN = args.cycle_report_fn

if STRIP_DRAMS != 'wheeler' and STRIP_DRAMS != 'lewis':
STRIP_DRAMS = False

Expand All @@ -801,9 +831,11 @@ def main():
FRACTION,
strip_drams_flag=STRIP_DRAMS,
filter_edges=args.filter,
filter_cycles=args.filter_cycles,
handle_contaminants=args.curate,
skip_alignment=args.skip_alignment,
save_intermediate=(not args.do_not_store_intermediate),
cycle_report_fn=CYCLE_REPORT_FN,
prior=PRIOR
)

Expand Down
2 changes: 1 addition & 1 deletion test/rsrc/TEST.FASTA_USER.TRACE.JSON
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"Degrees": {"Distribution": [4, 1], "fitted": [0.8516685221760777, 0.110011136113623], "Model": "Yule", "rho": 6.741657365453795, "rho CI": [1.457751736250405, 798.9933640071333]}, "Network Summary": {"Edges": 3, "Sequences used to make links": 5, "Clusters": 2, "Nodes": 5}, "Cluster sizes": [2, 3], "Nodes": [{"edi": null, "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}, "hxb2_linked": "false", "id": "Z|JP|K03455|2036|5", "cluster": 1, "baseline": null}, {"edi": null, "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}, "hxb2_linked": "false", "id": "Z|JP|K03455|2036|4", "cluster": 1, "baseline": null}, {"edi": null, "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}, "hxb2_linked": "false", "id": "Z|JP|K03455|2036|3", "cluster": 2, "baseline": null}, {"edi": null, "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}, "hxb2_linked": "false", "id": "Z|JP|K03455|2036", "cluster": 2, "baseline": null}, {"edi": null, "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}, "hxb2_linked": "false", "id": "Z|JP|K03455|2036|2", "cluster": 2, "baseline": null}], "Directed Edges": {"Reasons for unresolved directions": {"Missing dates": 3}, "Count": 0}, "Edges": [{"removed": false, "length": 0.0101121, "directed": false, "source": 4, "support": 0.0, "sequences": ["Z|JP|K03455|2036|2", "Z|JP|K03455|2036|3"], "target": 2}, {"removed": false, "length": 0.0146112, "directed": false, "source": 3, "support": 0.0, "sequences": ["Z|JP|K03455|2036", "Z|JP|K03455|2036|3"], "target": 2}, {"removed": false, "length": 0.00230851, "directed": false, "source": 1, "support": 0.0, "sequences": ["Z|JP|K03455|2036|4", "Z|JP|K03455|2036|5"], "target": 0}], "Multiple sequences": {"Subjects with": 0, "Followup, days": null}, "HIV Stages": {"Chronic": 5, "A-3": 0, "E-2": 0, "A-2": 0, "E-1": 0, "A-1": 0, "E-3": 0}}
{"Cluster sizes": [2, 3], "Edges": [{"support": 0.0, "length": 0.0101121, "target": 2, "removed": false, "sequences": ["Z|JP|K03455|2036|2", "Z|JP|K03455|2036|3"], "source": 4, "directed": false}, {"support": 0.0, "length": 0.0146112, "target": 2, "removed": false, "sequences": ["Z|JP|K03455|2036", "Z|JP|K03455|2036|3"], "source": 3, "directed": false}, {"support": 0.0, "length": 0.00230851, "target": 0, "removed": false, "sequences": ["Z|JP|K03455|2036|4", "Z|JP|K03455|2036|5"], "source": 1, "directed": false}], "HIV Stages": {"E-1": 0, "E-3": 0, "A-3": 0, "A-2": 0, "A-1": 0, "Chronic": 5, "E-2": 0}, "Directed Edges": {"Count": 0, "Reasons for unresolved directions": {"Missing dates": 3}}, "Multiple sequences": {"Subjects with": 0, "Followup, days": null}, "Nodes": [{"edi": null, "baseline": null, "cluster": 1, "hxb2_linked": "false", "id": "Z|JP|K03455|2036|5", "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}}, {"edi": null, "baseline": null, "cluster": 1, "hxb2_linked": "false", "id": "Z|JP|K03455|2036|4", "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}}, {"edi": null, "baseline": null, "cluster": 2, "hxb2_linked": "false", "id": "Z|JP|K03455|2036|3", "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}}, {"edi": null, "baseline": null, "cluster": 2, "hxb2_linked": "false", "id": "Z|JP|K03455|2036", "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}}, {"edi": null, "baseline": null, "cluster": 2, "hxb2_linked": "false", "id": "Z|JP|K03455|2036|2", "attributes": {"COUNTRY": "JP", "SOURCE": "TEST.FASTA_output.fasta", "YEAR_OF_SAMPLING": "2036", "ACCESSION_NUMBER": "K03455", "SUBTYPE": "Z"}}], "Degrees": {"Distribution": [4, 1], "rho": 6.741657365453795, "Model": "Yule", "rho CI": [1.457751736250405, 798.9933640071333], "fitted": [0.8516685221760777, 0.110011136113623]}, "Network Summary": {"Edges": 3, "Nodes": 5, "Clusters": 2, "Sequences used to make links": 5}}