diff --git a/docs/src/reference/change_log.md b/docs/src/reference/change_log.md
index 8d5f61194..da8018f49 100644
--- a/docs/src/reference/change_log.md
+++ b/docs/src/reference/change_log.md
@@ -5,6 +5,10 @@ We also use this change log to document new features that maintain backward comp
 
 ## New features since last version update
 
+## v15 (5 December 2024)
+
+- Remove `run_pangolin` configuration option from the workflow, since the pangoLEARN tool that this enabled has been deprecated. [PR 1164](https://github.com/nextstrain/ncov/pull/1164)
+
 ## v14 (23 October 2024)
 
 - 23 October 2024: Update workflow to use Nextclade v3. This includes the removal of unused mutation summary script and rules that expected Nextclade v2 outputs. Dropping the mutation summary rules removed the need for the full alignment rule `align` to produce the insertions and translations outputs, so they have been removed. The `build_align` rule no longer produces a separate `insertions.tsv` since insertions are now included in the `nextclade_qc.tsv`. [PR 1160](https://github.com/nextstrain/ncov/pull/1160)
diff --git a/docs/src/reference/workflow-config-file.rst b/docs/src/reference/workflow-config-file.rst
index 60a54d823..63575290b 100644
--- a/docs/src/reference/workflow-config-file.rst
+++ b/docs/src/reference/workflow-config-file.rst
@@ -728,13 +728,6 @@ crowding_penalty
 
 .. _title-1:
 
-run_pangolin
--------------
-
-- type: boolean
-- description: Enable annotation of Pangolin lineages for a given build's subsampled sequences.
-- default: ``false``
-
 .. _workflow-config-mask:
 
 mask
diff --git a/scripts/make_pangolin_node_data.py b/scripts/make_pangolin_node_data.py
deleted file mode 100644
index dcbf5b2db..000000000
--- a/scripts/make_pangolin_node_data.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
-Translate pangolineages from CSV -> JSON for node_data
-Note: this should arguably live instead as part of `combine_metadata`,
-but this gets particularly complex given the new multiple-inputs logic.
-So, for now, following the initial suggestion in the issue.
-"""
-
-import argparse
-import pandas as pd
-import csv
-import json
-from augur.utils import write_json
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description="Create node data for assigned pangolin lineages",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
-    )
-    parser.add_argument("--pangolineages", type=str, required=True, help="pangolineages.csv")
-    parser.add_argument("--node_data_outfile", type=str, help="pangolineages.json")
-    parser.add_argument("--attribute_name", default="pango_lineage_local", help="attribute name for pangolin lineage annotations in the output JSON")
-    args = parser.parse_args()
-
-    pangolineages = pd.read_csv(args.pangolineages)
-
-    node_data = {
-        "nodes": {
-            row['taxon']: {args.attribute_name: row['lineage']} for idx, row in pangolineages.iterrows()
-        }
-    }
-
-    write_json(node_data, args.node_data_outfile)
diff --git a/workflow/envs/nextstrain.yaml b/workflow/envs/nextstrain.yaml
index bf59f7072..02187c107 100644
--- a/workflow/envs/nextstrain.yaml
+++ b/workflow/envs/nextstrain.yaml
@@ -8,6 +8,4 @@ dependencies:
   - epiweeks=2.1.2
   - iqtree=2.2.0.3
   - nextclade=3.9.0
-  - pangolin=3.1.20
-  - pangolearn=2022.01.20
   - python>=3.8*
diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk
index 4e9ac00a4..8834897bf 100644
--- a/workflow/snakemake_rules/main_workflow.smk
+++ b/workflow/snakemake_rules/main_workflow.smk
@@ -702,55 +702,6 @@ rule filter:
             --output-log {output.filter_log} 2>&1 | tee {log};
         """
 
-if "run_pangolin" in config and config["run_pangolin"]:
-    rule run_pangolin:
-        message:
-            """
-            Running pangolin to assign lineage labels to samples. Includes putative lineage definitions by default.
-            Please remember to update your installation of pangolin regularly to ensure the most up-to-date classifications.
-            """
-        input:
-            alignment = "results/{build_name}/aligned.fasta",
-        output:
-            lineages = "results/{build_name}/pangolineages.csv",
-        params:
-            outdir = "results/{build_name}",
-            csv_outfile = "pangolineages.csv",
-            node_data_outfile = "pangolineages.json"
-        log:
-            "logs/pangolin_{build_name}.txt"
-        conda: config["conda_environment"]
-        threads: 1
-        resources:
-            mem_mb=3000
-        benchmark:
-            "benchmarks/pangolineages_{build_name}.txt"
-        shell: ## once pangolin fully supports threads, add `--threads {threads}` to the below (existing pango cli param)
-            r"""
-            pangolin {input.alignment}\
-                --outdir {params.outdir} \
-                --outfile {params.csv_outfile} 2>&1 | tee {log}\
-            """
-
-    rule make_pangolin_node_data:
-        input:
-            lineages = rules.run_pangolin.output.lineages
-        output:
-            node_data = "results/{build_name}/pangolineages.json"
-        log:
-            "logs/pangolin_export_{build_name}.txt"
-        conda: config["conda_environment"]
-        resources:
-            mem_mb=3000
-        benchmark:
-            "benchmarks/make_pangolin_node_data_{build_name}.txt"
-        shell:
-            r"""
-            python3 scripts/make_pangolin_node_data.py \
-                --pangolineages {input.lineages} \
-                --node_data_outfile {output.node_data} 2>&1 | tee {log}\
-            """
-
 # TODO: This will probably not work for build names like "country_usa" where we need to know the country is "USA".
 rule adjust_metadata_regions:
     message:
@@ -1308,9 +1259,6 @@ def _get_node_data_by_wildcards(wildcards):
         rules.calculate_epiweeks.output.node_data,
     ]
 
-    if "run_pangolin" in config and config["run_pangolin"]:
-        inputs.append(rules.make_pangolin_node_data.output.node_data)
-
     # Convert input files from wildcard strings to real file names.
     inputs = [input_file.format(**wildcards_dict) for input_file in inputs]