From 96d299bfa00a0b96940db0ed4d84b055f40d40eb Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 29 Jan 2024 10:55:30 -0800 Subject: [PATCH 1/6] auspice_config: Remove filter `level` The filter `level` was originally added in https://github.com/nextstrain/ncov/commit/5a71d774740206ed0367e7d0928794b6c473440e for RBD level but was the wrong field name. The correct filter `rbd_level` was later added in https://github.com/nextstrain/ncov/commit/1b4846a72cb805ba8e6adb6244e9955690e4afcc. --- workflow/snakemake_rules/export_for_nextstrain.smk | 1 - 1 file changed, 1 deletion(-) diff --git a/workflow/snakemake_rules/export_for_nextstrain.smk b/workflow/snakemake_rules/export_for_nextstrain.smk index f857a95eb..a90f0a31d 100644 --- a/workflow/snakemake_rules/export_for_nextstrain.smk +++ b/workflow/snakemake_rules/export_for_nextstrain.smk @@ -303,7 +303,6 @@ rule auspice_config: "pango_lineage", "Nextclade_pango", "region", - "level", "country", "division", location_filter, From a2ddf1695f926cacfbf256c0efdac12dd131987f Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 29 Jan 2024 12:36:20 -0800 Subject: [PATCH 2/6] assign_rbd_levels: Add missing log file --- workflow/snakemake_rules/main_workflow.smk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk index 6f257191b..a7ab78735 100644 --- a/workflow/snakemake_rules/main_workflow.smk +++ b/workflow/snakemake_rules/main_workflow.smk @@ -1307,6 +1307,8 @@ rule assign_rbd_levels: basal_clade_label="21L (Omicron)" output: node_data="results/{build_name}/rbd_levels.json", + log: + "logs/assign_rbd_levels_{build_name}.txt" benchmark: "benchmarks/assign_levels_{build_name}.txt", conda: From f0d154d3bd35741f21bb6516a261be2efbaad077 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 29 Jan 2024 12:36:54 -0800 Subject: [PATCH 3/6] assign_rbd_levels: rename benchmark file to match rule name --- workflow/snakemake_rules/main_workflow.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk index a7ab78735..0e0c459da 100644 --- a/workflow/snakemake_rules/main_workflow.smk +++ b/workflow/snakemake_rules/main_workflow.smk @@ -1310,7 +1310,7 @@ rule assign_rbd_levels: log: "logs/assign_rbd_levels_{build_name}.txt" benchmark: - "benchmarks/assign_levels_{build_name}.txt", + "benchmarks/assign_rbd_levels_{build_name}.txt", conda: config["conda_environment"], shell: From 90a84cdd1e9f11fb4ecbe5184eb284a85d0ec29d Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 29 Jan 2024 12:07:34 -0800 Subject: [PATCH 4/6] assign_rbd_levels: Update `basal_clade_label` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clade labels were updated in https://github.com/nextstrain/ncov/pull/1065. Update the clade label so that the `assign_rbd_levels` script can find the correct basal clade. I had considered pulling this value out as a parameter in the config YAML, but the original commit message¹ implies that this should _not_ be configurable. ¹ https://github.com/nextstrain/ncov/commit/fb5f44e1b935f80940c510d196a8a70e7057705b --- workflow/snakemake_rules/main_workflow.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk index 0e0c459da..6ce6282cc 100644 --- a/workflow/snakemake_rules/main_workflow.smk +++ b/workflow/snakemake_rules/main_workflow.smk @@ -1304,7 +1304,7 @@ rule assign_rbd_levels: tree = "results/{build_name}/tree.nwk", params: config=config["files"]["rbd_level_definitions"], - basal_clade_label="21L (Omicron)" + basal_clade_label="21L (BA.2)" output: node_data="results/{build_name}/rbd_levels.json", log: From 7c662b5c5020743eb36db7ab0f8b2773061e0847 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 29 Jan 2024 12:13:34 -0800 Subject: [PATCH 5/6] script/assign_rbd_levels: Update clade parsing The JSON output from `augur clades` was updated to separate `nodes` and `branches` in https://github.com/nextstrain/augur/pull/728 so now the `assign_rbd_levels` script needs to parse the `branches` in order to find the basal node. --- scripts/assign_rbd_levels.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/assign_rbd_levels.py b/scripts/assign_rbd_levels.py index bf8c07b60..ec14ec156 100644 --- a/scripts/assign_rbd_levels.py +++ b/scripts/assign_rbd_levels.py @@ -7,12 +7,12 @@ def find_matching_nodes(clades_fname, basal_clade_label, tree_fname): basal_node_name = None with open(clades_fname) as fh: - for name, node_data in json.load(fh)['nodes'].items(): - if node_data.get('clade_annotation', '') == basal_clade_label: + for name, node_data in json.load(fh)['branches'].items(): + if node_data.get('labels', {}).get('clade', '') == basal_clade_label: basal_node_name = name break if not basal_node_name: - print(f"WARNING: no node found with a clade_annotation of {basal_clade_label}. This script will proceed, but no levels will be exported.") + print(f"WARNING: no branch found with a clade of {basal_clade_label}. This script will proceed, but no levels will be exported.") return set() print(f"Node representing {basal_clade_label}: {basal_node_name}") T = Phylo.read(tree_fname, 'newick') @@ -92,4 +92,4 @@ def classify_into_levels(spike_seq, rbd_mutations): node_data['rbd_level_details'][name] = ", ".join([f"S:{x[0][1]}{x[1]} ({x[2]})" for x in zip(rbd_mutations, codons, calls)]) with open(args.output_node_data, 'w') as fh: - json.dump(node_data, fh, indent=2) \ No newline at end of file + json.dump(node_data, fh, indent=2) From f67e2aac4d6eb6e8b48f12842bde6dfd93688da2 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Tue, 30 Jan 2024 11:02:17 -0800 Subject: [PATCH 6/6] Update changelog --- docs/src/reference/change_log.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/src/reference/change_log.md b/docs/src/reference/change_log.md index f55db0c00..0fba7c670 100644 --- a/docs/src/reference/change_log.md +++ b/docs/src/reference/change_log.md @@ -5,7 +5,10 @@ We also use this change log to document new features that maintain backward comp ## New features since last version update +- 30 January 2024: Fix RBD-level coloring by updating clade label and clade parsing. [PR 1094](https://github.com/nextstrain/ncov/pull/1094) + - 14 Dec 2023: Use `nextclade2` binary that makes the version explicit [PR 1089](https://github.com/nextstrain/ncov/pull/1089) + - 17 June 2023: Update subsampling strategy for `nextstrain_profiles` to better equilibrate per-capita sampling rates across geographic regions. Primarily this update breaks out China and India as separate subsampling targets because of their large population sizes. It also fine tunes the per-region sampling targets. After this update, URL structure (ie https://nextstrain.org/ncov/gisaid/global/6m) is unchanged. [PR 1074](https://github.com/nextstrain/ncov/pull/1074) ## v13 (16 May 2023)