Skip to content

Commit

Permalink
Merge pull request #100 from nextstrain/james/division-metadata
Browse files Browse the repository at this point in the history
[h5n1-cattle-outbreak] distinguish inferred vs known metadata for division
  • Loading branch information
jameshadfield authored Oct 30, 2024
2 parents 0b5dcae + 6583482 commit 443e785
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 5 deletions.
24 changes: 19 additions & 5 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,14 @@ def metadata_by_wildcards(wildcards):
# H5 builds have extra clade-level metadata added to the metadata TSV.
# We may move this to a node-data JSON which would simplify the snakemake logic
# a bit -- see <https://github.com/nextstrain/avian-flu/issues/25>
if wildcards.subtype in ("h5n1", "h5nx", "h5n1-cattle-outbreak"):
if wildcards.subtype in ("h5n1", "h5nx"):
return "results/{subtype}/metadata-with-clade.tsv"
# cattle-flu.smk will make its own modifications as needed
elif wildcards.subtype=="h5n1-cattle-outbreak":
if wildcards.segment=="genome":
return "results/{subtype}/{segment}/default/metadata-with-clade-and-non-inferred-values.tsv"
else:
return "results/{subtype}/metadata-with-clade.tsv"
else:
return "results/{subtype}/metadata.tsv",

Expand Down Expand Up @@ -530,14 +536,22 @@ rule auspice_config:
run:
import json
with open(input.auspice_config) as fh:
config = json.load(fh)
auspice_config = json.load(fh)
if wildcards.subtype == "h5n1-cattle-outbreak":
if wildcards.segment == "genome":
config['display_defaults']['distance_measure'] = "num_date"
auspice_config['display_defaults']['distance_measure'] = "num_date"
division_idx = next((i for i,c in enumerate(auspice_config['colorings']) if c['key']=='division'), None)
assert division_idx!=None, "Auspice config did not have a division coloring!"
auspice_config['colorings'].insert(division_idx+1, {
"key": "division_metadata",
"title": auspice_config['colorings'][division_idx]["title"] + " (metadata)",
"type": "categorical",
})
auspice_config['colorings'][division_idx]["title"] += " (inferred)"
else:
config['display_defaults']['distance_measure'] = "div"
auspice_config['display_defaults']['distance_measure'] = "div"
with open(output.auspice_config, 'w') as fh:
json.dump(config, fh, indent=2)
json.dump(auspice_config, fh, indent=2)


rule export:
Expand Down
34 changes: 34 additions & 0 deletions rules/cattle-flu.smk
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,37 @@ rule prune_tree:
--output-tree {output.tree} \
--output-metadata {output.node_data}
"""

def assert_expected_config(w):
    """
    Check that config['traits'] still has the shape this file relies on.

    The rule `add_metadata_columns_to_show_non_inferred_values` hard-codes
    "division" as the column to duplicate, so the only accepted
    configuration is a `genome_columns` mapping with exactly one entry,
    `FALLBACK`, whose value is "division".

    `w` is accepted so the function can be used as a Snakemake params
    callable (which receives the job's wildcards); it is not consulted.

    Returns None when the config matches; raises Exception otherwise.
    """
    # TODO: once we refactor things we should use `get_config()` here
    # see <https://github.com/nextstrain/avian-flu/pull/100#discussion_r1823047047>
    # but currently this snakefile doesn't have access to that function.
    message = "Rule add_metadata_columns_to_show_non_inferred_values expected a certain format for config['traits'] that has since changed"
    try:
        genome_columns = config['traits']['genome_columns']
        as_expected = len(genome_columns) == 1 and genome_columns['FALLBACK'] == "division"
    except (KeyError, TypeError) as err:
        # Missing keys, or a value of the wrong type (e.g. a list rather
        # than a mapping), are just another way of not matching the format.
        raise Exception(message) from err
    # Explicit check rather than `assert`, which is removed under `python -O`
    # and would let a changed config slip through unnoticed.
    if not as_expected:
        raise Exception(message)

rule add_metadata_columns_to_show_non_inferred_values:
    """
    Write a copy of the clade-annotated metadata TSV with one extra column,
    `division_metadata`, which is a duplicate of the `division` column.

    Genome builds run `augur traits` for "division"; this is checked by
    `assert_expected_config`, wired in through the `assert_traits` param.
    NOTE: long-term we should be consulting `traits_params()` to work out the columns to duplicate, but
    that function's not visible to this .smk file so would require deeper refactoring.
    """
    input:
        metadata = "results/{subtype}/metadata-with-clade.tsv",
    output:
        metadata = "results/{subtype}/{segment}/{time}/metadata-with-clade-and-non-inferred-values.tsv",
    # Restrict the rule to the one subtype/segment/time combination named here.
    wildcard_constraints:
        subtype="h5n1-cattle-outbreak",
        segment="genome",
        time="default",
    params:
        old_column = "division",
        new_column = "division_metadata",
        # Not referenced by the shell command; listed so the config check runs
        # (presumably when Snakemake resolves this job's params -- confirm).
        assert_traits = assert_expected_config,
    # NOTE(review): relies on `csvtk mutate`'s default pattern to copy the whole
    # field, with `-t` selecting tab-delimited input/output -- confirm against csvtk docs.
    shell:
        """
        cat {input.metadata} | csvtk mutate -t -f {params.old_column} -n {params.new_column} > {output.metadata}
        """

0 comments on commit 443e785

Please sign in to comment.