Skip to content

Commit

Permalink
Merge pull request #158 from nextstrain/add-nextclade-github-action
Browse files Browse the repository at this point in the history
Add GitHub Action for Nextclade annotations
  • Loading branch information
huddlej authored Apr 1, 2024
2 parents 7fbd1cd + 94cdfe8 commit a162342
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 5 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/run-nextclade.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Manually triggered workflow that runs Nextclade over all sequences by
# delegating to the shared Nextstrain pathogen-repo-build reusable workflow.
name: Run Nextclade on all sequences

on:
  workflow_dispatch:
    inputs:
      # Optional; exported to the build as NEXTSTRAIN_DOCKER_IMAGE below.
      dockerImage:
        description: 'Specific container image to use for build (will override the default of `nextstrain build`)'
        required: false
        type: string

jobs:
  run-build:
    # NOTE(review): presumably needed so the reusable workflow can request an
    # OIDC token for cloud credentials; confirm against that workflow's docs.
    permissions:
      id-token: write
    uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
    secrets: inherit
    with:
      runtime: aws-batch
      env: |
        NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }}
      run: |
        nextstrain build \
          --detach \
          --cpus 36 \
          --memory 72gib \
          --env AWS_ACCESS_KEY_ID \
          --env AWS_SECRET_ACCESS_KEY \
          . \
          upload_all_nextclade_files \
          -p \
          --configfile profiles/nextclade.yaml \
          --set-threads run_nextclade=12
1 change: 1 addition & 0 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ from treetime.utils import numeric_date


wildcard_constraints:
lineage = r'h1n1pdm|h3n2|vic|yam',
segment = r'pb2|pb1|pa|ha|np|na|mp|ns',
center = r'who|cdc|crick|niid|crick|vidrl',
passage = r'cell|egg',
Expand Down
17 changes: 17 additions & 0 deletions profiles/nextclade.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Configuration profile for the Nextclade workflow
# (target rule: upload_all_nextclade_files).

# Extra Snakemake rule files needed by this profile.
# NOTE(review): presumably included by the main Snakefile; confirm there.
custom_rules:
  - workflow/snakemake_rules/download_from_s3.smk
  - profiles/nextclade/run-nextclade.smk

# S3 prefix that the upload rules write alignments and annotations under.
s3_dst: 's3://nextstrain-data-private/files/workflows/seasonal-flu'

# Segments to process; combined with every build's lineage below.
segments:
  - ha
  - na

# One entry per build; only the `lineage` value is read by the Nextclade rules.
builds:
  h1n1pdm:
    lineage: h1n1pdm
  h3n2:
    lineage: h3n2
  vic:
    lineage: vic
72 changes: 72 additions & 0 deletions profiles/nextclade/run-nextclade.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
def _all_nextclade_upload_flags(wildcards):
    """Return the upload flag files for every configured build and segment.

    One "alignment" flag and one "nextclade" flag is requested per
    (build lineage, segment) pair taken from `config["builds"]` and
    `config["segments"]`. The `wildcards` argument is unused; it is only
    accepted because Snakemake passes it to input functions.
    """
    flag_template = "data/upload/s3/{filetype}_{lineage}_{segment}.done"
    flags = []
    for kind in ("alignment", "nextclade"):
        for build_config in config["builds"].values():
            for gene in config["segments"]:
                flags.append(
                    flag_template.format(
                        filetype=kind,
                        lineage=build_config["lineage"],
                        segment=gene,
                    )
                )
    return flags


# Aggregate target rule: requesting it triggers every alignment and
# annotation upload listed by the input function above.
rule upload_all_nextclade_files:
    input:
        files=_all_nextclade_upload_flags,

# Fetch the Nextclade dataset named flu_<lineage>_<segment> into its own
# directory so that run_nextclade can pass it via `-D`.
rule get_nextclade_dataset_for_lineage_and_segment:
    output:
        # Declared with directory() because the command writes a whole folder.
        nextclade_dir=directory("nextclade_dataset/{lineage}_{segment}/"),
    shell:
        """
        nextclade3 dataset get \
            -n flu_{wildcards.lineage}_{wildcards.segment} \
            --output-dir {output.nextclade_dir}
        """

# Align and annotate one lineage/segment's sequences with Nextclade.
#
# Inputs:  the downloaded Nextclade dataset directory and the parsed FASTA.
# Outputs: the aligned FASTA and the Nextclade TSV annotations, written under
#          data/upload/s3/ where the upload rules pick them up.
# Log:     Nextclade's stdout and stderr are captured in the declared log file
#          (previously the log directive was declared but never written to).
rule run_nextclade:
    input:
        nextclade_dir="nextclade_dataset/{lineage}_{segment}/",
        sequences="data/{lineage}/{segment}.fasta",
    output:
        alignment="data/upload/s3/{lineage}/{segment}/aligned.fasta",
        annotations="data/upload/s3/{lineage}/{segment}/nextclade.tsv",
    log:
        "logs/run_nextclade_{lineage}_{segment}.txt"
    threads: 8
    shell:
        # `tee` keeps the output visible on the console while also saving it to
        # the log. Snakemake runs shell commands in bash strict mode (pipefail),
        # so a Nextclade failure still fails the job despite the pipe.
        # The `:q` modifier shell-quotes each path, matching the upload rules.
        """
        nextclade3 run \
            -j {threads} \
            -D {input.nextclade_dir:q} \
            --output-fasta {output.alignment:q} \
            --output-tsv {output.annotations:q} \
            {input.sequences:q} 2>&1 | tee {log:q}
        """

# Upload one lineage/segment alignment to S3 as aligned.fasta.xz under the
# configured `s3_dst` prefix. The upload script's combined stdout/stderr is
# written to the ".done" flag file, which marks the upload as complete.
rule upload_alignment:
    input:
        alignment="data/upload/s3/{lineage}/{segment}/aligned.fasta",
    output:
        flag="data/upload/s3/alignment_{lineage}_{segment}.done",
    log:
        "logs/upload_alignment_{lineage}_{segment}.txt"
    params:
        s3_dst=config["s3_dst"],
    shell:
        """
        ./scripts/upload-to-s3 \
            --quiet \
            {input.alignment:q} \
            {params.s3_dst:q}/{wildcards.lineage}/{wildcards.segment}/aligned.fasta.xz 2>&1 | tee {output.flag}
        """

# Upload one lineage/segment Nextclade annotation table to S3 as
# nextclade.tsv.xz under the configured `s3_dst` prefix. The upload script's
# combined stdout/stderr is written to the ".done" flag file, which marks the
# upload as complete.
rule upload_nextclade_annotations:
    input:
        annotations="data/upload/s3/{lineage}/{segment}/nextclade.tsv",
    output:
        flag="data/upload/s3/nextclade_{lineage}_{segment}.done",
    log:
        "logs/upload_nextclade_annotations_{lineage}_{segment}.txt"
    params:
        s3_dst=config["s3_dst"],
    shell:
        """
        ./scripts/upload-to-s3 \
            --quiet \
            {input.annotations:q} \
            {params.s3_dst:q}/{wildcards.lineage}/{wildcards.segment}/nextclade.tsv.xz 2>&1 | tee {output.flag}
        """
8 changes: 4 additions & 4 deletions workflow/snakemake_rules/core.smk
Original file line number Diff line number Diff line change
Expand Up @@ -488,10 +488,10 @@ rule annotate_recency_of_submissions:
output:
node_data = "builds/{build_name}/recency.json",
params:
submission_date_field=config["submission_date_field"],
date_bins=config["recency"]["date_bins"],
date_bin_labels=config["recency"]["date_bin_labels"],
upper_bin_label=config["recency"]["upper_bin_label"],
submission_date_field=config.get("submission_date_field"),
date_bins=config.get("recency", {}).get("date_bins"),
date_bin_labels=config.get("recency", {}).get("date_bin_labels"),
upper_bin_label=config.get("recency", {}).get("upper_bin_label"),
conda: "../envs/nextstrain.yaml"
benchmark:
"benchmarks/recency_{build_name}.txt"
Expand Down
25 changes: 25 additions & 0 deletions workflow/snakemake_rules/download_from_s3.smk
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Resolve ambiguity between rules that can produce the same output file:
# when both rules of a pair match a requested file, Snakemake uses the one on
# the left of `>`. Here the S3 download rules take precedence.
# NOTE(review): `parse` and `annotate_metadata_with_reference_strains` are
# defined elsewhere; presumably they write data/{lineage}/{segment}.fasta and
# data/{lineage}/metadata.tsv respectively. Confirm against those rules.
ruleorder: download_parsed_sequences > parse
ruleorder: download_parsed_metadata > annotate_metadata_with_reference_strains

rule download_sequences:
output:
sequences="data/{lineage}/raw_{segment}.fasta"
Expand All @@ -19,3 +22,25 @@ rule download_titers:
"""
aws s3 cp {params.s3_path} - | gzip -c -d > {output.titers}
"""

# Download the already-parsed sequences for one lineage/segment from S3 and
# decompress them (xz) into the local data directory.
rule download_parsed_sequences:
    output:
        sequences="data/{lineage}/{segment}.fasta",
    conda: "../../workflow/envs/nextstrain.yaml"
    params:
        s3_path="s3://nextstrain-data-private/files/workflows/seasonal-flu/{lineage}/{segment}/sequences.fasta.xz",
    shell:
        # Stream the object to stdout ("-") and decompress it on the fly.
        """
        aws s3 cp {params.s3_path} - | xz -c -d > {output.sequences}
        """

# Download the already-parsed metadata table for one lineage from S3 and
# decompress it (xz) into the local data directory.
rule download_parsed_metadata:
    output:
        metadata="data/{lineage}/metadata.tsv",
    conda: "../../workflow/envs/nextstrain.yaml"
    params:
        s3_path="s3://nextstrain-data-private/files/workflows/seasonal-flu/{lineage}/metadata.tsv.xz",
    shell:
        # Stream the object to stdout ("-") and decompress it on the fly.
        """
        aws s3 cp {params.s3_path} - | xz -c -d > {output.metadata}
        """
2 changes: 1 addition & 1 deletion workflow/snakemake_rules/export.smk
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ rule export:
metadata = build_dir + "/{build_name}/metadata.tsv",
node_data = _get_node_data_by_wildcards,
auspice_config = lambda w: config['builds'][w.build_name]['auspice_config'],
lat_longs = config['lat-longs']
lat_longs = config.get('lat-longs', "config/lat_longs.tsv"),
output:
auspice_json = "auspice/{build_name}_{segment}.json",
root_sequence_json = "auspice/{build_name}_{segment}_root-sequence.json",
Expand Down

0 comments on commit a162342

Please sign in to comment.