-
Notifications
You must be signed in to change notification settings - Fork 9
98 lines (91 loc) · 3.59 KB
/
ingest-ncbi.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
name: Ingest NCBI
defaults:
run:
# This is the same as GitHub Action's `bash` keyword as of 20 June 2023:
# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell
#
# Completely spelling it out here so that GitHub can't change it out from under us
# and we don't have to refer to the docs to know the expected behavior.
shell: bash --noprofile --norc -eo pipefail {0}
on:
schedule:
# Note times are in UTC, which is 1 or 2 hours behind CET depending on daylight savings.
#
# Note the actual runs might be late.
# Numerous people were confused, about that, including me:
# - https://github.community/t/scheduled-action-running-consistently-late/138025/11
# - https://github.com/github/docs/issues/3059
#
# Note, '*' is a special character in YAML, so you have to quote this string.
#
# Docs:
# - https://docs.github.com/en/actions/learn-github-actions/events-that-trigger-workflows#schedule
#
# Tool that deciphers this particular format of crontab string:
# - https://crontab.guru/
#
# Runs at 5pm UTC (1pm EDT/10am PDT) since curation by NCBI happens on the East Coast.
# We were running into invalid zip archive errors at 9am PDT, so hoping an hour
# delay will lower the error frequency
- cron: '0 17 * * *'
workflow_dispatch:
inputs:
image:
description: 'Specific container image to use for ingest workflow (will override the default of "nextstrain build")'
required: false
type: string
trial-name:
description: |
Trial name for outputs.
If not set, outputs will overwrite files at s3://nextstrain-data/files/workflows/avian-flu/
If set, outputs will be uploaded to s3://nextstrain-data/files/workflows/avian-flu/trials/<trial_name>/
required: false
type: string
jobs:
ingest:
permissions:
id-token: write
uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
secrets: inherit
with:
# Starting with the default docker runtime
# We can migrate to AWS Batch when/if we need to for more resources or if
# the job runs longer than the GH Action limit of 6 hours.
runtime: docker
run: |
declare -a config;
if [[ "$TRIAL_NAME" ]]; then
# Create JSON string for the nested upload config
S3_DST_BASE="s3://nextstrain-data/files/workflows/avian-flu/trial/$TRIAL_NAME"
config+=(
s3_dst=$(
jq -cn --arg S3_DST_BASE "$S3_DST_BASE" '{
"joined-ncbi": "\($S3_DST_BASE)/h5n1",
"ncbi": "\($S3_DST_BASE)/h5n1/ncbi",
"andersen-lab": "\($S3_DST_BASE)/h5n1/andersen-lab"
}'
)
)
fi;
nextstrain build \
ingest \
upload_all_ncbi \
--configfile build-configs/ncbi/defaults/config.yaml \
--config "${config[@]}"
env: |
NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }}
TRIAL_NAME: ${{ inputs.trial-name }}
# Specifying artifact name to differentiate ingest build outputs from
# the phylogenetic build outputs
artifact-name: ingest-ncbi-build-output
artifact-paths: |
ingest/.snakemake/log/
ingest/andersen-lab/results/
ingest/andersen-lab/benchmarks/
ingest/andersen-lab/logs/
ingest/joined-ncbi/results/
ingest/joined-ncbi/benchmarks/
ingest/joined-ncbi/logs/
ingest/ncbi/results/
ingest/ncbi/benchmarks/
ingest/ncbi/logs/