Skip to content

Run post-submission jobs #37

Run post-submission jobs

Run post-submission jobs #37

# This workflow is designed to run shortly after the hub's weekly submission
# deadline. It generates an unscored-location-dates file as well as time
# series and oracle output target data.
name: Run post-submission jobs

on:
  # Manual runs accept an optional nowcast date.
  workflow_dispatch:
    inputs:
      nowcast_date:
        description: 'Nowcast date (YYYY-MM-DD)'
        required: false
  # Not precise: GitHub Actions cron has no time-zone support, so the run
  # time is approximated with two UTC schedules split by month range.
  schedule:
    # 1:20 AM UTC every Thursday (Nov-Dec, Jan-Mar)
    - cron: '20 01 * 11-12,1-3 4'
    # 12:20 AM UTC every Thursday (Apr-Oct)
    - cron: '20 00 * 4-10 4'

# Both scopes are write-level; the jobs below open pull requests through
# the repository-local create-pr action.
permissions:
  contents: write
  pull-requests: write
jobs:
  # Compute the dates shared by the downstream jobs and expose them as
  # job outputs.
  get-all-dates:
    runs-on: ubuntu-latest
    outputs:
      NOWCAST_DATE: ${{ steps.set-dates.outputs.nowcast_date }}
      SEQUENCE_AS_OF: ${{ steps.set-dates.outputs.sequence_as_of }}
      matrix: ${{ steps.set-dates.outputs.matrix_dates }}
    steps:
      - name: Checkout 🛎️
        uses: actions/checkout@v4
        with:
          # only checkout what we need
          sparse-checkout: |
            .github/
      - name: Set dates 🕰️
        id: set-dates
        # Nowcast_date = round_id of the round that just closed for submissions.
        # Sequence_as_of = date used to get the sequences for clade assignment.
        # Matrix_dates = dates used for matrix in create-target-data job (including
        # the round_id of the 91-day-old round that can now be officially closed).
        #
        # The manually supplied date is handed to the script through an
        # environment variable instead of being expanded into the script text,
        # so its contents can never be interpreted as shell code. It is empty
        # on scheduled runs.
        env:
          REQUESTED_NOWCAST_DATE: ${{ inputs.nowcast_date }}
        run: |
          source "${GITHUB_WORKSPACE}/.github/shared/shared-functions.sh"
          nowcast_date=$(get_latest_wednesday "$REQUESTED_NOWCAST_DATE")
          sequence_as_of=$(date -d "$nowcast_date - 1 day" +%Y-%m-%d)
          matrix_dates=$(generate_weekly_dates "$nowcast_date" 13)
          echo "nowcast_date=$nowcast_date" >> "$GITHUB_OUTPUT"
          echo "sequence_as_of=$sequence_as_of" >> "$GITHUB_OUTPUT"
          echo "matrix_dates=$matrix_dates" >> "$GITHUB_OUTPUT"
          echo -e "\nDebug values:"
          echo "Nowcast date: $nowcast_date"
          echo "Sequence as of: $sequence_as_of"
          echo "Matrix dates: $matrix_dates"

  # Create target data for every nowcast date in the matrix produced by
  # get-all-dates (one matrix job per date), which includes the round from
  # about 90 days ago that can now be closed. According to the original note,
  # the script exits when no round exists for that older nowcast date --
  # NOTE(review): that behavior lives in src/get_target_data.py; confirm there.
  create-target-data:
    runs-on: ubuntu-latest
    needs: get-all-dates
    env:
      SEQUENCE_AS_OF: ${{ needs.get-all-dates.outputs.SEQUENCE_AS_OF }}
    strategy:
      # The matrix definition is the JSON emitted by the set-dates step.
      matrix: ${{ fromJson(needs.get-all-dates.outputs.matrix) }}
    steps:
      - name: Checkout 🛎️
        uses: actions/checkout@v4
        with:
          # only checkout what we need
          sparse-checkout: |
            .github/
            auxiliary-data/
            hub-config/
            target-data/
            src/
      - name: Install uv 🐍
        uses: astral-sh/setup-uv@v5
        with:
          version: ">=0.0.1"
      - name: Create target data 🎯
        # The matrix value is passed via the environment rather than being
        # expanded into the script text.
        env:
          MATRIX_NOWCAST_DATE: ${{ matrix.nowcast-date }}
        run: |
          echo "sequence_as_of: $SEQUENCE_AS_OF"
          uv run get_target_data.py \
            --nowcast-date="$MATRIX_NOWCAST_DATE" \
            --sequence-as-of="$SEQUENCE_AS_OF" \
            --target-data-dir="${{ github.workspace }}"
        working-directory: src
      - name: Upload target data 📤
        uses: actions/upload-artifact@v4
        with:
          # One artifact per matrix date; target-data-pr merges them.
          name: target-data-${{ matrix.nowcast-date }}
          path: |
            ${{ github.workspace }}/time-series/**/**/*.parquet
            ${{ github.workspace }}/oracle-output/**/*.parquet

  # Collect the artifacts from every create-target-data matrix job, copy them
  # into target-data/, and open a single pull request with the result.
  target-data-pr:
    runs-on: ubuntu-latest
    needs: create-target-data
    steps:
      - name: Checkout 🛎️
        uses: actions/checkout@v4
        with:
          # only checkout what we need
          sparse-checkout: |
            .github/
            auxiliary-data/
            hub-config/
            target-data/
            src/
      - name: Download target data 📥
        uses: actions/download-artifact@v4
        with:
          pattern: target-data-*
          merge-multiple: true
          path: ${{ github.workspace }}
      - name: Move target and interim data to hub directory ➡️
        run: |
          echo "Merging new target data files into target-data directory"
          cp -R ${{ github.workspace }}/time-series/* ${{ github.workspace }}/target-data/time-series/
          cp -R ${{ github.workspace }}/oracle-output/* ${{ github.workspace }}/target-data/oracle-output/
          echo "Contents of target-data after merge:"
          ls -lR ${{ github.workspace }}/target-data/
      - name: Create PR for new target and interim data
        uses: ./.github/actions/create-pr
        with:
          pr-prefix: "add-target-data"
          file-path: target-data/
          commit-message: "Add target data and interim files."
          pr-body: "Created via GitHub Actions: generate target and interim data."

  # Generate the sequence-count file for the round that just closed and open
  # a pull request containing it.
  create-location-date-sequence-counts:
    runs-on: ubuntu-latest
    needs: get-all-dates
    env:
      NOWCAST_DATE: ${{ needs.get-all-dates.outputs.NOWCAST_DATE }}
    steps:
      - name: Checkout 🛎️
        uses: actions/checkout@v4
        with:
          # only checkout what we need
          sparse-checkout: |
            .github/
            auxiliary-data/
            hub-config/
            src/
      - name: Install uv 🐍
        uses: astral-sh/setup-uv@v5
        with:
          version: ">=0.0.1"
      - name: Create file of sequence counts by location and date 🦠
        run: |
          uv run get_location_date_counts.py --nowcast-date="$NOWCAST_DATE"
        working-directory: src
      - name: Create PR for sequence counts 🚀
        uses: ./.github/actions/create-pr
        with:
          pr-prefix: "${{ env.NOWCAST_DATE }}-unscored-locations"
          file-path: auxiliary-data/unscored-location-dates/
          # `with:` values are not processed by a shell, so the date has to be
          # inserted with an Actions expression (as pr-prefix does) rather
          # than with $NOWCAST_DATE.
          commit-message: "Add unscored locations for round ${{ env.NOWCAST_DATE }}"
          pr-body: "Created via GitHub Actions: generate count of sequences collected by date/location for the past 31 days."