generated from hubverse-org/hubTemplate
-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a script that generates the list of clades to model (#27)
* Add a script that generates the list of clades to model. The list of clades will be saved as a file in auxiliary-data/modeled-clades/YYYY-MM-DD.txt, where YYYY-MM-DD is the id of the upcoming modeling round.
* Add scheduled GitHub workflow to prep for round opening.
* Add a stub R script for adding a new modeling round to the hub. This iteration of the script is a placeholder to get R code hooked up to the GitHub workflow.
- Loading branch information
Showing
8 changed files
with
151 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Prepares the hub for an upcoming modeling round: generates the list of clades
# to model, runs the round-config script, and opens a PR with the result.
name: Create new modeling round

on:
  schedule:
    - cron: "0 03 * * 1" # every Monday at 3 AM UTC
  workflow_dispatch:

# The job pushes a branch and opens a pull request, so it needs write access to both.
permissions:
  contents: write
  pull-requests: write

jobs:
  create-modeling-round:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout 🛎️
        uses: actions/checkout@v4
        with:
          # don't check out repo folders with large amounts of data
          # (e.g., model-output, target-data)
          sparse-checkout: |
            auxiliary-data/
            hub-config/
            src/

      - name: Install uv 🌈
        uses: astral-sh/setup-uv@v2
        with:
          version: "0.4.9"

      - name: Set up Python 🐍
        uses: actions/setup-python@v5
        with:
          python-version-file: "src/.python-version"

      - name: Create clade list 🦠
        run: |
          uv run src/get_clades_to_model.py

      - name: Set up R 📊
        uses: r-lib/actions/setup-r@v2

      - name: Generate new round config 📝
        run: |
          Rscript src/make_round_config.R

      - name: Get current date and time 🕰️
        run: |
          PR_DATETIME=$(date +'%Y-%m-%d_%H-%M-%S')
          echo "PR_DATETIME=$PR_DATETIME" >> "$GITHUB_ENV"

      - name: Create PR for new modeling round 🚀
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git checkout -b new_round_"$PR_DATETIME"
          git add auxiliary-data/modeled-clades/
          # `git commit` exits non-zero when nothing is staged (e.g. the round's
          # clade file already exists unchanged), which would fail the run.
          if git diff --cached --quiet; then
            echo "No changes to commit; skipping PR creation."
            exit 0
          fi
          git commit -m "Add new round $PR_DATETIME"
          git push -u origin new_round_"$PR_DATETIME"
          gh pr create \
            --base main \
            --title "Add new round $PR_DATETIME" \
            --body "This PR was created via GitHub Actions: generate clade list and new round config."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,7 @@ | |
.Rhistory | ||
.RData | ||
.Ruserdata | ||
|
||
**/uv.lock | ||
|
||
.pre-commit-config.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
24A | ||
recombinant | ||
24B | ||
24C | ||
other |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
3.12 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
""" | ||
Create a list of Sars-CoV-2 clades to model and saves it to the hub's auxiliary-data folder. | ||
This script wraps the virus-clade-utils package, which generates the clade list using the | ||
latest GenBank-based Sars-CoV-2 sequence metadata from Nextstrain. | ||
https://github.com/reichlab/virus-clade-utils | ||
Current parameters for the clade list: | ||
threshold = .01, | ||
threshold_weeks = 3 week | ||
max_clades = 9 | ||
The script is scheduled to run every Monday, for use in the modeling round that will open | ||
on the following Wednesday. | ||
To run the script manually: | ||
1. Install uv on your machine: https://docs.astral.sh/uv/getting-started/installation/ | ||
2. From the root of this repo: uv run src/get_clades_to_model.py | ||
""" | ||
|
||
# /// script | ||
# dependencies = [ | ||
# "virus_clade_utils@git+https://github.com/reichlab/virus-clade-utils/", | ||
# ] | ||
# /// | ||
|
||
import logging | ||
from datetime import datetime, timedelta | ||
from pathlib import Path | ||
|
||
from virus_clade_utils import get_clade_list # type: ignore | ||
|
||
# Log to stderr: logging.StreamHandler() with no argument writes to sys.stderr.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
# Timestamped records, e.g. "09/16/2024 03:00:00 AM - INFO - <message>"
formatter = logging.Formatter(
    "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p"
)
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
|
||
|
||
def get_next_wednesday(starting_date: datetime) -> str:
    """Return the date of the next Wednesday in YYYY-MM-DD format.

    The result is always strictly after ``starting_date``: when
    ``starting_date`` is itself a Wednesday, the Wednesday one week later
    is returned.

    Args:
        starting_date: The date to start counting from.

    Returns:
        The following Wednesday, formatted as ``YYYY-MM-DD``.
    """
    wednesday = 2  # datetime.weekday() numbers Monday as 0

    # Python's % never returns a negative value, so this is already in 0..6;
    # a 0 (today is Wednesday) is bumped to a full week.
    days_until_wednesday = (wednesday - starting_date.weekday()) % 7 or 7

    next_wednesday = starting_date + timedelta(days=days_until_wednesday)
    return next_wednesday.strftime("%Y-%m-%d")
|
||
|
||
def main(round_id: str, clade_output_path: Path) -> None:
    """Get a list of clades to model and save it to the hub's auxiliary-data folder.

    The clade list comes from ``get_clade_list.main()`` and is written one
    clade per line to ``<clade_output_path>/<round_id>.txt``, followed by a
    final ``other`` line.

    Args:
        round_id: Id of the modeling round; used as the output file's name.
        clade_output_path: Directory that receives the clade file. It is
            created if it does not exist yet.
    """
    clade_list = get_clade_list.main()
    logger.info(f"Clade list: {clade_list}")

    # Make sure the target folder exists so the write below cannot fail on
    # a fresh checkout that has no modeled-clades directory yet.
    clade_output_path.mkdir(parents=True, exist_ok=True)

    clade_file = clade_output_path / f"{round_id}.txt"
    # Explicit encoding keeps the output independent of the platform default.
    with open(clade_file, "w", encoding="utf-8") as f:
        f.writelines(f"{clade}\n" for clade in clade_list)
        f.write("other\n")

    logger.info(f"Clade list saved: {clade_file}")
|
||
|
||
if __name__ == "__main__":
    # round_id will be the Wednesday following the creation of the clade list
    # NOTE(review): datetime.today() is naive local time; this matches the
    # workflow's UTC schedule only if the runner's clock is UTC -- confirm.
    round_id = get_next_wednesday(datetime.today())
    # The script is run as src/get_clades_to_model.py, so parents[1] is the
    # directory that contains src/.
    clade_output_path = Path(__file__).parents[1] / "auxiliary-data" / "modeled-clades"
    main(round_id, clade_output_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Placeholder for the script that will add a new modeling round to the hub.
# For now it only prints a status message.
# NOTE(review): the message names hub-config/tasks.config; confirm whether the
# intended file is hub-config/tasks.json before implementing.
print("Once implemented, script will update hub-config/tasks.config based on latest list of clades to model.")