# Web-page header text captured along with this workflow (not part of the YAML):
#   Skip to content
#   Snapshot NHSN data and upload to S3 #19

# Workflow header: display name, triggers, shared environment, and the token
# permissions granted to every job in this workflow.
name: Snapshot NHSN data and upload to S3

on:
  schedule:
    # every Wednesday at 17:45 UTC (12:45PM EST / 1:45PM EDT)
    - cron: "45 17 * * 3"
  # also allow the workflow to be started manually
  workflow_dispatch:

env:
  # Reich lab AWS account number.
  # Quoted so the all-digit ID is kept as a string rather than parsed as an
  # integer.
  AWS_ACCOUNT: "312560106906"

permissions:
  contents: read
  # id-token write required for AWS auth
  id-token: write
jobs:
  # Downloads a dataset from data.cdc.gov with R, keeps four columns, writes
  # them to a date-stamped CSV, and copies that CSV to an S3 bucket.
  snapshot-nhsn-data:
    runs-on: ubuntu-latest
    steps:
      - name: Set up R 📊
        uses: r-lib/actions/setup-r@v2
        with:
          # quoted so the version is always passed as a string
          r-version: "4.4.1"
          install-r: true
          use-public-rspm: true
          extra-repositories: 'https://hubverse-org.r-universe.dev'

      - name: install R packages
        run: |
          Rscript -e "install.packages('RSocrata')"

      - name: Get file name
        # Writes FILE_NAME (nhsn-YYYY-MM-DD.csv, from the runner's current
        # date) to $GITHUB_ENV so later steps can read it.
        run: echo "FILE_NAME=nhsn-$(date +'%Y-%m-%d').csv" >> "$GITHUB_ENV"

      - name: Snapshot NHSN data
        # The R program is one shell argument in double quotes, so the shell
        # expands $FILE_NAME before R sees it. The folded scalar (>-) joins the
        # lines below with single spaces into one command line; keep every
        # line at the same indentation or the folding changes.
        # R steps: read the dataset, keep four columns, truncate
        # weekendingdate to its first 10 characters (presumably the
        # YYYY-MM-DD part -- confirm against the source data), rename the
        # columns, and write the CSV.
        run: >-
          Rscript -e "nhsn_data <- RSocrata::read.socrata('https://data.cdc.gov/resource/mpgq-jmmr.csv');
          nhsn_data <- nhsn_data[c('jurisdiction', 'weekendingdate', 'totalconfflunewadm', 'totalconfc19newadm')];
          nhsn_data[['weekendingdate']] <- substr(nhsn_data[['weekendingdate']], 1, 10);
          colnames(nhsn_data) <- c('Geographic aggregation', 'Week Ending Date', 'Total Influenza Admissions', 'Total COVID-19 Admissions');
          write.csv(nhsn_data, file = '$FILE_NAME', row.names = FALSE)"
        env:
          # FILE_NAME was set via $GITHUB_ENV in the previous step; it is
          # passed explicitly here as well.
          FILE_NAME: ${{ env.FILE_NAME }}

      - name: Configure AWS credentials
        # request credentials to assume the hub's AWS role via OpenID Connect
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: 'arn:aws:iam::${{ env.AWS_ACCOUNT }}:role/iddata-github-action'
          aws-region: us-east-1

      - name: Copy files to cloud storage
        # Uploads the CSV written by the snapshot step under the same file
        # name.
        run: |
          aws s3 cp "./$FILE_NAME" "s3://infectious-disease-data/data-raw/influenza-nhsn/$FILE_NAME"