# Process Samples Aspera #20 (web-page navigation text removed)

# Scheduled workflow: selects a batch of samples, runs the nextflow `aspera`
# entry point on them, uploads the outputs to Cloud Storage, and commits the
# updated data/all_metadata.csv back to the repository.
name: Process Samples Aspera
on:
  schedule:
    # Minute 0 of every hour from 00:00 through 16:00, on Sunday (0) and
    # Saturday (6). GitHub evaluates schedule expressions in UTC.
    - cron: '0 0-16 * * 0,6'
env:
  # Batch size handed to the helper scripts and to nextflow (--num_samples).
  BATCH_SIZE: 100
jobs:
  # Gate job: publishes `run_rest_jobs`, which run_samples checks before running.
  setup:
    permissions:
      contents: write
      # Needed so the auth action can request an OIDC token for workload
      # identity federation.
      id-token: write
    runs-on: self-hosted
    outputs:
      # A job output can only read steps of this same job, so the
      # `get_accession_list` step must exist below; otherwise this value is
      # always empty and run_samples is always skipped.
      run_rest_jobs: ${{ steps.get_accession_list.outputs.run_jobs }}
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
      # Authenticate first, then install the SDK, so gcloud is configured with
      # the federated credentials.
      - id: 'auth'
        name: 'Authenticate with gcloud'
        uses: 'google-github-actions/auth@v2'
        with:
          workload_identity_provider: 'projects/12767718289/locations/global/workloadIdentityPools/github/providers/freyja-sra'
          # NOTE(review): this value looks like an e-mail obfuscation
          # placeholder, not a real service account address -- restore the
          # real address before use.
          service_account: '[email protected]'
      - name: 'Set up gcloud SDK'
        uses: 'google-github-actions/setup-gcloud@v2'
        with:
          version: '>= 363.0.0'
      # Supplies the `run_jobs` step output consumed by `outputs` above.
      # NOTE(review): assumes scripts/get_accession_list.py writes `run_jobs`
      # to $GITHUB_OUTPUT and is safe to run in both jobs -- confirm.
      - name: 'Get accession list'
        id: get_accession_list
        run: |
          python scripts/get_accession_list.py $BATCH_SIZE
      # NOTE(review): the copy command below is elided (`...`) in this view;
      # restore the real source and destination arguments.
      - name: 'Fetch fastq files'
        run: |
          gcloud storage cp ... # Fetch fastq files from cloud storage
  # Worker job: runs only when setup reported run_rest_jobs == 'true'.
  run_samples:
    needs: [setup]
    if: needs.setup.outputs.run_rest_jobs == 'true'
    permissions:
      # Write access is used by the commit-and-push step at the end.
      contents: write
      id-token: write
    runs-on: self-hosted
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Setup nextflow
        uses: nf-core/setup-nextflow@v1
      - id: 'auth'
        name: 'Authenticate with gcloud'
        uses: 'google-github-actions/auth@v2'
        with:
          workload_identity_provider: 'projects/12767718289/locations/global/workloadIdentityPools/github/providers/freyja-sra'
          # NOTE(review): obfuscation placeholder -- restore the real service
          # account address before use.
          service_account: '[email protected]'
      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v2'
        with:
          version: '>= 363.0.0'
      # Runs again here because this job has its own checkout; presumably the
      # script produces data/accession_list.csv read by the next step --
      # verify against the script.
      - name: 'Get accession list'
        id: get_accession_list
        run: |
          python scripts/get_accession_list.py $BATCH_SIZE
      # nextflow is started in the background and immediately waited on;
      # `wait` returns the exit status of that process, so a pipeline failure
      # still fails this step.
      - name: Run pipeline on new samples
        run: |
          export NXF_ENABLE_VIRTUAL_THREADS=false
          nextflow run main.nf \
            --accession_list data/accession_list.csv \
            --num_samples $BATCH_SIZE \
            -profile docker \
            -entry aspera &
          BG_PID=$!
          wait $BG_PID
      - id: 'upload-outputs'
        name: 'Upload Outputs to Cloud Storage'
        uses: 'google-github-actions/upload-cloud-storage@v2'
        with:
          path: 'outputs/'
          destination: 'outbreak-ww-data/'
          parent: false
          project_id: 'andersen-lab-primary'
      - name: 'Update processed samples'
        run: |
          python scripts/update_sample_status.py $BATCH_SIZE
      # Commit only when data/all_metadata.csv actually changed: `git commit`
      # exits non-zero on an empty index, which would fail the whole job.
      # NOTE(review): `git push --force` can discard commits that landed on the
      # remote while the pipeline was running -- confirm this is intended.
      # NOTE(review): the user.email value is an obfuscation placeholder --
      # restore the intended address.
      - name: 'Commit and push changes'
        run: |
          git config --local user.name "$GITHUB_ACTOR"
          git config --local user.email "[email protected]"
          git remote set-url origin https://github.com/andersen-lab/Freyja-SRA
          git add data/all_metadata.csv
          if git diff --cached --quiet; then
            echo "No changes to data/all_metadata.csv; nothing to commit."
          else
            git commit -m "Update processed samples"
            git push --force
          fi
      # Always clean the self-hosted runner, even after a failed step. The
      # `:?` guard aborts instead of expanding to `/*` if the variable is unset.
      - name: 'Clean workspace'
        if: always()
        run: |
          rm -rf "${GITHUB_WORKSPACE:?}"/*