Skip to content

test-iasworld-data

test-iasworld-data #31

name: test-iasworld-data

# This workflow is not configured to run on PRs, because PRs have their own CI
# process defined by the `build-and-test-dbt` workflow. Instead, the Data
# Department's Spark data extraction process dispatches it via API once per
# day after finishing the daily ingest of iasWorld data. For more detail, see:
#
# https://github.com/ccao-data/service-spark-iasworld
on:
  workflow_dispatch:
    # All inputs are optional. `select` and `selector` are alternative ways
    # of choosing tests; `selector` wins when both are provided.
    inputs:
      select:
        description: >
          Optional space-separated list of tests to run (defaults to all
          iasWorld data tests)
        required: false
        type: string
      selector:
        description: >
          Optional dbt selector representing tests to run (takes precedence
          over the above list of tests if both are present)
        required: false
        type: string
      # Off by default, so results are only uploaded when a run asks for it
      upload_test_results:
        description: Upload test results to S3
        required: false
        default: false
        type: boolean
# Workflow-level environment variables, visible to every step in every job.
# Values are quoted because environment variables are always strings.
env:
  # Stop Python from buffering stdout/stderr so log lines show up immediately
  PYTHONUNBUFFERED: "1"
  # Let `uv pip install` target the system Python instead of a virtualenv
  UV_SYSTEM_PYTHON: "1"
jobs:
  # Runs scripts/run_iasworld_data_tests.py against the dbt project and, when
  # `upload_test_results` is set, syncs the results to S3 and starts a Glue
  # crawler. Failures of runs dispatched by `sqoop-bot[bot]` are reported via
  # SNS.
  test-iasworld-data:
    runs-on: ubuntu-latest
    # These permissions are needed to interact with GitHub's OIDC Token endpoint
    # so that we can authenticate with AWS
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Collapse the optional `select` / `selector` inputs into one CLI flag
      # and expose it to later steps as the `select-option` output. The
      # inputs reach the script through env vars rather than being
      # interpolated directly into the shell source.
      - name: Validate and parse input variables
        id: parse-inputs
        run: |
          # Default to no select option, which will fall back to the default
          # behavior of the underlying Python script
          SELECT_OPTION=""
          if [[ -n "$SELECTOR" ]]; then
            SELECT_OPTION="--selector $SELECTOR"
          elif [[ -n "$SELECT" ]]; then
            SELECT_OPTION="--select $SELECT"
          fi
          echo "Setting select option to '$SELECT_OPTION'"
          echo "select-option=$SELECT_OPTION" >> "$GITHUB_OUTPUT"
        shell: bash
        env:
          SELECT: ${{ inputs.select }}
          SELECTOR: ${{ inputs.selector }}

      - name: Setup dbt
        uses: ./.github/actions/setup_dbt
        with:
          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}

      # NOTE(review): PROJECT_DIR, STATE_DIR, CACHE_KEY and TARGET are not
      # defined anywhere in this workflow; presumably the setup_dbt action
      # exports them -- confirm.
      - name: Restore dbt state cache
        id: cache
        uses: ./.github/actions/restore_dbt_cache
        with:
          path: ${{ env.PROJECT_DIR }}/${{ env.STATE_DIR }}
          key: ${{ env.CACHE_KEY }}

      - name: Install Python dependencies
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        run: uv pip install ".[dbt_tests]"

      - name: Run tests
        run: |
          # Only defer to the cached dbt state when the cache step reported
          # a hit
          DEFER_OPTION=""
          if [[ "$CACHE_HIT" == 'true' ]]; then
            DEFER_OPTION="--defer --state $STATE_DIR"
          fi
          # The option variables below are deliberately left unquoted so that
          # each one splits into separate arguments and empty ones vanish
          # shellcheck disable=SC2086
          python scripts/run_iasworld_data_tests.py \
            --target "$TARGET" \
            --output-dir ./qc_test_results/ \
            $SELECT_OPTION \
            $SKIP_ARTIFACTS_OPTION \
            $DEFER_OPTION
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        env:
          USER: ${{ github.triggering_actor }}
          GIT_SHA: ${{ github.sha }}
          GIT_REF: ${{ github.ref_name }}
          # NOTE(review): this workflow only runs on `workflow_dispatch`,
          # whose event payload does not appear to include a `commits` array,
          # so this likely resolves to an empty string -- confirm that the
          # test script tolerates an empty GIT_AUTHOR.
          GIT_AUTHOR: ${{ github.event.commits[0].author.name }}
          CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
          SELECT_OPTION: ${{ steps.parse-inputs.outputs.select-option }}
          SKIP_ARTIFACTS_OPTION: ${{ inputs.upload_test_results && '--no-skip-artifacts' || '--skip-artifacts' }}

      # Sync whichever metadata directories the test run produced to the QC
      # prefix in S3, then start the Glue crawler
      - name: Save test results to S3
        if: inputs.upload_test_results
        run: |
          s3_prefix="s3://ccao-data-warehouse-us-east-1/qc"
          local_prefix="qc_test_results/metadata"
          for dir in "test_run" "test_run_result" "test_run_failing_row"; do
            dirpath="${local_prefix}/${dir}"
            # Skip directories that this run did not create
            if [ -e "$dirpath" ]; then
              echo "Copying ${dirpath} metadata to S3"
              aws s3 sync "$dirpath" "${s3_prefix}/${dir}"
            fi
          done
          crawler_name="ccao-data-warehouse-qc-crawler"
          aws glue start-crawler --name "$crawler_name"
          echo "Triggered Glue crawler $crawler_name"
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash

      # Record a UTC timestamp in the job environment so that the failure
      # notification below can reference it as `env.TIMESTAMP`
      - name: Get current time
        if: failure()
        run: echo "TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S")" >> "$GITHUB_ENV"
        shell: bash

      # Only sent when the workflow was dispatched by the `sqoop-bot[bot]`
      # actor. Otherwise, whoever dispatched the workflow is notified via
      # GitHub (instead of SNS)
      - name: Send failure notification
        if: github.event_name == 'workflow_dispatch' && github.triggering_actor == 'sqoop-bot[bot]' && failure()
        uses: ./.github/actions/publish_sns_topic
        with:
          sns_topic_arn: ${{ secrets.AWS_SNS_NOTIFICATION_TOPIC_ARN }}
          subject: "iasWorld tests errored for workflow run: ${{ github.run_id }}"
          body: |
            iasWorld tests raised an error for workflow ${{ github.run_id }}, run on ${{ env.TIMESTAMP }} UTC
            Link to failing workflow:
            https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}