Skip to content

Commit

Permalink
Define GitHub actions workflows for building and testing dbt (#50)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeancochrane authored Aug 8, 2023
1 parent 5896cb6 commit 9574a38
Show file tree
Hide file tree
Showing 11 changed files with 288 additions and 6 deletions.
29 changes: 29 additions & 0 deletions .github/actions/configure_dbt_environment/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name: Configure dbt environment
description: Set environment variables based on the active dbt project (CI or prod)
runs:
  using: composite
  steps:
    # Appends TARGET and CACHE_KEY (and HEAD_REF for CI targets) to
    # $GITHUB_ENV so that later steps in the calling job can read them.
    - name: Configure dbt environment
      run: |
        # Check the event type before the branch name: on pull_request
        # events GITHUB_REF_NAME is not the PR's own branch, so a branch-name
        # match there could wrongly select the prod or data-catalog settings
        # (e.g. when a PR is closed after being merged into one of them).
        if [[ $GITHUB_EVENT_NAME == 'pull_request' ]]; then
          echo "On pull request branch, setting dbt env to CI"
          {
            echo "TARGET=ci";
            echo "CACHE_KEY=$GITHUB_HEAD_REF";
            echo "HEAD_REF=$GITHUB_HEAD_REF";
          } >> "$GITHUB_ENV"
        elif [[ $GITHUB_REF_NAME == 'master' ]]; then
          echo "On master branch, setting dbt env to prod"
          {
            echo "TARGET=prod";
            echo "CACHE_KEY=master";
          } >> "$GITHUB_ENV"
        elif [[ $GITHUB_REF_NAME == 'data-catalog' ]]; then
          echo "On data catalog branch, setting dbt env to CI"
          {
            echo "TARGET=ci";
            echo "CACHE_KEY=data-catalog";
            echo "HEAD_REF=data-catalog";
          } >> "$GITHUB_ENV"
        else
          # GITHUB_HEAD_REF is only populated for pull request events, so
          # fall back to the ref name to avoid exporting empty values.
          head_ref="${GITHUB_HEAD_REF:-$GITHUB_REF_NAME}"
          echo "On branch $head_ref, setting dbt env to CI"
          {
            echo "TARGET=ci";
            echo "CACHE_KEY=$head_ref";
            echo "HEAD_REF=$head_ref";
          } >> "$GITHUB_ENV"
        fi
      shell: bash
34 changes: 34 additions & 0 deletions .github/actions/install_dbt_requirements/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: Install dbt dependencies
description: Installs Python and dbt requirements for a workflow
inputs:
  dbt_project_dir:
    description: Path to the directory containing the dbt project.
    required: false
    default: ./dbt
  requirements_file_path:
    description: Path to Python requirements file.
    required: false
    default: ./dbt/requirements.txt
runs:
  using: composite
  steps:
    - name: Setup python
      uses: actions/setup-python@v4
      with:
        python-version: 3.x
        cache: pip
        # Key the pip cache on the requirements file that is actually
        # installed below, so a non-default requirements_file_path is honored
        cache-dependency-path: ${{ inputs.requirements_file_path }}

    - name: Install python requirements
      # Pass the input through the environment and quote it so that paths
      # containing spaces or shell metacharacters are handled safely
      run: python -m pip install -r "$REQUIREMENTS_FILE_PATH"
      shell: bash
      env:
        REQUIREMENTS_FILE_PATH: ${{ inputs.requirements_file_path }}

    # Reuse previously downloaded dbt packages as long as packages.yml
    # is unchanged
    - name: Cache dbt requirements
      uses: actions/cache@v3
      with:
        path: ${{ inputs.dbt_project_dir }}/dbt_packages
        key: dbt-${{ hashFiles(format('{0}/packages.yml', inputs.dbt_project_dir)) }}

    - name: Install dbt requirements
      run: dbt deps
      working-directory: ${{ inputs.dbt_project_dir }}
      shell: bash
16 changes: 16 additions & 0 deletions .github/actions/load_environment_variables/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: Load environment variables
description: Configures environment variables for a workflow
inputs:
  env_var_file_path:
    description: |
      File path to variable file or directory.
      Defaults to ./.github/variables/* if none specified
      and runs against each file in that directory.
    required: false
    default: ./.github/variables/*
runs:
  using: composite
  steps:
    # Use sed to strip comment lines. The pattern is anchored to the start of
    # the line (allowing leading whitespace) so that a variable whose value
    # merely contains a `#` is still exported. The input is left unquoted on
    # purpose so that the shell expands the default glob to every file.
    - name: Load environment variables
      run: |
        sed -E '/^[[:space:]]*#/d' ${{ inputs.env_var_file_path }} >> "$GITHUB_ENV"
      shell: bash
35 changes: 35 additions & 0 deletions .github/scripts/cleanup_dbt_resources.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Clean up dbt resources created by a CI run or by local development.
#
# Takes one argument representing the target environment to clean up,
# one of `dev` or `ci`. E.g.:
#
#   ./cleanup_dbt_resources.sh dev
#
# The `prod` target is always rejected. The script calls `dbt list` without
# an explicit project directory, so run it from the dbt project directory.
#
# Assumes that dbt, jq, and the AWS CLI are installed and available on the
# caller's path.
set -euo pipefail

if [[ "$#" -eq 0 ]]; then
  echo "Missing first argument representing dbt target" >&2
  exit 1
fi

if [[ "$1" == "prod" ]]; then
  echo "Target cannot be 'prod'" >&2
  exit 1
fi

# Ask dbt for the schema of every model in the target, one JSON object per
# line. Brace groups (not subshells) are used for the error handlers so that
# `exit` terminates this script directly.
schemas_json=$(dbt --quiet list --resource-type model --target "$1" \
  --output json --output-keys schema) || {
  echo "Error in dbt call" >&2
  exit 1
}

# Extract the raw schema names and de-duplicate them
schemas=$(echo "$schemas_json" | jq -r '.schema' | sort -u) || {
  echo "Error in schema parsing" >&2
  exit 1
}

if [[ -z "$schemas" ]]; then
  echo "No schemas found for target '$1', nothing to delete"
  exit 0
fi

echo "Deleting the following schemas from Athena:"
echo
echo "$schemas"

# Pass each schema name as a positional argument instead of splicing it into
# the command string, so names are never re-parsed by the shell. Exiting with
# 255 makes xargs stop at the first failed deletion.
echo "$schemas" | xargs -I {} \
  bash -c 'aws glue delete-database --name "$1" || exit 255' _ {}

echo
echo "Done!"
3 changes: 3 additions & 0 deletions .github/variables/dbt.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Variables loaded into the job environment by the load_environment_variables
# action, which strips comment lines like these before exporting.
#
# Prefix of the cache keys used for the cached dbt manifest
CACHE_NAME=dbt-cache
# Directory that the workflows cache as the dbt manifest location
MANIFEST_DIR=dbt/target
# Working directory in which the workflows run dbt commands
PROJECT_DIR=dbt
83 changes: 83 additions & 0 deletions .github/workflows/build_and_test_dbt.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
name: build-and-test-dbt

on:
  pull_request:
    branches: [master, data-catalog]
  push:
    branches: [master, data-catalog]

jobs:
  build-and-test-dbt:
    runs-on: ubuntu-latest
    # These permissions are needed to interact with GitHub's OIDC Token endpoint
    # so that we can authenticate with AWS
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Install dbt requirements
        uses: ./.github/actions/install_dbt_requirements

      - name: Load environment variables
        uses: ./.github/actions/load_environment_variables

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
          aws-region: us-east-1

      - name: Configure dbt environment
        uses: ./.github/actions/configure_dbt_environment

      - name: Cache dbt manifest
        id: cache
        uses: actions/cache@v3
        with:
          path: ${{ env.MANIFEST_DIR }}
          key: ${{ env.CACHE_NAME }}-${{ env.CACHE_KEY }}
          restore-keys: |
            ${{ env.CACHE_NAME }}-data-catalog
            ${{ env.CACHE_NAME }}-master

      # cache-hit is only 'true' for an exact match on the primary key, so a
      # restore via restore-keys still results in a full build. When there is
      # an exact hit, copy the restored manifest out of the dbt target
      # directory: dbt writes a fresh manifest.json there when it runs, and
      # pointing --state at the target directory would compare the project
      # against itself.
      - name: Set command args based on the restored manifest
        run: |
          if [[ $CACHE_HIT == 'true' && -f "$MANIFEST_DIR/manifest.json" ]]; then
            state_dir="$RUNNER_TEMP/dbt-state"
            mkdir -p "$state_dir"
            cp "$MANIFEST_DIR/manifest.json" "$state_dir/"
            {
              echo "MODIFIED_RESOURCES_ONLY=true";
              echo "STATE_DIR=$state_dir";
            } >> "$GITHUB_ENV"
          else
            echo "MODIFIED_RESOURCES_ONLY=false" >> "$GITHUB_ENV"
          fi
        shell: bash
        env:
          CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}

      - name: Test dbt macros
        run: dbt run-operation test_all
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash

      - name: Build models
        run: |
          if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then
            echo "Running build on modified resources only"
            dbt run --target "$TARGET" -s state:modified --defer --state "$STATE_DIR"
          else
            echo "Running build on all resources"
            dbt run --target "$TARGET"
          fi
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash

      - name: Test models
        run: |
          if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then
            echo "Running tests on modified resources only"
            dbt test --target "$TARGET" -s state:modified --state "$STATE_DIR"
          else
            echo "Running tests on all resources"
            dbt test --target "$TARGET"
          fi
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
42 changes: 42 additions & 0 deletions .github/workflows/cleanup_dbt_resources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
name: cleanup-dbt-resources

# Runs whenever a pull request against one of the long-lived branches is
# closed, to remove the resources that its CI runs created
on:
  pull_request:
    branches: [master, data-catalog]
    types: [closed]

jobs:
  cleanup-dbt-resources:
    runs-on: ubuntu-latest
    # These permissions are needed to interact with GitHub's OIDC Token endpoint
    # so that we can authenticate with AWS
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Install dbt requirements
        uses: ./.github/actions/install_dbt_requirements

      - name: Install requirements for cleaning up dbt resources
        # -y keeps apt-get from waiting on a confirmation prompt that
        # nobody can answer in CI
        run: sudo apt-get update && sudo apt-get install -y jq
        shell: bash

      - name: Load environment variables
        uses: ./.github/actions/load_environment_variables

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
          aws-region: us-east-1

      - name: Configure dbt environment
        uses: ./.github/actions/configure_dbt_environment

      - name: Clean up dbt resources
        # Reference the script from the workspace root so the path stays
        # valid regardless of how deeply PROJECT_DIR is nested
        run: |
          "$GITHUB_WORKSPACE/.github/scripts/cleanup_dbt_resources.sh" ci
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
40 changes: 40 additions & 0 deletions .github/workflows/test_dbt_models.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
name: test-dbt-models

# Manually triggered workflow that runs the dbt tests
on: workflow_dispatch

jobs:
  test-dbt-models:
    runs-on: ubuntu-latest
    # These permissions are needed to interact with GitHub's OIDC Token endpoint
    # so that we can authenticate with AWS
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Install dbt requirements
        uses: ./.github/actions/install_dbt_requirements

      - name: Load environment variables
        uses: ./.github/actions/load_environment_variables

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
          aws-region: us-east-1

      - name: Configure dbt environment
        uses: ./.github/actions/configure_dbt_environment

      - name: Test models
        # Target is currently set to CI because we expect this action to be
        # run against the long-lived data-catalog branch, but we should change
        # this to prod when we merge that branch into master
        run: dbt test --target ci
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        env:
          # The generate_schema_name macro reads HEAD_REF (not
          # GITHUB_HEAD_REF) to build the CI schema prefix, so pin it here
          # to the data-catalog schemas
          HEAD_REF: data-catalog
2 changes: 1 addition & 1 deletion dbt/macros/generate_schema_name.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
{%- if target.name == "dev" -%}
{%- set schema_prefix = "dev_" ~ env_var_func("USER") ~ "_" -%}
{%- elif target.name == "ci" -%}
{%- set github_head_ref = kebab_slugify(env_var_func("GITHUB_HEAD_REF")) -%}
{%- set github_head_ref = kebab_slugify(env_var_func("HEAD_REF")) -%}
{%- set schema_prefix = "ci_" ~ github_head_ref ~ "_" -%}
{%- else -%} {%- set schema_prefix = "" -%}
{%- endif -%}
Expand Down
2 changes: 1 addition & 1 deletion dbt/macros/tests/test_generate_schema_name.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

{% macro mock_env_var(var_name) %}
{% if var_name == "USER" %} {{ return("testuser") }}
{% elif var_name == "GITHUB_HEAD_REF" %} {{ return("testuser/feature-branch-1") }}
{% elif var_name == "HEAD_REF" %} {{ return("testuser/feature-branch-1") }}
{% else %} {{ return("") }}
{% endif %}
{% endmacro %}
Expand Down
8 changes: 4 additions & 4 deletions dbt/models/default/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ models:
- pin
- year
config:
error_if: ">280655"
error_if: ">280662"
# Unique by case number and year
- unique_combination_of_columns:
name: vw_pin_appeal_unique_by_case_number_and_year
combination_of_columns:
- year
- case_no
config:
error_if: ">365779"
error_if: ">365894"
# `change` should be an enum
- dbt_utils.expression_is_true:
name: vw_pin_appeal_no_unexpected_change_values
Expand Down Expand Up @@ -85,7 +85,7 @@ models:
case when char_renovation = '1' then true else false end
)
config:
error_if: ">73925"
error_if: ">73941"
# TODO: Characteristics columns should adhere to pre-determined criteria
- name: vw_pin_address_test
description: '{{ doc("vw_pin_address_test") }}'
Expand All @@ -111,7 +111,7 @@ models:
- mail_address_zipcode_1
- mail_address_zipcode_2
config:
error_if: ">879261"
error_if: ">880581"
# TODO: Mailing address changes after validated sale(?)
# TODO: Site addresses are all in Cook County
- name: vw_pin_condo_char_test
Expand Down

0 comments on commit 9574a38

Please sign in to comment.