Skip to content

Commit

Permalink
Slugify branch names in dbt CI environment and add macro unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jeancochrane committed Jul 31, 2023
1 parent 62dc2b3 commit 181e516
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 8 deletions.
6 changes: 6 additions & 0 deletions dbt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,9 @@ Run tests for only one model:
```
dbt test --select <model_name>
```

Run tests for dbt macros:

```
dbt run-operation test_all
```
31 changes: 23 additions & 8 deletions dbt/macros/generate_schema_name.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,33 @@
-- and replace it with our own namespacing on dev and CI.
-- See: https://docs.getdbt.com/docs/build/custom-schemas
{% macro generate_schema_name(custom_schema_name, node) -%}
{{ return(_generate_schema_name(
custom_schema_name,
node,
target,
env_var,
exceptions.raise_compiler_error
)) }}
{%- endmacro %}

-- Helper implementation of generate_schema_name where no arguments
-- are read from the global variable context. Useful for unit testing.
{% macro _generate_schema_name(
custom_schema_name, node, target, env_var_func, raise_error_func
) %}
{#
According to the dbt docs linked above, this is required to be set by
the built-in macro that we are overriding, but we don't actually use it
#}
{%- set default_schema = target.schema -%}
{%- if target.name == "dev" -%}
{%- set schema_prefix = env_var("USER") -%}
{%- set schema_prefix = "dev-" ~ env_var_func("USER") ~ "-" -%}
{%- elif target.name == "ci" -%}
{%- set schema_prefix = env_var("GITHUB_HEAD_REF") -%}
{%- set github_head_ref = kebab_slugify(
env_var_func("GITHUB_HEAD_REF")
) -%}
{%- set schema_prefix = "ci-" ~ github_head_ref ~ "-" -%}
{%- else -%}
{%- set schema_prefix = "" -%}
{%- endif -%}
Expand All @@ -23,20 +39,19 @@
The default schema name is not allowed, since we use subdirectory
organization to map tables/views to their Athena database
#}
{{ exceptions.raise_compiler_error(
{{ return(raise_error_func(
"Missing schema definition for " ~ node.name ~ ". " ~
"Its containing subdirectory is probably missing a `+schema` " ~
"attribute under the `models` config in dbt_project.yml."
) }}
)) }}
{%- else -%}
{%- set full_schema_name -%}
{{ schema_prefix ~ "-" ~ custom_schema_name | trim }}
{{ schema_prefix ~ custom_schema_name | trim }}
{%- endset -%}
{{ full_schema_name }}
{{ return(full_schema_name) }}
{%- endif -%}
{%- endmacro %}
{% endmacro %}
16 changes: 16 additions & 0 deletions dbt/macros/slugify.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- Variation of dbt_utils.slugify macro using kebab-case instead of snake_case
{% macro kebab_slugify(string) %}
{#- Lower case the string -#}
{% set string = string | lower %}

{#- Replace spaces, slashes, and underscores with hyphens -#}
{% set string = modules.re.sub('[ _/]+', '-', string) %}

{#- Only take letters, numbers, and hyphens -#}
{% set string = modules.re.sub('[^a-z0-9-]+', '', string) %}

{#- Prepends "_" if string begins with a number -#}
{% set string = modules.re.sub('^[0-9]', '_' + string[0], string) %}

This comment has been minimized.

Copy link
@dfsnow

dfsnow Jul 31, 2023

Member

question (blocking): Wouldn't this prepend an underscore to the beginning of most issue branches such that the final table name would be something like dev-_123-issue-branch-location.vw_pin10_location_test?

suggestion: Maybe we should just make the inter-keyword separator an underscore and everything else a dash i.e. dev_123-issue-branch_location.vw_pin10_location_test or dev_jeancochrane-dev-branch_location.vw_pin10_location_test.

This comment has been minimized.

Copy link
@jeancochrane

jeancochrane Jul 31, 2023

Author Contributor

@dfsnow That's true! The leading underscore behavior is a holdover from the dbt utils function that I adapted here, but I think that you're right that it doesn't make sense in this context since we're always going to prepend the ci- prefix to anything using this slug. I can go ahead and remove the leading underscore.

As far as formats, what would you prefer, the use of underscores as an inter-keyword separator or keeping things as they are and using hyphens? I think the hyphens are a little bit nicer to look at, but the underscores are potentially easier to grok to someone who knows how to read the format.

This comment has been minimized.

Copy link
@dfsnow

dfsnow Jul 31, 2023

Member

I think something like ci_issue-branch-head-name_database-name.table_name is slightly easier to grok than ci-issue-branch-head-name-database-name.table_name. @wrridgeway, what do you think?

This comment has been minimized.

Copy link
@jeancochrane

jeancochrane Jul 31, 2023

Author Contributor

That makes sense to me @dfsnow! We'll see what Billy says, but in the meantime I adjusted the logic to separate keywords with underscores in 2f57842.

This comment has been minimized.

Copy link
@wrridgeway

wrridgeway Jul 31, 2023

Member

Works for me. At some point perhaps we'll rename the tables with hyphens since that's the only thing that makes this slightly unintuitive.

This comment has been minimized.

Copy link
@dfsnow

dfsnow Jul 31, 2023

Member

I think it's actually fine to keep the tables as-is, since renaming them would involve changing the actual system-of-record table names in our mirror (bad).


{{ return(string) }}
{% endmacro %}
6 changes: 6 additions & 0 deletions dbt/macros/tests/test_all.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- dbt macro unit testing inspired by this blog post:
-- https://docs.getdbt.com/blog/unit-testing-dbt-packages
{% macro test_all() %}
{% do test_slugify() %}
{% do test_generate_schema_name() %}
{% endmacro %}
76 changes: 76 additions & 0 deletions dbt/macros/tests/test_generate_schema_name.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{% macro test_generate_schema_name() %}
{% do test_generate_schema_name_handles_dev_env() %}
{% do test_generate_schema_name_handles_ci_env() %}
{% do test_generate_schema_name_handles_prod_env() %}
{% do test_generate_schema_name_raises_for_default_schema_name() %}
{% endmacro %}

{% macro mock_env_var(var_name) %}
{% if var_name == "USER" %}
{{ return("testuser") }}
{% elif var_name == "GITHUB_HEAD_REF" %}
{{ return("testuser/feature-branch-1") }}
{% else %}
{{ return("") }}
{% endif %}
{% endmacro %}

{% macro mock_raise_compiler_error(_error) %}
{{ return("Compiler error raised") }}
{% endmacro %}

{% macro test_generate_schema_name_handles_dev_env() %}
{% do assert_equals(
"test_generate_schema_name_handles_dev_env",
_generate_schema_name(
"test",
{"name": "test"},
{"schema": "default", "name": "dev"},
mock_env_var,
exceptions.raise_compiler_error
),
"dev-testuser-test"
) %}
{% endmacro %}

{% macro test_generate_schema_name_handles_ci_env() %}
{% do assert_equals(
"test_generate_schema_name_handles_ci_env",
_generate_schema_name(
"test",
{"name": "test"},
{"schema": "default", "name": "ci"},
mock_env_var,
exceptions.raise_compiler_error
),
"ci-testuser-feature-branch-1-test"
) %}
{% endmacro %}

{% macro test_generate_schema_name_handles_prod_env() %}
{% do assert_equals(
"test_generate_schema_name_handles_prod_env",
_generate_schema_name(
"test",
{"name": "test"},
{"schema": "default", "name": "prod"},
mock_env_var,
exceptions.raise_compiler_error
),
"test"
) %}
{% endmacro %}

{% macro test_generate_schema_name_raises_for_default_schema_name() %}
{% do assert_equals(
"test_generate_schema_name_raises_for_default_schema_name",
_generate_schema_name(
None,
{"name": "test"},
{"schema": "default", "name": "prod"},
mock_env_var,
mock_raise_compiler_error
),
"Compiler error raised"
) %}
{% endmacro %}
60 changes: 60 additions & 0 deletions dbt/macros/tests/test_slugify.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{% macro test_slugify() %}
{% do test_kebab_slugify() %}
{% endmacro %}

{% macro test_kebab_slugify() %}
{% do test_kebab_slugify_lowercases_strings() %}
{% do test_kebab_slugify_replaces_spaces() %}
{% do test_kebab_slugify_replaces_slashes() %}
{% do test_kebab_slugify_replaces_underscores() %}
{% do test_kebab_slugify_removes_special_characters() %}
{% do test_kebab_slugify_handles_leading_numbers() %}
{% endmacro %}

{% macro test_kebab_slugify_lowercases_strings() %}
{{ assert_equals(
"test_kebab_slugify_lowercases_strings",
kebab_slugify("TEST"),
"test"
) }}
{% endmacro %}

{% macro test_kebab_slugify_replaces_spaces() %}
{{ assert_equals(
"test_kebab_slugify_replaces_spaces",
kebab_slugify("t e s t"),
"t-e-s-t"
) }}
{% endmacro %}

{% macro test_kebab_slugify_replaces_slashes() %}
{{ assert_equals(
"test_kebab_slugify_replaces_slashes",
kebab_slugify("t/e/s/t"),
"t-e-s-t"
) }}
{% endmacro %}

{% macro test_kebab_slugify_replaces_underscores() %}
{{ assert_equals(
"test_kebab_slugify_replaces_underscores",
kebab_slugify("t_e_s_t"),
"t-e-s-t"
) }}
{% endmacro %}

{% macro test_kebab_slugify_removes_special_characters() %}
{{ assert_equals(
"test_kebab_slugify_removes_special_characters",
kebab_slugify("t!@#e$%^s&*()t"),
"test"
) }}
{% endmacro %}

{% macro test_kebab_slugify_handles_leading_numbers() %}
{{ assert_equals(
"test_kebab_slugify_handles_leading_numbers",
kebab_slugify("123test"),
"_123test"
) }}
{% endmacro %}
9 changes: 9 additions & 0 deletions dbt/macros/tests/utils.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{% macro assert_equals(test_name, value, expected) %}
{% if value == expected %}
{% do log(test_name ~ " - PASS", info=True) %}
{% else %}
{% do exceptions.raise_compiler_error(
test_name ~ " - FAIL: " ~ value~ " != " ~ expected
) %}
{% endif %}
{% endmacro %}

0 comments on commit 181e516

Please sign in to comment.