ccao-data · jeancochrane · Aug 1, 2023 · Jul 31, 2023 · Jul 31, 2023 · Jul 31, 2023
diff --git a/dbt/README.md b/dbt/README.md
@@ -48,14 +48,24 @@ Build the models to create views in our Athena warehouse:
 dbt run
 ```
 
+By default, all `dbt` commands will run against the `dev` environment, which
+namespaces the resources it creates by prefixing target database names with
+your Unix `$USER` name (e.g. `jecochr-default` for the `default` database when
+`dbt` is run on Jean's machine). To instead **run commands against prod**,
+use the `--target` flag:
+
+```
+dbt run --target prod
+```
+
 Generate the documentation:
 
 ```
 dbt docs generate
 ```
 
 This will create a new file `target/index.html` representing the static
-docs website.
+docs site.
 
 You can also serve the docs locally:
 

diff --git a/dbt/dbt_project.yml b/dbt/dbt_project.yml
@@ -28,6 +28,6 @@ models:
   athena:
     +materialized: view
     default:
-      +schema: dbt-test-default
+      +schema: default
     location:
-      +schema: dbt-test-location
+      +schema: location
diff --git a/dbt/macros/generate_schema_name.sql b/dbt/macros/generate_schema_name.sql
@@ -1,15 +1,41 @@
--- Override the default schema naming to remove the dbt-added prefix.
+-- Override the default schema naming to remove the autogenerated prefix
+-- and replace it with our own namespacing on dev and CI.
 -- See: https://docs.getdbt.com/docs/build/custom-schemas
 {% macro generate_schema_name(custom_schema_name, node) -%}
 
+    {#
+        According to the dbt docs linked above, this is required to be set by
+        the built-in macro that we are overriding, but we don't actually use it
+    #}
     {%- set default_schema = target.schema -%}
+
+    {%- if target.name == "dev" -%}
+        {%- set schema_prefix = env_var("USER") -%}
+    {%- elif target.name == "ci" -%}
+        {%- set schema_prefix = env_var("GITHUB_BASE_REF") -%}
+    {%- else -%}
+        {%- set schema_prefix = "" -%}
+    {%- endif -%}
+
     {%- if custom_schema_name is none -%}
 
-        {{ default_schema }}
+        {#
+            The default schema name is not allowed, since we use subdirectory
+            organization to map tables/views to their Athena database
+        #}
+        {{ exceptions.raise_compiler_error(
+            "Missing schema definition for " ~ node.name ~ ". " ~
+            "Its containing subdirectory is probably missing a `+schema` " ~
+            "attribute under the `models` config in dbt_project.yml."
+        ) }}
 
     {%- else -%}
 
-        {{ custom_schema_name | trim }}
+        {#- Omit the "-" separator when the prefix is empty (e.g. on prod) -#}
+        {%- set separator = "-" if schema_prefix else "" -%}
+        {%- set full_schema_name = schema_prefix ~ separator ~ custom_schema_name | trim -%}
+
+        {{ full_schema_name }}
 
     {%- endif -%}
 

diff --git a/dbt/profiles.yml b/dbt/profiles.yml
@@ -11,3 +11,22 @@ athena:
       # "database" here corresponds to a Glue data catalog
       database: awsdatacatalog
       threads: 5
+    ci:
+      type: athena
+      s3_staging_dir: s3://ccao-dbt-athena-ci-us-east-1/results/
+      s3_data_dir: s3://ccao-dbt-athena-ci-us-east-1/data/
+      region_name: us-east-1
+      schema: dbt-test
+      database: awsdatacatalog
+      # Namespace all generated data in S3 by schema, so that everything a PR
+      # created can be deleted once that PR is merged
+      s3_data_naming: schema_table
+      threads: 5
+    prod:
+      type: athena
+      s3_staging_dir: s3://ccao-athena-results-us-east-1/
+      s3_data_dir: s3://ccao-athena-data-us-east-1/
+      region_name: us-east-1
+      schema: default  # not emitted by generate_schema_name; models set +schema
+      database: awsdatacatalog
+      threads: 5