From e53df7798a739c9d5646596d571eb3a9d8b311eb Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 20 Dec 2024 17:18:39 +0000 Subject: [PATCH 01/11] Add docs --- dbt/models/model/docs.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbt/models/model/docs.md b/dbt/models/model/docs.md index f5e4d4351..6d261acb8 100644 --- a/dbt/models/model/docs.md +++ b/dbt/models/model/docs.md @@ -34,7 +34,7 @@ Overall feature importance by model run (`run_id`). Includes metrics such as gain, cover, and frequency. This is the output of the built-in LightGBM/XGBoost feature importance methods. -**Primary Key**: `year`, `run_id`, `model_predictor_name_all` +**Primary Key**: `year`, `run_id`, `model_predictor_all_name` {% enddocs %} # final_model @@ -77,7 +77,7 @@ If hyperparameters are blank for a given run, then that parameter was not used. Range of hyperparameters searched by a given model run (`run_id`) during cross-validation. -**Primary Key**: `year`, `run_id` +**Primary Key**: `year`, `run_id`, `parameter_name` {% enddocs %} # parameter_search @@ -99,7 +99,7 @@ Includes breakouts for many levels of geography, as well as different "stages". The stages are: - `test` - Performance on the out-of-sample test set (typically the - most recent 10% of sales) + most recent 10% of sales)s - `assessment` - Performance on the most recent year of sales (after being trained on all sales, so in-sample) @@ -113,7 +113,7 @@ The stages are: Identical to `model.performance`, but additionally broken out by quantile. **Primary Key**: `year`, `run_id`, `stage`, `triad_code`, `geography_type`, -`geography_id`, `by_class`, `quantile` +`geography_id`, `by_class`, `num_quantile`, `quantile` {% enddocs %} # shap @@ -188,4 +188,4 @@ View to compile PIN-level model inputs shared between the residential (`model.vw_card_res_input`) and condo (`model.vw_pin_condo_input`) model views. 
**Primary Key**: `year`, `run_id`, `meta_pin` -{% enddocs %} +{% enddocs %} \ No newline at end of file From 2b38351f35d8662ff2a8ac0e4875e0aa781e36e5 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 20 Dec 2024 17:31:02 +0000 Subject: [PATCH 02/11] Add schema --- dbt/models/model/schema.yml | 135 +++++++++++++++++- dbt/models/reporting/reporting.ratio_stats.py | 3 +- 2 files changed, 135 insertions(+), 3 deletions(-) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index 04048bfcf..dc28f13ae 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -4,61 +4,192 @@ sources: tables: - name: assessment_card description: '{{ doc("table_assessment_card") }}' + data_tests: + - unique_combination_of_columns: + name: model_assessment_card_unique_by_pin_card_and_year + combination_of_columns: + - meta_pin + - meta_card_num + - meta_year + - run_id + config: + error_if: ">5748" + meta: + description: assessment card should be unique by pin, card, year, and run_id tags: - load_auto - name: assessment_pin description: '{{ doc("table_assessment_pin") }}' + data_tests: + - unique_combination_of_columns: + name: model_assessment_pin_unique_by_pin_and_year + combination_of_columns: + - meta_pin + - meta_year + - run_id + config: + error_if: ">2016" + meta: + description: assessment pin should be unique by pin, year, and run_id tags: - load_auto - name: feature_importance + data_tests: + - unique_combination_of_columns: + name: model_feature_importance_unique + combination_of_columns: + - year + - run_id + - model_predictor_all_name + meta: + description: feature importance should be unique by year, run_id, and model_predictor_all_name description: '{{ doc("table_feature_importance") }}' tags: - load_auto - name: metadata + data_tests: + - unique_combination_of_columns: + name: model_metadata_unique_by_year_and_run_id + combination_of_columns: + - year + - run_id + meta: + description: metadata should be unique by year and run_id 
description: '{{ doc("table_metadata") }}' tags: - load_auto - name: parameter_final description: '{{ doc("table_parameter_final") }}' + data_tests: + - unique_combination_of_columns: + name: model_parameter_final_unique_by_year_and_run_id + combination_of_columns: + - year + - run_id + meta: + description: parameter final should be unique by year and run_id tags: - load_auto - name: parameter_range description: '{{ doc("table_parameter_range") }}' + data_tests: + - unique_combination_of_columns: + name: model_parameter_range_unique_by_year_run_id_and_parameter_name + combination_of_columns: + - year + - run_id + - parameter_name + meta: + description: parameter range should be unique by year and run_id tags: - load_auto - name: parameter_search description: '{{ doc("table_parameter_search") }}' + data_tests: + - unique_combination_of_columns: + name: model_parameter_search_unique_by_year_run_id_and_iteration + combination_of_columns: + - year + - run_id + - iteration + config: + error_if: ">2136" + meta: + description: parameter search should be unique by year, run_id, and iteration tags: - load_auto - name: performance description: '{{ doc("table_performance") }}' + data_tests: + - unique_combination_of_columns: + name: model_performance_unique + combination_of_columns: + - year + - run_id + - stage + - triad_code + - geography_type + - geography_id + - class + meta: + description: performance should be unique by year, run_id, stage, triad_code, geography_type, geography_id, and class tags: - load_auto - name: performance_quantile description: '{{ doc("table_performance_quantile") }}' + data_tests: + - unique_combination_of_columns: + name: model_performance_quantile_unique + combination_of_columns: + - year + - run_id + - triad_code + - stage + - geography_type + - geography_id + - class + - num_quantile + - quantile + meta: + description: > + performance quantile should be unique by year, run_id, stage, triad_code, + geography_type, by_class, geography_id, 
num_quantile, and quantile tags: - load_auto - name: shap description: '{{ doc("table_shap") }}' + data_tests: + - unique_combination_of_columns: + name: model_shap_unique_by_year_run_id_meta_pin_meta_and_card_num + combination_of_columns: + - year + - run_id + - meta_pin + - meta_card_num + config: + error_if: ">524" + meta: + description: shap should be unique by year, run_id, meta_pin, and meta_card_num tags: - load_auto - name: test_card description: '{{ doc("table_test_card") }}' + data_tests: + - unique_combination_of_columns: + name: model_test_card_unique + combination_of_columns: + - year + - run_id + - meta_pin + - meta_card_num + - meta_sale_document_num + config: + error_if: ">102422" + meta: + description: test card should be unique by year, run_id, meta_pin, meta_card_num, and meta_sale_document_num tags: - load_auto - name: timing description: '{{ doc("table_timing") }}' + data_tests: + - unique_combination_of_columns: + name: model_timing_unique_by_year_run_id + combination_of_columns: + - year + - run_id + meta: + description: timing should be unique by year and run_id tags: - load_auto @@ -104,7 +235,7 @@ models: - name: note description: | Any notes or caveats associated with the model run - + - name: model.vw_pin_shared_input description: '{{ doc("view_vw_pin_shared_input") }}' columns: @@ -996,4 +1127,4 @@ models: name: model_vw_pin_condo_input_unique_pin_year combination_of_columns: - meta_pin - - meta_year + - meta_year \ No newline at end of file diff --git a/dbt/models/reporting/reporting.ratio_stats.py b/dbt/models/reporting/reporting.ratio_stats.py index d9799e3eb..73658de5f 100644 --- a/dbt/models/reporting/reporting.ratio_stats.py +++ b/dbt/models/reporting/reporting.ratio_stats.py @@ -4,10 +4,11 @@ from typing import Union -import assesspy as ap import pandas as pd from pyspark.sql.functions import col, lit +import assesspy as ap + CCAO_LOWER_QUANTILE = 0.05 CCAO_UPPER_QUANTILE = 0.95 CCAO_MIN_SAMPLE_SIZE = 20.0 From 
2ad3de7b11ddf0ae1067f8d213c9ee04190be505 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 20 Dec 2024 17:38:16 +0000 Subject: [PATCH 03/11] Fix typos --- dbt/models/model/docs.md | 2 +- dbt/models/model/schema.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/model/docs.md b/dbt/models/model/docs.md index 6d261acb8..869159bfd 100644 --- a/dbt/models/model/docs.md +++ b/dbt/models/model/docs.md @@ -99,7 +99,7 @@ Includes breakouts for many levels of geography, as well as different "stages". The stages are: - `test` - Performance on the out-of-sample test set (typically the - most recent 10% of sales)s + most recent 10% of sales) - `assessment` - Performance on the most recent year of sales (after being trained on all sales, so in-sample) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index dc28f13ae..ccdeee0a4 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -235,7 +235,7 @@ models: - name: note description: | Any notes or caveats associated with the model run - + - name: model.vw_pin_shared_input description: '{{ doc("view_vw_pin_shared_input") }}' columns: From 05b530d639d93514f3144cbc9d348e8e882e87ae Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Mon, 23 Dec 2024 20:08:03 -0600 Subject: [PATCH 04/11] Update reporting.ratio_stats.py --- dbt/models/reporting/reporting.ratio_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/reporting/reporting.ratio_stats.py b/dbt/models/reporting/reporting.ratio_stats.py index 73658de5f..f7cc20a91 100644 --- a/dbt/models/reporting/reporting.ratio_stats.py +++ b/dbt/models/reporting/reporting.ratio_stats.py @@ -4,10 +4,10 @@ from typing import Union +import assesspy as ap import pandas as pd from pyspark.sql.functions import col, lit -import assesspy as ap CCAO_LOWER_QUANTILE = 0.05 CCAO_UPPER_QUANTILE = 0.95 From ec4ce57b962afc9409ee445c748f6dd3f501f887 Mon
Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Mon, 23 Dec 2024 20:08:25 -0600 Subject: [PATCH 05/11] Update reporting.ratio_stats.py --- dbt/models/reporting/reporting.ratio_stats.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/models/reporting/reporting.ratio_stats.py b/dbt/models/reporting/reporting.ratio_stats.py index f7cc20a91..d9799e3eb 100644 --- a/dbt/models/reporting/reporting.ratio_stats.py +++ b/dbt/models/reporting/reporting.ratio_stats.py @@ -8,7 +8,6 @@ import pandas as pd from pyspark.sql.functions import col, lit - CCAO_LOWER_QUANTILE = 0.05 CCAO_UPPER_QUANTILE = 0.95 CCAO_MIN_SAMPLE_SIZE = 20.0 From 38d4fb2d6a5b24cd19e36825770a231cb7a4ddd6 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Tue, 24 Dec 2024 09:58:37 -0600 Subject: [PATCH 06/11] Update schema.yml --- dbt/models/model/schema.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index ccdeee0a4..9f9eb5ea4 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -23,7 +23,7 @@ sources: description: '{{ doc("table_assessment_pin") }}' data_tests: - unique_combination_of_columns: - name: model_assessment_pin_unique_by_pin_and_year + name: model_assessment_pin_unique_by_pin_year_and_run_id combination_of_columns: - meta_pin - meta_year @@ -85,7 +85,7 @@ sources: - run_id - parameter_name meta: - description: parameter range should be unique by year and run_id + description: parameter range should be unique by year run_id and parameter_name tags: - load_auto @@ -141,7 +141,7 @@ sources: meta: description: > performance quantile should be unique by year, run_id, stage, triad_code, - geography_type, by_class, geography_id, num_quantile, and quantile + geography_type, class, geography_id, num_quantile, and quantile tags: - load_auto @@ -1127,4 +1127,4 @@ models: name: 
model_vw_pin_condo_input_unique_pin_year combination_of_columns: - meta_pin - - meta_year \ No newline at end of file + - meta_year From ffb26979d65fcbe618d6a4d53dc8cf0001cceb4c Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Tue, 24 Dec 2024 13:03:42 -0600 Subject: [PATCH 07/11] Update dbt/models/model/schema.yml Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com> --- dbt/models/model/schema.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index 9f9eb5ea4..cfe377551 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -85,7 +85,7 @@ sources: - run_id - parameter_name meta: - description: parameter range should be unique by year run_id and parameter_name + description: parameter range should be unique by year, run_id, and parameter_name tags: - load_auto From eda20d4b2a4dd0f430c9da41a0fbda434272c5f3 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 24 Dec 2024 19:21:57 +0000 Subject: [PATCH 08/11] Dan edits --- dbt/models/model/docs.md | 6 +++--- dbt/models/model/schema.yml | 30 +++--------------------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/dbt/models/model/docs.md b/dbt/models/model/docs.md index 869159bfd..8bef09eed 100644 --- a/dbt/models/model/docs.md +++ b/dbt/models/model/docs.md @@ -165,7 +165,7 @@ data cached by DVC when possible. See [model-res-avm#getting-data](https://github.com/ccao-data/model-res-avm#getting-data) for more information. -**Primary Key**: `year`, `run_id`, `meta_pin`, `meta_card_num` +**Primary Key**: `year`, `meta_pin`, `meta_card_num` {% enddocs %} # vw_pin_condo_input @@ -178,7 +178,7 @@ Observations are at the PIN-14 (condo unit) level. Unlike the residential input view, this view does not perform filling. Instead condo characteristics are backfilled in `default.vw_pin_condo_char`. 
-**Primary Key**: `year`, `run_id`, `meta_pin` +**Primary Key**: `year`, `meta_pin` {% enddocs %} # vw_pin_shared_input @@ -187,5 +187,5 @@ are backfilled in `default.vw_pin_condo_char`. View to compile PIN-level model inputs shared between the residential (`model.vw_card_res_input`) and condo (`model.vw_pin_condo_input`) model views. -**Primary Key**: `year`, `run_id`, `meta_pin` +**Primary Key**: `year`, `meta_pin` {% enddocs %} \ No newline at end of file diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index cfe377551..e64188e4c 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -13,9 +13,10 @@ sources: - meta_year - run_id config: + # We add a fixed level of errors since duplicated data exists before + # these tests were implemented. If duplicated data is added after 12/24/2024, + # warnings will transition to errors. error_if: ">5748" - meta: - description: assessment card should be unique by pin, card, year, and run_id tags: - load_auto @@ -30,8 +31,6 @@ sources: - run_id config: error_if: ">2016" - meta: - description: assessment pin should be unique by pin, year, and run_id tags: - load_auto @@ -43,8 +42,6 @@ sources: - year - run_id - model_predictor_all_name - meta: - description: feature importance should be unique by year, run_id, and model_predictor_all_name description: '{{ doc("table_feature_importance") }}' tags: - load_auto @@ -56,8 +53,6 @@ sources: combination_of_columns: - year - run_id - meta: - description: metadata should be unique by year and run_id description: '{{ doc("table_metadata") }}' tags: - load_auto @@ -70,8 +65,6 @@ sources: combination_of_columns: - year - run_id - meta: - description: parameter final should be unique by year and run_id tags: - load_auto @@ -84,8 +77,6 @@ sources: - year - run_id - parameter_name - meta: - description: parameter range should be unique by year, run_id, and parameter_name tags: - load_auto @@ -100,8 +91,6 @@ sources: - iteration config: error_if: 
">2136" - meta: - description: parameter search should be unique by year, run_id, and iteration tags: - load_auto @@ -118,8 +107,6 @@ sources: - geography_type - geography_id - class - meta: - description: performance should be unique by year, run_id, stage, triad_code, geography_type, geography_id, and class tags: - load_auto @@ -138,10 +125,6 @@ sources: - class - num_quantile - quantile - meta: - description: > - performance quantile should be unique by year, run_id, stage, triad_code, - geography_type, class, geography_id, num_quantile, and quantile tags: - load_auto @@ -157,8 +140,6 @@ sources: - meta_card_num config: error_if: ">524" - meta: - description: shap should be unique by year, run_id, meta_pin, and meta_card_num tags: - load_auto @@ -172,11 +153,8 @@ sources: - run_id - meta_pin - meta_card_num - - meta_sale_document_num config: error_if: ">102422" - meta: - description: test card should be unique by year, run_id, meta_pin, meta_card_num, and meta_sale_document_num tags: - load_auto @@ -188,8 +166,6 @@ sources: combination_of_columns: - year - run_id - meta: - description: timing should be unique by year and run_id tags: - load_auto From bcf8edf33d24196e563eb742b5b86bd4eb2d82af Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 24 Dec 2024 19:57:10 +0000 Subject: [PATCH 09/11] Add document number --- dbt/models/model/docs.md | 4 ++-- dbt/models/model/schema.yml | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dbt/models/model/docs.md b/dbt/models/model/docs.md index 8bef09eed..4a09762bf 100644 --- a/dbt/models/model/docs.md +++ b/dbt/models/model/docs.md @@ -86,7 +86,7 @@ during cross-validation. Hyperparameters used for _every_ cross-validation iteration, along with the corresponding performance statistics. 
-**Primary Key**: `year`, `run_id`, `iteration` +**Primary Key**: `year`, `run_id`, `iteration`, `configuration`, `fold_id` {% enddocs %} # performance @@ -138,7 +138,7 @@ The test set is the out-of-sample data used to evaluate model performance. Predictions in this table are trained using only data _not in this set of sales_. -**Primary Key**: `year`, `run_id`, `meta_pin`, `meta_card_num` +**Primary Key**: `year`, `run_id`, `meta_pin`, `meta_card_num`, `document_number` {% enddocs %} # timing diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index e64188e4c..f9310884f 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -84,13 +84,15 @@ sources: description: '{{ doc("table_parameter_search") }}' data_tests: - unique_combination_of_columns: - name: model_parameter_search_unique_by_year_run_id_and_iteration + name: model_parameter_search_unique combination_of_columns: - year - run_id - iteration + - configuration + - fold_id config: - error_if: ">2136" + error_if: ">400" tags: - load_auto @@ -153,6 +155,7 @@ sources: - run_id - meta_pin - meta_card_num + - meta_sale_document_num config: error_if: ">102422" tags: From bb4823d87410551b274e45ff5936a65fd82cd372 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 27 Dec 2024 03:31:06 +0000 Subject: [PATCH 10/11] Change doc_no --- dbt/models/model/docs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/model/docs.md b/dbt/models/model/docs.md index 4a09762bf..ad97b75c9 100644 --- a/dbt/models/model/docs.md +++ b/dbt/models/model/docs.md @@ -138,7 +138,7 @@ The test set is the out-of-sample data used to evaluate model performance. Predictions in this table are trained using only data _not in this set of sales_. 
-**Primary Key**: `year`, `run_id`, `meta_pin`, `meta_card_num`, `document_number` +**Primary Key**: `year`, `run_id`, `meta_pin`, `meta_card_num`, `meta_sale_document_num` {% enddocs %} # timing From 812c80a9a6d663cc9ed2497e1d3289cc317477c5 Mon Sep 17 00:00:00 2001 From: Dan Snow Date: Fri, 27 Dec 2024 20:26:52 +0000 Subject: [PATCH 11/11] Rename and reorganize tests --- dbt/models/model/schema.yml | 92 ++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index f9310884f..70a4249c7 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -4,87 +4,89 @@ sources: tables: - name: assessment_card description: '{{ doc("table_assessment_card") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_assessment_card_unique_by_pin_card_and_year + name: model_assessment_card_unique_pin_card_year_run combination_of_columns: - meta_pin - meta_card_num - meta_year - run_id config: - # We add a fixed level of errors since duplicated data exists before - # these tests were implemented. If duplicated data is added after 12/24/2024, - # warnings will transition to errors. - error_if: ">5748" - tags: - - load_auto + # We add fixed error thresholds here since duplicated data + # exists from before these tests were implemented. If duplicated + # data is added after 12/27/2024, warnings should be errors. 
+ error_if: ">5748" - name: assessment_pin description: '{{ doc("table_assessment_pin") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_assessment_pin_unique_by_pin_year_and_run_id + name: model_assessment_pin_unique_pin_year_run combination_of_columns: - meta_pin - meta_year - run_id config: - error_if: ">2016" - tags: - - load_auto + error_if: ">2016" - name: feature_importance + description: '{{ doc("table_feature_importance") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_feature_importance_unique + name: model_feature_importance_unique_year_run_feature combination_of_columns: - year - run_id - model_predictor_all_name - description: '{{ doc("table_feature_importance") }}' - tags: - - load_auto - name: metadata + description: '{{ doc("table_metadata") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_metadata_unique_by_year_and_run_id + name: model_metadata_unique_year_run combination_of_columns: - year - run_id - description: '{{ doc("table_metadata") }}' - tags: - - load_auto - name: parameter_final description: '{{ doc("table_parameter_final") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_parameter_final_unique_by_year_and_run_id + name: model_parameter_final_unique_year_run combination_of_columns: - year - run_id - tags: - - load_auto - name: parameter_range description: '{{ doc("table_parameter_range") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_parameter_range_unique_by_year_run_id_and_parameter_name + name: model_parameter_range_unique_year_run_param combination_of_columns: - year - run_id - parameter_name - tags: - - load_auto - name: parameter_search description: '{{ doc("table_parameter_search") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_parameter_search_unique + name: model_parameter_search_unique_by_keys 
combination_of_columns: - year - run_id @@ -92,15 +94,15 @@ sources: - configuration - fold_id config: - error_if: ">400" - tags: - - load_auto + error_if: ">400" - name: performance description: '{{ doc("table_performance") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_performance_unique + name: model_performance_unique_by_keys combination_of_columns: - year - run_id @@ -109,14 +111,14 @@ sources: - geography_type - geography_id - class - tags: - - load_auto - name: performance_quantile description: '{{ doc("table_performance_quantile") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_performance_quantile_unique + name: model_performance_quantile_unique_by_keys combination_of_columns: - year - run_id @@ -127,29 +129,29 @@ sources: - class - num_quantile - quantile - tags: - - load_auto - name: shap description: '{{ doc("table_shap") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_shap_unique_by_year_run_id_meta_pin_meta_and_card_num + name: model_shap_unique_pin_card_year_run combination_of_columns: - year - run_id - meta_pin - meta_card_num config: - error_if: ">524" - tags: - - load_auto + error_if: ">524" - name: test_card description: '{{ doc("table_test_card") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_test_card_unique + name: model_test_card_unique_pin_card_year_run_doc combination_of_columns: - year - run_id @@ -157,20 +159,18 @@ sources: - meta_card_num - meta_sale_document_num config: - error_if: ">102422" - tags: - - load_auto + error_if: ">102422" - name: timing description: '{{ doc("table_timing") }}' + tags: + - load_auto data_tests: - unique_combination_of_columns: - name: model_timing_unique_by_year_run_id + name: model_timing_unique_year_run combination_of_columns: - year - run_id - tags: - - load_auto models: - name: model.final_model