diff --git a/dbt/models/model/docs.md b/dbt/models/model/docs.md index f5e4d4351..ad97b75c9 100644 --- a/dbt/models/model/docs.md +++ b/dbt/models/model/docs.md @@ -34,7 +34,7 @@ Overall feature importance by model run (`run_id`). Includes metrics such as gain, cover, and frequency. This is the output of the built-in LightGBM/XGBoost feature importance methods. -**Primary Key**: `year`, `run_id`, `model_predictor_name_all` +**Primary Key**: `year`, `run_id`, `model_predictor_all_name` {% enddocs %} # final_model @@ -77,7 +77,7 @@ If hyperparameters are blank for a given run, then that parameter was not used. Range of hyperparameters searched by a given model run (`run_id`) during cross-validation. -**Primary Key**: `year`, `run_id` +**Primary Key**: `year`, `run_id`, `parameter_name` {% enddocs %} # parameter_search @@ -86,7 +86,7 @@ during cross-validation. Hyperparameters used for _every_ cross-validation iteration, along with the corresponding performance statistics. -**Primary Key**: `year`, `run_id`, `iteration` +**Primary Key**: `year`, `run_id`, `iteration`, `configuration`, `fold_id` {% enddocs %} # performance @@ -113,7 +113,7 @@ The stages are: Identical to `model.performance`, but additionally broken out by quantile. **Primary Key**: `year`, `run_id`, `stage`, `triad_code`, `geography_type`, -`geography_id`, `by_class`, `quantile` +`geography_id`, `by_class`, `num_quantile`, `quantile` {% enddocs %} # shap @@ -138,7 +138,7 @@ The test set is the out-of-sample data used to evaluate model performance. Predictions in this table are trained using only data _not in this set of sales_. -**Primary Key**: `year`, `run_id`, `meta_pin`, `meta_card_num` +**Primary Key**: `year`, `run_id`, `meta_pin`, `meta_card_num`, `meta_sale_document_num` {% enddocs %} # timing @@ -165,7 +165,7 @@ data cached by DVC when possible. See [model-res-avm#getting-data](https://github.com/ccao-data/model-res-avm#getting-data) for more information. 
-**Primary Key**: `year`, `run_id`, `meta_pin`, `meta_card_num` +**Primary Key**: `year`, `meta_pin`, `meta_card_num` {% enddocs %} # vw_pin_condo_input @@ -178,7 +178,7 @@ Observations are at the PIN-14 (condo unit) level. Unlike the residential input view, this view does not perform filling. Instead condo characteristics are backfilled in `default.vw_pin_condo_char`. -**Primary Key**: `year`, `run_id`, `meta_pin` +**Primary Key**: `year`, `meta_pin` {% enddocs %} # vw_pin_shared_input @@ -187,5 +187,5 @@ are backfilled in `default.vw_pin_condo_char`. View to compile PIN-level model inputs shared between the residential (`model.vw_card_res_input`) and condo (`model.vw_pin_condo_input`) model views. -**Primary Key**: `year`, `run_id`, `meta_pin` -{% enddocs %} +**Primary Key**: `year`, `meta_pin` +{% enddocs %} \ No newline at end of file diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index 04048bfcf..70a4249c7 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -6,61 +6,171 @@ sources: description: '{{ doc("table_assessment_card") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_assessment_card_unique_pin_card_year_run + combination_of_columns: + - meta_pin + - meta_card_num + - meta_year + - run_id + config: + # We set fixed error thresholds here because duplicated data + # already exists from before these tests were implemented. Any duplicates + # added after 12/27/2024 should raise an error rather than a warning. 
+ error_if: ">5748" - name: assessment_pin description: '{{ doc("table_assessment_pin") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_assessment_pin_unique_pin_year_run + combination_of_columns: + - meta_pin + - meta_year + - run_id + config: + error_if: ">2016" - name: feature_importance description: '{{ doc("table_feature_importance") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_feature_importance_unique_year_run_feature + combination_of_columns: + - year + - run_id + - model_predictor_all_name - name: metadata description: '{{ doc("table_metadata") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_metadata_unique_year_run + combination_of_columns: + - year + - run_id - name: parameter_final description: '{{ doc("table_parameter_final") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_parameter_final_unique_year_run + combination_of_columns: + - year + - run_id - name: parameter_range description: '{{ doc("table_parameter_range") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_parameter_range_unique_year_run_param + combination_of_columns: + - year + - run_id + - parameter_name - name: parameter_search description: '{{ doc("table_parameter_search") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_parameter_search_unique_by_keys + combination_of_columns: + - year + - run_id + - iteration + - configuration + - fold_id + config: + error_if: ">400" - name: performance description: '{{ doc("table_performance") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_performance_unique_by_keys + combination_of_columns: + - year + - run_id + - stage + - triad_code + - geography_type + - geography_id + - class - name: performance_quantile description: '{{ doc("table_performance_quantile") }}' tags: - load_auto + data_tests: 
+ - unique_combination_of_columns: + name: model_performance_quantile_unique_by_keys + combination_of_columns: + - year + - run_id + - triad_code + - stage + - geography_type + - geography_id + - class + - num_quantile + - quantile - name: shap description: '{{ doc("table_shap") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_shap_unique_pin_card_year_run + combination_of_columns: + - year + - run_id + - meta_pin + - meta_card_num + config: + error_if: ">524" - name: test_card description: '{{ doc("table_test_card") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_test_card_unique_pin_card_year_run_doc + combination_of_columns: + - year + - run_id + - meta_pin + - meta_card_num + - meta_sale_document_num + config: + error_if: ">102422" - name: timing description: '{{ doc("table_timing") }}' tags: - load_auto + data_tests: + - unique_combination_of_columns: + name: model_timing_unique_year_run + combination_of_columns: + - year + - run_id models: - name: model.final_model