diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f4e0385..e47f9c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,11 @@ repos: entry: Cannot commit .Rhistory, .RData, .Rds or .rds. language: fail files: '\.(Rhistory|RData|Rds|rds)$' + - id: check-vars-dict + name: Validate vars_dict + entry: Rscript scripts/check-vars-dict.R + files: data/vars_dict.rda + language: r - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.7.4 hooks: diff --git a/R/data.R b/R/data.R index f6a094d..04496c4 100644 --- a/R/data.R +++ b/R/data.R @@ -153,7 +153,7 @@ #' to their human-readable value (ROOF_CNST = 1 #' becomes ROOF_CNST = Shingle/Asphalt). #' -#' @format A data frame with 509 rows and 11 variables: +#' @format A data frame with 518 rows and 11 variables: #' \describe{ #' \item{var_name_hie}{Column name of variable when stored in the legacy #' ADDCHARS SQL table} diff --git a/data-raw/vars_dict.csv b/data-raw/vars_dict.csv index 3673a4a..b1160f8 100644 --- a/data-raw/vars_dict.csv +++ b/data-raw/vars_dict.csv @@ -488,7 +488,7 @@ qu_mlt_cd,card,card,meta_card_num,card_num,Card Number,meta,character,,, ,,parking_space_flag_reason,parking_space_flag_reason,,Reason Parcel Is Considered Parking/Garage Space or Storage Unit,meta,character,,, ,,is_common_area,is_common_area,is_common_area,Building Common Area,meta,logical,,, ,,char_building_units,char_building_units,building_units,Total Condominium Building Livable Parcels,char,numeric,,, -,,char_building_sf ,char_building_sf ,building_sf ,Total Condominium Building Square Footage,char,numeric,,, +,,char_building_sf,char_building_sf,building_sf,Total Condominium Building Square Footage,char,numeric,,, ,,char_unit_sf,char_unit_sf,unit_sf,Condominium Unit Square Footage,char,numeric,,, ,,char_bedrooms,char_bedrooms,bedrooms,Condominium Unit Bedrooms,char,numeric,,, ,,char_half_baths,char_half_baths,half_baths,Condominium Unit Half Baths,char,numeric,,, @@ -509,11 +509,6 @@ 
qu_mlt_cd,card,card,meta_card_num,card_num,Card Number,meta,character,,, ,,ccao_is_active_exe_homeowner,ccao_is_active_exe_homeowner,is_active_exe_homeowner,Active Homeowner Exemption,ccao,logical,,, ,,ccao_n_years_exe_homeowner,ccao_n_years_exe_homeowner,n_years_exe_homeowner,Number of Years Active Homeowner Exemption,ccao,numeric,,, ,,sale_count_past_n_years,meta_sale_count_past_n_years,sale_count_past_n_years,Number of sales within previous N years of sale/lien date,meta,numeric,,, -,,char_building_sf,char_building_sf,building_sf,Building Square Footage,char,numeric,,, -,,char_unit_sf,char_unit_sf,unit_sf,Unit Square Footage,char,numeric,,, -,,char_bedrooms,char_bedrooms,bedrooms,Bedrooms,char,numeric,,, -,,char_half_baths,char_half_baths,half_baths,Half Baths,char,numeric,,, -,,char_full_baths,char_full_baths,full_baths,Full Baths,char,numeric,,, ,,strata_1,meta_strata_1,strata_1,Condominium Building Strata 1,meta,character,,, ,,strata_2,meta_strata_2,strata_2,Condominium Building Strata 2,meta,character,,, ,,shp_parcel_centroid_dist_ft_sd,shp_parcel_centroid_dist_ft_sd,parcel_centroid_dist_ft_sd,Standard Deviation Distance From Parcel Centroid to Vertices (Feet),shp,numeric,,, diff --git a/data/vars_dict.rda b/data/vars_dict.rda index 55660eb..a4b4552 100644 Binary files a/data/vars_dict.rda and b/data/vars_dict.rda differ diff --git a/man/vars_dict.Rd b/man/vars_dict.Rd index 97f47e4..b7227cc 100644 --- a/man/vars_dict.Rd +++ b/man/vars_dict.Rd @@ -5,7 +5,7 @@ \alias{vars_dict} \title{Data dictionary for CCAO data sets and variables} \format{ -A data frame with 509 rows and 11 variables: +A data frame with 518 rows and 11 variables: \describe{ \item{var_name_hie}{Column name of variable when stored in the legacy ADDCHARS SQL table} diff --git a/scripts/check-vars-dict.R b/scripts/check-vars-dict.R new file mode 100644 index 0000000..faa3988 --- /dev/null +++ b/scripts/check-vars-dict.R @@ -0,0 +1,17 @@ +#!/usr/bin/env Rscript +# Script to check that the 
# `vars_dict` data object is well-formed.
#
# Intended to be run from the repository root, e.g. via the pre-commit hook:
#   Rscript scripts/check-vars-dict.R
#
# The script loads data/vars_dict.rda and stops with an error (so Rscript
# exits with a non-zero status) when any combination of var_name_model,
# var_code and var_value occurs more than once among the rows whose
# var_name_model is not NA.

# load() invisibly returns the names of the objects it restored; use that to
# confirm the file really provides `vars_dict` before touching it
loaded_objects <- load("data/vars_dict.rda")
if (!"vars_dict" %in% loaded_objects) {
  stop(
    "data/vars_dict.rda does not contain an object named vars_dict",
    call. = FALSE
  )
}

# Check for duplicate model parameters
# Standard `[` indexing is used instead of subset(): subset() relies on
# non-standard evaluation and is documented as an interactive convenience
key_cols <- c("var_name_model", "var_code", "var_value")
has_model_name <- !is.na(vars_dict$var_name_model)
model_vars <- vars_dict[has_model_name, key_cols, drop = FALSE]

# duplicated() flags the second and later occurrences of each key combination
dupes <- model_vars[duplicated(model_vars), , drop = FALSE]

if (nrow(dupes) > 0) {
  stop(
    "Duplicate var_name_model entries in vars_dict: ",
    # Report each offending variable once, even if it has several duplicates
    paste(unique(dupes$var_name_model), collapse = ", "),
    call. = FALSE
  )
}