From 1bf493e59a7ef58d049c5daa0bdeb000ff27f663 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Tue, 3 Dec 2024 18:30:33 +0000 Subject: [PATCH] Rename `type` and `dictionary` params in R package vars funs to match Python --- R/vars_funs.R | 126 +++++++++++++++++++++----------- man/vars_recode.Rd | 34 +++++---- man/vars_rename.Rd | 37 ++++++---- tests/testthat/test-vars_funs.R | 124 +++++++++++++++++++++++-------- 4 files changed, 217 insertions(+), 104 deletions(-) diff --git a/R/vars_funs.R b/R/vars_funs.R index 76103fa..5988224 100644 --- a/R/vars_funs.R +++ b/R/vars_funs.R @@ -5,22 +5,25 @@ #' example, rename all columns pulled from SQL to their standard names used #' in modeling. Or, rename all standard modeling names to "pretty" names for #' publication. This function will only rename things specified in -#' the user-supplied \code{dict} argument, all other names in the data will -#' remain unchanged. +#' the user-supplied \code{dictionary} argument, all other names in the data +#' will remain unchanged. #' #' Options for \code{names_from} and \code{names_to} are specific to the -#' specified \code{dict}. Run this function with \code{names_from} equal to -#' \code{NULL} to see a list of available options for the specified dictionary. +#' specified \code{dictionary}. Run this function with \code{names_from} equal +#' to \code{NULL} to see a list of available options for the specified +#' dictionary. #' #' @param data A data frame or tibble with columns to be renamed. #' @param names_from The source/name type of data. See description #' @param names_to The target names. See description -#' @param type Output type. Either \code{"inplace"}, which renames the input -#' data frame, or \code{"vector"}, which returns a named character vector with -#' the construction new_col_name = old_col_name. -#' @param dict The dictionary used to translate names. Uses +#' @param output_type Output type. 
Either \code{"inplace"}, which renames the +#' input data frame, or \code{"vector"}, which returns a named character +#' vector with the construction new_col_name = old_col_name. +#' @param type Deprecated. Use \code{output_type} instead. +#' @param dictionary The dictionary used to translate names. Uses #' \code{\link{vars_dict}} by default. Use \code{\link{vars_dict_legacy}} for #' legacy data column names. +#' @param dict Deprecated. Use \code{dictionary} instead. #' #' @return The input data frame with columns renamed. #' @@ -34,13 +37,13 @@ #' data = sample_data, #' names_from = "sql", #' names_to = "standard", -#' dict = ccao::vars_dict_legacy +#' dictionary = ccao::vars_dict_legacy #' ) #' vars_rename( #' data = sample_data, #' names_from = "sql", #' names_to = "pretty", -#' dict = ccao::vars_dict_legacy +#' dictionary = ccao::vars_dict_legacy #' ) #' #' # No renames will occur since no column names here are from SQL @@ -48,7 +51,7 @@ #' data = class_dict[1:5, 1:5], #' names_from = "sql", #' names_to = "pretty", -#' dict = ccao::vars_dict_legacy +#' dictionary = ccao::vars_dict_legacy #' ) #' #' # With data from Athena @@ -59,13 +62,13 @@ #' data = sample_data_athena, #' names_from = "athena", #' names_to = "model", -#' dict = ccao::vars_dict +#' dictionary = ccao::vars_dict #' ) #' vars_rename( #' data = sample_data_athena, #' names_from = "athena", #' names_to = "pretty", -#' dict = ccao::vars_dict +#' dictionary = ccao::vars_dict #' ) #' @md #' @family vars_funs @@ -73,19 +76,33 @@ vars_rename <- function(data, names_from = NULL, names_to = NULL, - type = "inplace", - dict = ccao::vars_dict) { + output_type = "inplace", + dictionary = ccao::vars_dict, + # Deprecated args + type = NULL, + dict = NULL) { + # Check if deprecated arguments are used and override values if so + if (!is.null(type)) { + warning("'type' is deprecated. Use 'output_type' instead.", call. 
= FALSE) + output_type <- type + } + + if (!is.null(dict)) { + warning("'dict' is deprecated. Use 'dictionary' instead.", call. = FALSE) + dictionary <- dict + } + # Check input data dictionary stopifnot( - is.data.frame(dict), - sum(startsWith(names(dict), "var_name_")) >= 2, - nrow(dict) > 0 + is.data.frame(dictionary), + sum(startsWith(names(dictionary), "var_name_")) >= 2, + nrow(dictionary) > 0 ) # Get vector of possible inputs to names_from and names_to from dictionary poss_names_args <- gsub( "var_name_", "", - names(dict)[startsWith(names(dict), "var_name_")] + names(dictionary)[startsWith(names(dictionary), "var_name_")] ) # If args aren't in possible, throw error and list possible args @@ -107,7 +124,7 @@ vars_rename <- function(data, is.data.frame(data) | is.character(data), tolower(names_from) %in% poss_names_args, tolower(names_to) %in% poss_names_args, - tolower(type) %in% c("inplace", "vector") + tolower(output_type) %in% c("inplace", "vector") ) # If the input is a dataframe, extract the names from that dataframe @@ -117,15 +134,15 @@ vars_rename <- function(data, to <- paste0("var_name_", names_to) # Rename using dict, replacing any NAs with the original column names - names_wm <- dict[[to]][match(names_lst, dict[[from]])] + names_wm <- dictionary[[to]][match(names_lst, dictionary[[from]])] names_wm[is.na(names_wm)] <- names_lst[is.na(names_wm)] # Return names inplace if the input data is a data frame, else return a # character vector of new names - if (is.data.frame(data) && type == "inplace") { + if (is.data.frame(data) && output_type == "inplace") { names(data) <- names_wm return(data) - } else if (is.character(data) || type == "vector") { + } else if (is.character(data) || output_type == "vector") { return(names_wm) } } @@ -140,7 +157,7 @@ vars_rename <- function(data, #' must be specified via a user-defined dictionary. The default dictionary is #' \code{\link{vars_dict}}. 
#' -#' Options for \code{type} are: +#' Options for \code{code_type} are: #' #' - \code{"long"}, which transforms EXT_WALL = 1 to EXT_WALL = Frame #' - \code{"short"}, which transforms EXT_WALL = 1 to EXT_WALL = FRME @@ -151,13 +168,15 @@ vars_rename <- function(data, #' @param cols A \code{} column selection or vector of column #' names. Looks for all columns with numerically encoded character #' values by default. -#' @param type Output/recode type. See description for options. +#' @param code_type Output/recode type. See description for options. +#' @param type Deprecated. Use \code{code_type} instead. #' @param as_factor If \code{TRUE}, re-encoded values will be returned as #' factors with their levels pre-specified by the dictionary. Otherwise, will #' return re-encoded values as characters only. -#' @param dict The dictionary used to translate encodings. Uses +#' @param dictionary The dictionary used to translate encodings. Uses #' \code{\link{vars_dict}} by default. Use \code{\link{vars_dict_legacy}} for #' legacy data column encodings. +#' @param dict Deprecated. Use \code{dictionary} instead. #' #' @note Values which are in the data but are NOT in \code{\link{vars_dict}} #' will be converted to NA. 
For example, there is no numeric value 3 for AIR, @@ -174,12 +193,12 @@ vars_rename <- function(data, #' sample_data #' vars_recode( #' data = sample_data, -#' dict = ccao::vars_dict_legacy +#' dictionary = ccao::vars_dict_legacy #' ) #' vars_recode( #' data = sample_data, -#' type = "short", -#' dict = ccao::vars_dict_legacy +#' code_type = "short", +#' dictionary = ccao::vars_dict_legacy #' ) #' #' # Recode only the specified columns @@ -189,12 +208,12 @@ vars_rename <- function(data, #' vars_recode( #' data = gar_sample, #' cols = dplyr::starts_with("GAR"), -#' dict = ccao::vars_dict_legacy +#' dictionary = ccao::vars_dict_legacy #' ) #' vars_recode( #' data = gar_sample, #' cols = "GAR1_SIZE", -#' dict = ccao::vars_dict_legacy +#' dictionary = ccao::vars_dict_legacy #' ) #' #' # Using data from Athena @@ -202,13 +221,13 @@ vars_rename <- function(data, #' sample_data_athena #' vars_recode( #' data = sample_data_athena, -#' type = "code", -#' dict = ccao::vars_dict_legacy +#' code_type = "code", +#' dictionary = ccao::vars_dict_legacy #' ) #' vars_recode( #' data = sample_data_athena, -#' type = "long", -#' dict = ccao::vars_dict_legacy +#' code_type = "long", +#' dictionary = ccao::vars_dict_legacy #' ) #' @md #' @importFrom magrittr %>% @@ -217,18 +236,37 @@ vars_rename <- function(data, #' @export vars_recode <- function(data, cols = dplyr::everything(), - type = "long", + code_type = "long", as_factor = TRUE, - dict = ccao::vars_dict) { + dictionary = ccao::vars_dict, + # Deprecated args + type = NULL, + dict = NULL) { + # Check if deprecated arguments are used and override values if so + if (!is.null(type)) { + warning("'type' is deprecated. Use 'code_type' instead.", call. = FALSE) + code_type <- type + } + + if (!is.null(dict)) { + warning("'dict' is deprecated. Use 'dictionary' instead.", call. 
= FALSE) + dictionary <- dict + } + # Check input data dictionary stopifnot( - is.data.frame(dict), - sum(startsWith(names(dict), "var_name_")) >= 1, - nrow(dict) > 0 + is.data.frame(dictionary), + sum(startsWith(names(dictionary), "var_name_")) >= 1, + nrow(dictionary) > 0 ) # Check that the dictionary contains the correct columns - if (!any(c("var_code", "var_value", "var_value_short") %in% names(dict))) { + if ( + !any( + c("var_code", "var_value", "var_value_short") + %in% names(dictionary) + ) + ) { stop( "Input dictionary must contain the following columns: ", "var_code, var_value, var_value_short" @@ -238,12 +276,12 @@ vars_recode <- function(data, # Error/input checking stopifnot( is.data.frame(data), - type %in% c("code", "short", "long"), + code_type %in% c("code", "short", "long"), is.logical(as_factor) ) # Translate inputs to column names - var <- switch(type, + var <- switch(code_type, "code" = "var_code", "long" = "var_value", "short" = "var_value_short" @@ -251,7 +289,7 @@ vars_recode <- function(data, # Convert chars dict into long format that can be easily referenced use # any possible input column names - dict_long <- dict %>% + dict_long <- dictionary %>% dplyr::filter( .data$var_type == "char" & .data$var_data_type == "categorical" ) %>% diff --git a/man/vars_recode.Rd b/man/vars_recode.Rd index c1139a5..c790f9b 100644 --- a/man/vars_recode.Rd +++ b/man/vars_recode.Rd @@ -7,9 +7,11 @@ vars_recode( data, cols = dplyr::everything(), - type = "long", + code_type = "long", as_factor = TRUE, - dict = ccao::vars_dict + dictionary = ccao::vars_dict, + type = NULL, + dict = NULL ) } \arguments{ @@ -19,15 +21,19 @@ vars_recode( names. Looks for all columns with numerically encoded character values by default.} -\item{type}{Output/recode type. See description for options.} +\item{code_type}{Output/recode type. 
See description for options.} \item{as_factor}{If \code{TRUE}, re-encoded values will be returned as factors with their levels pre-specified by the dictionary. Otherwise, will return re-encoded values as characters only.} -\item{dict}{The dictionary used to translate encodings. Uses +\item{dictionary}{The dictionary used to translate encodings. Uses \code{\link{vars_dict}} by default. Use \code{\link{vars_dict_legacy}} for legacy data column encodings.} + +\item{type}{Deprecated. Use \code{code_type} instead.} + +\item{dict}{Deprecated. Use \code{dictionary} instead.} } \value{ The input data frame with re-encoded values for the specified @@ -41,7 +47,7 @@ EXT_WALL = "Masonry". Note that the values and their translations are must be specified via a user-defined dictionary. The default dictionary is \code{\link{vars_dict}}. -Options for \code{type} are: +Options for \code{code_type} are: \itemize{ \item \code{"long"}, which transforms EXT_WALL = 1 to EXT_WALL = Frame \item \code{"short"}, which transforms EXT_WALL = 1 to EXT_WALL = FRME @@ -62,12 +68,12 @@ sample_data <- chars_sample_universe[1:5, 18:27] sample_data vars_recode( data = sample_data, - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) vars_recode( data = sample_data, - type = "short", - dict = ccao::vars_dict_legacy + code_type = "short", + dictionary = ccao::vars_dict_legacy ) # Recode only the specified columns @@ -77,12 +83,12 @@ gar_sample vars_recode( data = gar_sample, cols = dplyr::starts_with("GAR"), - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) vars_recode( data = gar_sample, cols = "GAR1_SIZE", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) # Using data from Athena @@ -90,13 +96,13 @@ sample_data_athena <- chars_sample_athena[1:5, c(1:5, 10:20)] sample_data_athena vars_recode( data = sample_data_athena, - type = "code", - dict = ccao::vars_dict_legacy + code_type = "code", + dictionary = ccao::vars_dict_legacy ) 
vars_recode( data = sample_data_athena, - type = "long", - dict = ccao::vars_dict_legacy + code_type = "long", + dictionary = ccao::vars_dict_legacy ) } \seealso{ diff --git a/man/vars_rename.Rd b/man/vars_rename.Rd index c8dfa8a..4ef1144 100644 --- a/man/vars_rename.Rd +++ b/man/vars_rename.Rd @@ -9,8 +9,10 @@ vars_rename( data, names_from = NULL, names_to = NULL, - type = "inplace", - dict = ccao::vars_dict + output_type = "inplace", + dictionary = ccao::vars_dict, + type = NULL, + dict = NULL ) } \arguments{ @@ -20,13 +22,17 @@ vars_rename( \item{names_to}{The target names. See description} -\item{type}{Output type. Either \code{"inplace"}, which renames the input -data frame, or \code{"vector"}, which returns a named character vector with -the construction new_col_name = old_col_name.} +\item{output_type}{Output type. Either \code{"inplace"}, which renames the +input data frame, or \code{"vector"}, which returns a named character +vector with the construction new_col_name = old_col_name.} -\item{dict}{The dictionary used to translate names. Uses +\item{dictionary}{The dictionary used to translate names. Uses \code{\link{vars_dict}} by default. Use \code{\link{vars_dict_legacy}} for legacy data column names.} + +\item{type}{Deprecated. Use \code{output_type} instead.} + +\item{dict}{Deprecated. Use \code{dictionary} instead.} } \value{ The input data frame with columns renamed. @@ -36,12 +42,13 @@ Bulk rename columns from one type of CCAO data to another. For example, rename all columns pulled from SQL to their standard names used in modeling. Or, rename all standard modeling names to "pretty" names for publication. This function will only rename things specified in -the user-supplied \code{dict} argument, all other names in the data will -remain unchanged. +the user-supplied \code{dictionary} argument, all other names in the data +will remain unchanged. Options for \code{names_from} and \code{names_to} are specific to the -specified \code{dict}. 
Run this function with \code{names_from} equal to -\code{NULL} to see a list of available options for the specified dictionary. +specified \code{dictionary}. Run this function with \code{names_from} equal +to \code{NULL} to see a list of available options for the specified +dictionary. } \examples{ @@ -53,13 +60,13 @@ vars_rename( data = sample_data, names_from = "sql", names_to = "standard", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) vars_rename( data = sample_data, names_from = "sql", names_to = "pretty", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) # No renames will occur since no column names here are from SQL @@ -67,7 +74,7 @@ vars_rename( data = class_dict[1:5, 1:5], names_from = "sql", names_to = "pretty", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) # With data from Athena @@ -78,13 +85,13 @@ vars_rename( data = sample_data_athena, names_from = "athena", names_to = "model", - dict = ccao::vars_dict + dictionary = ccao::vars_dict ) vars_rename( data = sample_data_athena, names_from = "athena", names_to = "pretty", - dict = ccao::vars_dict + dictionary = ccao::vars_dict ) } \seealso{ diff --git a/tests/testthat/test-vars_funs.R b/tests/testthat/test-vars_funs.R index 1a29513..ea0cf82 100644 --- a/tests/testthat/test-vars_funs.R +++ b/tests/testthat/test-vars_funs.R @@ -9,7 +9,7 @@ test_that("output is as expected", { data = chars_sample_universe[, 21:32], names_from = "sql", names_to = "standard", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy )), c( "char_apts", "char_ext_wall", "char_roof_cnst", "char_rooms", "char_beds", @@ -22,7 +22,7 @@ test_that("output is as expected", { cdu_dict, names_from = "sql", names_to = "pretty", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy )), c("cdu_code", "cdu_type", "cdu_desc", "cdu_desc_short") ) @@ -31,8 +31,8 @@ test_that("output is as expected", { data = chars_sample_universe[, 
14:19], names_from = "sql", names_to = "standard", - type = "vector", - dict = ccao::vars_dict_legacy + output_type = "vector", + dictionary = ccao::vars_dict_legacy ), c( "meta_certified_est_land", "meta_modeling_group", "char_age", @@ -44,7 +44,7 @@ test_that("output is as expected", { data = chars_sample_athena[, 14:19], names_from = "athena", names_to = "pretty", - dict = ccao::vars_dict + dictionary = ccao::vars_dict )), c( "Apartments", "Cathedral Ceiling", "Attic Finish", @@ -56,7 +56,7 @@ test_that("output is as expected", { data = c("apts", "condition_desirability_and_utility", "per_ass"), names_from = "socrata", names_to = "standard", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ), c("char_apts", "meta_cdu", "meta_per_ass") ) @@ -65,7 +65,7 @@ test_that("output is as expected", { data = c("APTS", "EXT_WALL", "BEDS"), names_from = "sql", names_to = "standard", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ), c("char_apts", "char_ext_wall", "char_beds") ) @@ -74,12 +74,50 @@ test_that("output is as expected", { data = c("char_apts", "char_ext_wall", "char_beds"), names_from = "athena", names_to = "iasworld", - dict = ccao::vars_dict + dictionary = ccao::vars_dict ), c("user14", "extwall", "rmbed") ) }) +test_that("deprecation warnings get emitted", { + expect_warning( + vars_rename( + data = chars_sample_athena[, 14:19], + names_from = "athena", + names_to = "pretty", + type = "vector" + ), + "'type' is deprecated" + ) + expect_warning( + vars_rename( + data = chars_sample_athena[, 14:19], + names_from = "athena", + names_to = "pretty", + dict = ccao::vars_dict + ), + "'dict' is deprecated" + ) + # Test that the deprecated params produce the same output as the new params + expect_equal( + vars_rename( + data = chars_sample_athena[, 14:19], + names_from = "athena", + names_to = "pretty", + type = "vector", + dict = ccao::vars_dict + ), + vars_rename( + data = chars_sample_athena[, 14:19], + names_from = 
"athena", + names_to = "pretty", + output_type = "vector", + dictionary = ccao::vars_dict + ) + ) +}) + # Test that invalid inputs throw errors test_that("invalid data types stop process", { expect_condition(vars_rename(1)) @@ -88,7 +126,7 @@ test_that("invalid data types stop process", { data = chars_sample_universe, names_to = "HEADT", names_from = "sql", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) ) expect_error( @@ -96,7 +134,7 @@ test_that("invalid data types stop process", { data = chars_sample_universe, names_from = "OPEN", names_to = "sql", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) ) expect_error( @@ -104,8 +142,8 @@ test_that("invalid data types stop process", { data = chars_sample_universe, names_from = "sql", names_to = "pretty", - type = "list", - dict = ccao::vars_dict_legacy + output_type = "list", + dictionary = ccao::vars_dict_legacy ) ) expect_error( @@ -113,7 +151,7 @@ test_that("invalid data types stop process", { data = chars_sample_universe, names_from = "sql", names_to = NULL, - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) ) expect_error( @@ -121,7 +159,7 @@ test_that("invalid data types stop process", { data = chars_sample_universe, names_from = NULL, names_to = "sql", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) ) expect_error( @@ -129,7 +167,7 @@ test_that("invalid data types stop process", { data = chars_sample_universe, names_from = NULL, names_to = "sql", - dict = c("sql" = "char") + dictionary = c("sql" = "char") ) ) expect_error( @@ -137,7 +175,7 @@ test_that("invalid data types stop process", { data = chars_sample_universe, names_from = NULL, names_to = "sql", - dict = ccao::vars_dict_legacy[, 5:10] + dictionary = ccao::vars_dict_legacy[, 5:10] ) ) expect_error( @@ -145,7 +183,7 @@ test_that("invalid data types stop process", { data = chars_sample_universe, names_from = "sql", names_to = "pretty", - dict = ccao::vars_dict + 
dictionary = ccao::vars_dict ) ) }) @@ -209,30 +247,30 @@ test_that("output is as expected", { expect_known_hash( vars_recode( data = chars_sample_universe, - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ), hash = "8c41990e86" ) expect_known_hash( - vars_recode(data = chars_sample_athena, type = "long"), + vars_recode(data = chars_sample_athena, code_type = "long"), hash = "d3f8b1e3cd" ) expect_equivalent( vars_recode( data = recode_test_data, - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ), recode_correct ) expect_equivalent( - vars_recode(data = recode_test_data_athena, type = "short"), + vars_recode(data = recode_test_data_athena, code_type = "short"), recode_correct_athena ) expect_equivalent( vars_recode( data = recode_test_data, as_factor = FALSE, - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ), recode_correct %>% dplyr::mutate(dplyr::across(where(is.factor), as.character)) @@ -240,47 +278,71 @@ test_that("output is as expected", { expect_known_hash( vars_recode( data = chars_sample_universe, - type = "short", - dict = ccao::vars_dict_legacy + code_type = "short", + dictionary = ccao::vars_dict_legacy ), hash = "ecd0d79b5d" ) expect_known_hash( vars_recode( data = chars_sample_universe, - type = "short", + code_type = "short", as_factor = FALSE, - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ), hash = "aed980d873" ) }) +test_that("deprecation warnings get emitted", { + expect_warning( + vars_recode(data = recode_test_data_athena, type = "short"), + "'type' is deprecated" + ) + expect_warning( + vars_recode(data = recode_test_data_athena, dict = ccao::vars_dict), + "'dict' is deprecated" + ) + # Test that the deprecated params produce the same output as the new params + expect_equal( + vars_recode( + data = recode_test_data_athena, + type = "short", + dict = ccao::vars_dict + ), + vars_recode( + data = recode_test_data_athena, + code_type = "short", + dictionary = 
ccao::vars_dict + ) + ) +}) + # Test that invalid inputs throw errors test_that("invalid data types stop process", { expect_error( vars_recode( data = "cat", - dict = ccao::vars_dict_legacy + dictionary = ccao::vars_dict_legacy ) ) expect_error( vars_recode( data = chars_sample_universe, - type = "HEADT", - dict = ccao::vars_dict_legacy + code_type = "HEADT", + dictionary = ccao::vars_dict_legacy ) ) expect_error( vars_recode( data = chars_sample_universe, - dict = ccao::vars_dict_legacy[, 5:10] + dictionary = ccao::vars_dict_legacy[, 5:10] ) ) expect_error( vars_recode( data = chars_sample_universe, - dict = ccao::vars_dict_legacy[, 6:14] + dictionary = ccao::vars_dict_legacy[, 6:14] ) ) })