Skip to content

Commit

Permalink
Rename type and dictionary params in R package vars funs to match…
Browse files Browse the repository at this point in the history
… Python
  • Loading branch information
jeancochrane committed Dec 3, 2024
1 parent df375b5 commit 1bf493e
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 104 deletions.
126 changes: 82 additions & 44 deletions R/vars_funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,25 @@
#' example, rename all columns pulled from SQL to their standard names used
#' in modeling. Or, rename all standard modeling names to "pretty" names for
#' publication. This function will only rename things specified in
#' the user-supplied \code{dict} argument, all other names in the data will
#' remain unchanged.
#' the user-supplied \code{dictionary} argument, all other names in the data
#' will remain unchanged.
#'
#' Options for \code{names_from} and \code{names_to} are specific to the
#' specified \code{dict}. Run this function with \code{names_from} equal to
#' \code{NULL} to see a list of available options for the specified dictionary.
#' specified \code{dictionary}. Run this function with \code{names_from} equal
#' to \code{NULL} to see a list of available options for the specified
#' dictionary.
#'
#' @param data A data frame or tibble with columns to be renamed.
#' @param names_from The source/name type of data. See description
#' @param names_to The target names. See description
#' @param type Output type. Either \code{"inplace"}, which renames the input
#' data frame, or \code{"vector"}, which returns a named character vector with
#' the construction new_col_name = old_col_name.
#' @param dict The dictionary used to translate names. Uses
#' @param output_type Output type. Either \code{"inplace"}, which renames the
#' input data frame, or \code{"vector"}, which returns a named character
#' vector with the construction new_col_name = old_col_name.
#' @param type Deprecated. Use \code{output_type} instead.
#' @param dictionary The dictionary used to translate names. Uses
#' \code{\link{vars_dict}} by default. Use \code{\link{vars_dict_legacy}} for
#' legacy data column names.
#' @param dict Deprecated. Use \code{dictionary} instead.
#'
#' @return The input data frame with columns renamed.
#'
Expand All @@ -34,21 +37,21 @@
#' data = sample_data,
#' names_from = "sql",
#' names_to = "standard",
#' dict = ccao::vars_dict_legacy
#' dictionary = ccao::vars_dict_legacy
#' )
#' vars_rename(
#' data = sample_data,
#' names_from = "sql",
#' names_to = "pretty",
#' dict = ccao::vars_dict_legacy
#' dictionary = ccao::vars_dict_legacy
#' )
#'
#' # No renames will occur since no column names here are from SQL
#' vars_rename(
#' data = class_dict[1:5, 1:5],
#' names_from = "sql",
#' names_to = "pretty",
#' dict = ccao::vars_dict_legacy
#' dictionary = ccao::vars_dict_legacy
#' )
#'
#' # With data from Athena
Expand All @@ -59,33 +62,47 @@
#' data = sample_data_athena,
#' names_from = "athena",
#' names_to = "model",
#' dict = ccao::vars_dict
#' dictionary = ccao::vars_dict
#' )
#' vars_rename(
#' data = sample_data_athena,
#' names_from = "athena",
#' names_to = "pretty",
#' dict = ccao::vars_dict
#' dictionary = ccao::vars_dict
#' )
#' @md
#' @family vars_funs
#' @export
vars_rename <- function(data,
names_from = NULL,
names_to = NULL,
type = "inplace",
dict = ccao::vars_dict) {
output_type = "inplace",
dictionary = ccao::vars_dict,
# Deprecated args
type = NULL,
dict = NULL) {
# Check if deprecated arguments are used and override values if so
if (!is.null(type)) {
warning("'type' is deprecated. Use 'output_type' instead.", call. = FALSE)
output_type <- type
}

if (!is.null(dict)) {
warning("'dict' is deprecated. Use 'dictionary' instead.", call. = FALSE)
dictionary <- dict
}

# Check input data dictionary
stopifnot(
is.data.frame(dict),
sum(startsWith(names(dict), "var_name_")) >= 2,
nrow(dict) > 0
is.data.frame(dictionary),
sum(startsWith(names(dictionary), "var_name_")) >= 2,
nrow(dictionary) > 0
)

# Get vector of possible inputs to names_from and names_to from dictionary
poss_names_args <- gsub(
"var_name_", "",
names(dict)[startsWith(names(dict), "var_name_")]
names(dictionary)[startsWith(names(dictionary), "var_name_")]
)

# If args aren't in possible, throw error and list possible args
Expand All @@ -107,7 +124,7 @@ vars_rename <- function(data,
is.data.frame(data) | is.character(data),
tolower(names_from) %in% poss_names_args,
tolower(names_to) %in% poss_names_args,
tolower(type) %in% c("inplace", "vector")
tolower(output_type) %in% c("inplace", "vector")
)

# If the input is a dataframe, extract the names from that dataframe
Expand All @@ -117,15 +134,15 @@ vars_rename <- function(data,
to <- paste0("var_name_", names_to)

# Rename using dict, replacing any NAs with the original column names
names_wm <- dict[[to]][match(names_lst, dict[[from]])]
names_wm <- dictionary[[to]][match(names_lst, dictionary[[from]])]
names_wm[is.na(names_wm)] <- names_lst[is.na(names_wm)]

# Return names inplace if the input data is a data frame, else return a
# character vector of new names
if (is.data.frame(data) && type == "inplace") {
if (is.data.frame(data) && output_type == "inplace") {
names(data) <- names_wm
return(data)
} else if (is.character(data) || type == "vector") {
} else if (is.character(data) || output_type == "vector") {
return(names_wm)
}
}
Expand All @@ -140,7 +157,7 @@ vars_rename <- function(data,
#' must be specified via a user-defined dictionary. The default dictionary is
#' \code{\link{vars_dict}}.
#'
#' Options for \code{type} are:
#' Options for \code{code_type} are:
#'
#' - \code{"long"}, which transforms EXT_WALL = 1 to EXT_WALL = Frame
#' - \code{"short"}, which transforms EXT_WALL = 1 to EXT_WALL = FRME
Expand All @@ -151,13 +168,15 @@ vars_rename <- function(data,
#' @param cols A \code{<tidy-select>} column selection or vector of column
#' names. Looks for all columns with numerically encoded character
#' values by default.
#' @param type Output/recode type. See description for options.
#' @param code_type Output/recode type. See description for options.
#' @param type Deprecated. Use \code{code_type} instead.
#' @param as_factor If \code{TRUE}, re-encoded values will be returned as
#' factors with their levels pre-specified by the dictionary. Otherwise, will
#' return re-encoded values as characters only.
#' @param dict The dictionary used to translate encodings. Uses
#' @param dictionary The dictionary used to translate encodings. Uses
#' \code{\link{vars_dict}} by default. Use \code{\link{vars_dict_legacy}} for
#' legacy data column encodings.
#' @param dict Deprecated. Use \code{dictionary} instead.
#'
#' @note Values which are in the data but are NOT in \code{\link{vars_dict}}
#' will be converted to NA. For example, there is no numeric value 3 for AIR,
Expand All @@ -174,12 +193,12 @@ vars_rename <- function(data,
#' sample_data
#' vars_recode(
#' data = sample_data,
#' dict = ccao::vars_dict_legacy
#' dictionary = ccao::vars_dict_legacy
#' )
#' vars_recode(
#' data = sample_data,
#' type = "short",
#' dict = ccao::vars_dict_legacy
#' code_type = "short",
#' dictionary = ccao::vars_dict_legacy
#' )
#'
#' # Recode only the specified columns
Expand All @@ -189,26 +208,26 @@ vars_rename <- function(data,
#' vars_recode(
#' data = gar_sample,
#' cols = dplyr::starts_with("GAR"),
#' dict = ccao::vars_dict_legacy
#' dictionary = ccao::vars_dict_legacy
#' )
#' vars_recode(
#' data = gar_sample,
#' cols = "GAR1_SIZE",
#' dict = ccao::vars_dict_legacy
#' dictionary = ccao::vars_dict_legacy
#' )
#'
#' # Using data from Athena
#' sample_data_athena <- chars_sample_athena[1:5, c(1:5, 10:20)]
#' sample_data_athena
#' vars_recode(
#' data = sample_data_athena,
#' type = "code",
#' dict = ccao::vars_dict_legacy
#' code_type = "code",
#' dictionary = ccao::vars_dict_legacy
#' )
#' vars_recode(
#' data = sample_data_athena,
#' type = "long",
#' dict = ccao::vars_dict_legacy
#' code_type = "long",
#' dictionary = ccao::vars_dict_legacy
#' )
#' @md
#' @importFrom magrittr %>%
Expand All @@ -217,18 +236,37 @@ vars_rename <- function(data,
#' @export
vars_recode <- function(data,
cols = dplyr::everything(),
type = "long",
code_type = "long",
as_factor = TRUE,
dict = ccao::vars_dict) {
dictionary = ccao::vars_dict,
# Deprecated args
type = NULL,
dict = NULL) {
# Check if deprecated arguments are used and override values if so
if (!is.null(type)) {
warning("'type' is deprecated. Use 'code_type' instead.", call. = FALSE)
code_type <- type
}

if (!is.null(dict)) {
warning("'dict' is deprecated. Use 'dictionary' instead.", call. = FALSE)
dictionary <- dict
}

# Check input data dictionary
stopifnot(
is.data.frame(dict),
sum(startsWith(names(dict), "var_name_")) >= 1,
nrow(dict) > 0
is.data.frame(dictionary),
sum(startsWith(names(dictionary), "var_name_")) >= 1,
nrow(dictionary) > 0
)

# Check that the dictionary contains the correct columns
if (!any(c("var_code", "var_value", "var_value_short") %in% names(dict))) {
if (
!any(
c("var_code", "var_value", "var_value_short")
%in% names(dictionary)
)
) {
stop(
"Input dictionary must contain the following columns: ",
"var_code, var_value, var_value_short"
Expand All @@ -238,20 +276,20 @@ vars_recode <- function(data,
# Error/input checking
stopifnot(
is.data.frame(data),
type %in% c("code", "short", "long"),
code_type %in% c("code", "short", "long"),
is.logical(as_factor)
)

# Translate inputs to column names
var <- switch(type,
var <- switch(code_type,
"code" = "var_code",
"long" = "var_value",
"short" = "var_value_short"
)

# Convert chars dict into long format that can be easily referenced using
# any possible input column name
dict_long <- dict %>%
dict_long <- dictionary %>%
dplyr::filter(
.data$var_type == "char" & .data$var_data_type == "categorical"
) %>%
Expand Down
34 changes: 20 additions & 14 deletions man/vars_recode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1bf493e

Please sign in to comment.