From 80437a37c58142e2dbfe0a17a31b9dd4427e9747 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 21 Nov 2024 22:19:54 +0100 Subject: [PATCH] Allow curl-styled pattern for `data_rename()` --- DESCRIPTION | 2 +- R/data_rename.R | 83 ++++++++++++++++++++++++++++++++++++++-------- man/data_rename.Rd | 49 ++++++++++++++++++++------- man/text_format.Rd | 19 ++++++----- 4 files changed, 119 insertions(+), 34 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2325c062d..be41e0f6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.13.0.13 +Version: 0.13.0.14 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531")), diff --git a/R/data_rename.R b/R/data_rename.R index 18f45657b..1d541882a 100644 --- a/R/data_rename.R +++ b/R/data_rename.R @@ -10,18 +10,34 @@ #' pipe-workflow. #' #' @param data A data frame, or an object that can be coerced to a data frame. -#' @param pattern Character vector. For `data_rename()`, indicates columns that -#' should be selected for renaming. Can be `NULL` (in which case all columns -#' are selected). For `data_addprefix()` or `data_addsuffix()`, a character -#' string, which will be added as prefix or suffix to the column names. For -#' `data_rename()`, `pattern` can also be a named vector. In this case, names -#' are used as values for the `replacement` argument (i.e. `pattern` can be a -#' character vector using ` = ""` and argument `replacement` -#' will be ignored then). -#' @param replacement Character vector. Indicates the new name of the columns -#' selected in `pattern`. Can be `NULL` (in which case column are numbered -#' in sequential order). If not `NULL`, `pattern` and `replacement` must be -#' of the same length. If `pattern` is a named vector, `replacement` is ignored. +#' @param pattern Character vector. 
+#' - For `data_addprefix()` or `data_addsuffix()`, a character string, which +#' will be added as prefix or suffix to the column names. +#' - For `data_rename()`, indicates columns that should be selected for +#' renaming. Can be `NULL` (in which case all columns are selected). +#' `pattern` can also be a named vector. In this case, names are used as +#' values for the `replacement` argument (i.e. `pattern` can be a character +#' vector using `<new name> = "<old name>"` and argument `replacement` will +#' be ignored then). +#' @param replacement Character vector. Can be one of the following: +#' - A character vector that indicates the new name of the columns selected in +#' `pattern`. `pattern` and `replacement` must be of the same length. +#' - `NULL`, in which case columns are numbered in sequential order. +#' - A string (i.e. character vector of length 1) with a "curl" styled pattern. +#' Currently supported tokens are `{col}` and `{n}`. `{col}` will be replaced +#' by the column name, i.e. the corresponding value in `pattern`. `{n}` will +#' be replaced by the number of the variable that is replaced. For instance, +#' ```r +#' data_rename( +#' mtcars, +#' pattern = c("am", "vs"), +#' replacement = "new_name_from_{col}" +#' ) +#' ``` +#' would return new column names `new_name_from_am` and `new_name_from_vs`. +#' See 'Examples'. +#' +#' If `pattern` is a named vector, `replacement` is ignored. #' @param rows Vector of row names. #' @param safe Do not throw error if for instance the variable to be #' renamed/removed doesn't exist. 
@@ -45,6 +61,10 @@ #' #' # Change all #' head(data_rename(iris, replacement = paste0("Var", 1:5))) +#' +#' # Use curl-styled patterns +#' head(data_rename(mtcars[1:3], c("mpg", "cyl", "disp"), "formerly_{col}")) +#' head(data_rename(mtcars[1:3], c("mpg", "cyl", "disp"), "{col}_is_column_{n}")) #' @seealso #' - Functions to rename stuff: [data_rename()], [data_rename_rows()], [data_addprefix()], [data_addsuffix()] #' - Functions to reorder or remove columns: [data_reorder()], [data_relocate()], [data_remove()] @@ -122,6 +142,11 @@ data_rename <- function(data, } } + # check if we have "curl" styled replacement-string + curl_style <- length(replacement) == 1 && + grepl("{", replacement, fixed = TRUE) && + length(pattern) > 1 + if (length(replacement) > length(pattern) && verbose) { insight::format_alert( paste0( @@ -129,7 +154,7 @@ data_rename <- function(data, length(replacement) - length(pattern), " names of `replacement` are not used." ) ) - } else if (length(replacement) < length(pattern) && verbose) { + } else if (length(replacement) < length(pattern) && verbose && !curl_style) { insight::format_alert( paste0( "There are more names in `pattern` than in `replacement`. The last ", @@ -138,6 +163,11 @@ data_rename <- function(data, ) } + # if we have curl-styled replacement-string, create replacement pattern now + if (curl_style) { + replacement <- .curl_replacement(pattern, replacement) + } + for (i in seq_along(pattern)) { if (!is.na(replacement[i])) { data <- .data_rename(data, pattern[i], replacement[i], safe, verbose) @@ -167,6 +197,33 @@ data_rename <- function(data, } +.curl_replacement <- function(pattern, replacement) { + # this function replaces "curl" tokens into their related + # real names/values. 
Currently, the following tokens are accepted: + # - {col}: replacement is the name of the column (indicated in "pattern") + # - {n}: replacement is the number of the variable out of n, that should be renamed + out <- rep_len("", length(pattern)) + for (i in seq_along(out)) { + # prepare pattern + column_name <- pattern[i] + out[i] <- replacement + # replace first accepted token + out[i] <- gsub( + "(.*)(\\{col\\})(.*)", + replacement = paste0("\\1", column_name, "\\3"), + x = out[i] + ) + # replace second accepted token + out[i] <- gsub( + "(.*)(\\{n\\})(.*)", + replacement = paste0("\\1", i, "\\3"), + x = out[i] + ) + } + out +} + + # Row.names ---------------------------------------------------------------- #' @rdname data_rename diff --git a/man/data_rename.Rd b/man/data_rename.Rd index 2ff779c21..0ba4fa381 100644 --- a/man/data_rename.Rd +++ b/man/data_rename.Rd @@ -43,14 +43,17 @@ data_rename_rows(data, rows = NULL) \arguments{ \item{data}{A data frame, or an object that can be coerced to a data frame.} -\item{pattern}{Character vector. For \code{data_rename()}, indicates columns that -should be selected for renaming. Can be \code{NULL} (in which case all columns -are selected). For \code{data_addprefix()} or \code{data_addsuffix()}, a character -string, which will be added as prefix or suffix to the column names. For -\code{data_rename()}, \code{pattern} can also be a named vector. In this case, names -are used as values for the \code{replacement} argument (i.e. \code{pattern} can be a -character vector using \verb{<new name> = "<old name>"} and argument \code{replacement} -will be ignored then).} +\item{pattern}{Character vector. +\itemize{ +\item For \code{data_addprefix()} or \code{data_addsuffix()}, a character string, which +will be added as prefix or suffix to the column names. +\item For \code{data_rename()}, indicates columns that should be selected for +renaming. Can be \code{NULL} (in which case all columns are selected). +\code{pattern} can also be a named vector. 
In this case, names are used as +values for the \code{replacement} argument (i.e. \code{pattern} can be a character +vector using \verb{<new name> = "<old name>"} and argument \code{replacement} will +be ignored then). +}} \item{select}{Variables that will be included when performing the required tasks. Can be either @@ -107,10 +110,28 @@ functions (see 'Details'), this argument may be used as workaround.} \item{...}{Other arguments passed to or from other functions.} -\item{replacement}{Character vector. Indicates the new name of the columns -selected in \code{pattern}. Can be \code{NULL} (in which case column are numbered -in sequential order). If not \code{NULL}, \code{pattern} and \code{replacement} must be -of the same length. If \code{pattern} is a named vector, \code{replacement} is ignored.} +\item{replacement}{Character vector. Can be one of the following: +\itemize{ +\item A character vector that indicates the new name of the columns selected in +\code{pattern}. \code{pattern} and \code{replacement} must be of the same length. +\item \code{NULL}, in which case columns are numbered in sequential order. +\item A string (i.e. character vector of length 1) with a "curl" styled pattern. +Currently supported tokens are \code{{col}} and \code{{n}}. \code{{col}} will be replaced +by the column name, i.e. the corresponding value in \code{pattern}. \code{{n}} will +be replaced by the number of the variable that is replaced. For instance, + +\if{html}{\out{
<div class="sourceCode r">}}\preformatted{data_rename( + mtcars, + pattern = c("am", "vs"), + replacement = "new_name_from_\{col\}" +) +}\if{html}{\out{</div>
}} + +would return new column names \code{new_name_from_am} and \code{new_name_from_vs}. +See 'Examples'. +} + +If \code{pattern} is a named vector, \code{replacement} is ignored.} \item{safe}{Do not throw error if for instance the variable to be renamed/removed doesn't exist.} @@ -148,6 +169,10 @@ head(data_rename(iris, NULL)) # Change all head(data_rename(iris, replacement = paste0("Var", 1:5))) + +# Use curl-styled patterns +head(data_rename(mtcars[1:3], c("mpg", "cyl", "disp"), "formerly_{col}")) +head(data_rename(mtcars[1:3], c("mpg", "cyl", "disp"), "{col}_is_column_{n}")) } \seealso{ \itemize{ diff --git a/man/text_format.Rd b/man/text_format.Rd index 14d64b096..16c76e67c 100644 --- a/man/text_format.Rd +++ b/man/text_format.Rd @@ -50,14 +50,17 @@ text elements will not be enclosed.} \item{n}{The number of characters to find.} -\item{pattern}{Character vector. For \code{data_rename()}, indicates columns that -should be selected for renaming. Can be \code{NULL} (in which case all columns -are selected). For \code{data_addprefix()} or \code{data_addsuffix()}, a character -string, which will be added as prefix or suffix to the column names. For -\code{data_rename()}, \code{pattern} can also be a named vector. In this case, names -are used as values for the \code{replacement} argument (i.e. \code{pattern} can be a -character vector using \verb{<new name> = "<old name>"} and argument \code{replacement} -will be ignored then).} +\item{pattern}{Character vector. +\itemize{ +\item For \code{data_addprefix()} or \code{data_addsuffix()}, a character string, which +will be added as prefix or suffix to the column names. +\item For \code{data_rename()}, indicates columns that should be selected for +renaming. Can be \code{NULL} (in which case all columns are selected). +\code{pattern} can also be a named vector. In this case, names are used as +values for the \code{replacement} argument (i.e. 
\code{pattern} can be a character +vector using \verb{<new name> = "<old name>"} and argument \code{replacement} will +be ignored then). +}} } \value{ A character string.