diff --git a/DESCRIPTION b/DESCRIPTION index 4758f601c..20764a09e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.13.0.2 +Version: 0.13.0.5 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531")), diff --git a/NEWS.md b/NEWS.md index 388c5a822..08e3527b3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # datawizard (development) +BREAKING CHANGES + +* Argument `drop_na` in `data_match()` is deprecated now. Please use `remove_na` + instead. + CHANGES * The `select` argument, which is available in different functions to select diff --git a/R/data_group.R b/R/data_group.R index 00a7adf84..538c875c2 100644 --- a/R/data_group.R +++ b/R/data_group.R @@ -51,7 +51,7 @@ data_group <- function(data, to = my_grid[i, , drop = FALSE], match = "and", return_indices = TRUE, - drop_na = FALSE + remove_na = FALSE )) }) my_grid[[".rows"]] <- .rows diff --git a/R/data_match.R b/R/data_match.R index c03b3f222..6b522a0b8 100644 --- a/R/data_match.R +++ b/R/data_match.R @@ -15,7 +15,7 @@ #' @param return_indices Logical, if `FALSE`, return the vector of rows that #' can be used to filter the original data frame. If `FALSE` (default), #' returns directly the filtered data frame instead of the row indices. -#' @param drop_na Logical, if `TRUE`, missing values (`NA`s) are removed before +#' @param remove_na Logical, if `TRUE`, missing values (`NA`s) are removed before #' filtering the data. This is the default behaviour, however, sometimes when #' row indices are requested (i.e. `return_indices=TRUE`), it might be useful #' to preserve `NA` values, so returned row indices match the row indices of @@ -26,6 +26,7 @@ #' character vector (e.g. `c("x > 4", "y == 2")`) or a variable that contains #' the string representation of a logical expression. These might be useful #' when used in packages to avoid defining undefined global variables. +#' @param drop_na Deprecated, please use `remove_na` instead. #' #' @return A filtered data frame, or the row indices that match the specified #' configuration. @@ -100,12 +101,24 @@ #' data_filter(mtcars, fl) #' @inherit data_rename seealso #' @export -data_match <- function(x, to, match = "and", return_indices = FALSE, drop_na = TRUE, ...) { +data_match <- function(x, + to, + match = "and", + return_indices = FALSE, + remove_na = TRUE, + drop_na, + ...) { if (!is.data.frame(to)) { to <- as.data.frame(to) } original_x <- x + ## TODO: remove deprecated argument later + if (!missing(drop_na)) { + insight::format_warning("Argument `drop_na` is deprecated. Please use `remove_na` instead.") + remove_na <- drop_na + } + # evaluate match <- match.arg(tolower(match), c("and", "&", "&&", "or", "|", "||", "!", "not")) match <- switch(match, @@ -133,7 +146,7 @@ data_match <- function(x, to, match = "and", return_indices = FALSE, drop_na = T idx <- vector("numeric", length = 0L) } else { # remove missings before matching - if (isTRUE(drop_na)) { + if (isTRUE(remove_na)) { x <- x[stats::complete.cases(x), , drop = FALSE] } idx <- seq_len(nrow(x)) diff --git a/man/data_match.Rd b/man/data_match.Rd index a57c34768..a209170ab 100644 --- a/man/data_match.Rd +++ b/man/data_match.Rd @@ -5,7 +5,15 @@ \alias{data_filter} \title{Return filtered or sliced data frame, or row indices} \usage{ -data_match(x, to, match = "and", return_indices = FALSE, drop_na = TRUE, ...) +data_match( + x, + to, + match = "and", + return_indices = FALSE, + remove_na = TRUE, + drop_na, + ... +) data_filter(x, ...) } @@ -24,12 +32,14 @@ or \code{"not"} (or \code{"!"}).} can be used to filter the original data frame. If \code{FALSE} (default), returns directly the filtered data frame instead of the row indices.} -\item{drop_na}{Logical, if \code{TRUE}, missing values (\code{NA}s) are removed before +\item{remove_na}{Logical, if \code{TRUE}, missing values (\code{NA}s) are removed before filtering the data. This is the default behaviour, however, sometimes when row indices are requested (i.e. \code{return_indices=TRUE}), it might be useful to preserve \code{NA} values, so returned row indices match the row indices of the original data frame.} +\item{drop_na}{Deprecated, please use \code{remove_na} instead.} + \item{...}{A sequence of logical expressions indicating which rows to keep, or a numeric vector indicating the row indices of rows to keep. Can also be a string representation of a logical expression (e.g. \code{"x > 4"}), a diff --git a/tests/testthat/test-data_match.R b/tests/testthat/test-data_match.R index 75991b4b2..1a40f39fd 100644 --- a/tests/testthat/test-data_match.R +++ b/tests/testthat/test-data_match.R @@ -52,7 +52,7 @@ test_that("data_match works with missing data", { data.frame(c172code = 1, e16sex = 2), match = "not", return_indices = TRUE, - drop_na = FALSE + remove_na = FALSE )) expect_identical(x1, 41L) x1 <- length(data_match( @@ -60,7 +60,7 @@ test_that("data_match works with missing data", { data.frame(c172code = 1, e16sex = 2), match = "not", return_indices = TRUE, - drop_na = TRUE + remove_na = TRUE )) expect_identical(x1, 36L) })