Skip to content

Commit

Permalink
New argument zeros in ranktransform() (#573)
Browse files Browse the repository at this point in the history
* init

* minor

* fix lints
  • Loading branch information
etiennebacher authored Dec 15, 2024
1 parent 0faf924 commit 60c2fa5
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 15 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.13.0.18
Version: 0.13.0.19
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531")),
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ CHANGES
* `data_summary()` also accepts the results of `bayestestR::ci()` as summary
function (#483).

* `ranktransform()` has a new argument `zeros` to determine how zeros should be
handled when `sign = TRUE` (#573).

BUG FIXES

* `describe_distribution()` no longer errors if the sample was too sparse to compute
Expand Down
54 changes: 45 additions & 9 deletions R/ranktransform.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#' @param method Treatment of ties. Can be one of `"average"` (default),
#' `"first"`, `"last"`, `"random"`, `"max"` or `"min"`. See [rank()] for
#' details.
#' @param zeros How to handle zeros when `sign = TRUE`. If `"na"` (default),
#' zeros are set to `NA` (with a warning if `verbose = TRUE`). If `"signrank"`,
#' zeros are included in the ranking of absolute values and their signed rank
#' is `0`. This argument is ignored when `sign = FALSE`.
#' @param ... Arguments passed to or from other methods.
#' @inheritParams extract_column_names
#' @inheritParams standardize.data.frame
Expand All @@ -19,8 +22,11 @@
#' @examples
#' ranktransform(c(0, 1, 5, -5, -2))
#'
#' # Won't work
#' # ranktransform(c(0, 1, 5, -5, -2), sign = TRUE)
#' # By default, zeros are converted to NA
#' suppressWarnings(
#' ranktransform(c(0, 1, 5, -5, -2), sign = TRUE)
#' )
#' ranktransform(c(0, 1, 5, -5, -2), sign = TRUE, zeros = "signrank")
#'
#' head(ranktransform(trees))
#' @return A rank-transformed object.
Expand All @@ -38,13 +44,20 @@ ranktransform <- function(x, ...) {
ranktransform.numeric <- function(x,
sign = FALSE,
method = "average",
zeros = "na",
verbose = TRUE,
...) {
# no change if all values are `NA`s
if (all(is.na(x))) {
return(x)
}

zeros <- match.arg(zeros, c("na", "signrank"))
method <- match.arg(
method,
c("average", "first", "last", "random", "max", "min")
)

# Warning if only one value and return early
if (insight::has_single_value(x)) {
if (is.null(names(x))) {
Expand All @@ -54,7 +67,13 @@ ranktransform.numeric <- function(x,
}

if (verbose) {
insight::format_warning(paste0("Variable `", name, "` contains only one unique value and will not be normalized."))
insight::format_warning(
paste0(
"Variable `",
name,
"` contains only one unique value and will not be normalized."
)
)
}

return(x)
Expand All @@ -70,16 +89,31 @@ ranktransform.numeric <- function(x,
}

if (verbose) {
insight::format_warning(paste0("Variable `", name, "` contains only two different values. Consider converting it to a factor."))
# nolint
insight::format_warning(
paste0(
"Variable `",
name,
"` contains only two different values. Consider converting it to a factor."
)
)
}
}


if (sign) {
ZEROES <- x == 0
if (any(ZEROES) && verbose) insight::format_warning("Zeros detected. These cannot be sign-rank transformed.")
out <- rep(NA, length(x))
out[!ZEROES] <- sign(x[!ZEROES]) * rank(abs(x[!ZEROES]), ties.method = method, na.last = "keep")
if (zeros == "na") {
out <- rep(NA, length(x))
ZEROES <- x == 0
if (any(ZEROES) && verbose) {
insight::format_warning("Zeros detected. These cannot be sign-rank transformed.") # nolint
}
out[!ZEROES] <- sign(x[!ZEROES]) * rank(abs(x[!ZEROES]),
ties.method = method,
na.last = "keep"
)
} else if (zeros == "signrank") {
out <- sign(x) * rank(abs(x), ties.method = method, na.last = "keep")
}
} else {
out <- rank(x, ties.method = method, na.last = "keep")
}
Expand All @@ -102,6 +136,7 @@ ranktransform.grouped_df <- function(x,
method = "average",
ignore_case = FALSE,
regex = FALSE,
zeros = "na",
verbose = TRUE,
...) {
info <- attributes(x)
Expand Down Expand Up @@ -143,6 +178,7 @@ ranktransform.data.frame <- function(x,
method = "average",
ignore_case = FALSE,
regex = FALSE,
zeros = "na",
verbose = TRUE,
...) {
# evaluate arguments
Expand Down
21 changes: 18 additions & 3 deletions man/ranktransform.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 22 additions & 2 deletions tests/testthat/test-ranktransform.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@ test_that("signed rank works as expected", {
))))
})

# `zeros` controls how zeros are treated by the signed-rank transformation
# (`sign = TRUE`): "na" (the default) turns them into `NA` and warns, while
# "signrank" keeps them in the ranking and returns 0 for them.
test_that("argument 'zeros' works", {
  x <- c(-1, 0, 2, -3, 4)

  # default (`zeros = "na"`): a warning is raised when zeros are present ...
  expect_warning(
    ranktransform(x, sign = TRUE),
    "cannot be sign-rank"
  )
  # ... and zeros come back as `NA`, with only the non-zero values ranked.
  # `verbose = FALSE` silences the warning already checked above.
  expect_identical(
    ranktransform(x, sign = TRUE, verbose = FALSE),
    c(-1, NA, 2, -3, 4)
  )

  # `zeros = "signrank"`: zeros take part in the ranking of `abs(x)` (so the
  # remaining ranks are shifted up by one) and get a signed rank of 0
  expect_identical(
    ranktransform(x, sign = TRUE, zeros = "signrank"),
    c(-2, 0, 3, -4, 5)
  )

  # invalid choices are rejected by `match.arg()`
  expect_error(
    ranktransform(x, sign = TRUE, zeros = "foo"),
    "should be one of"
  )
})

test_that("ranktransform works with data frames", {
set.seed(123)
expect_snapshot(ranktransform(BOD))
Expand All @@ -58,9 +74,9 @@ test_that("ranktransform works with data frames (grouped data)", {
skip_if_not_installed("poorman")

set.seed(123)
value1 <- sample(1:20, 9, replace = TRUE)
value1 <- sample.int(20, 9, replace = TRUE)
set.seed(456)
value2 <- sample(1:20, 9, replace = TRUE)
value2 <- sample.int(20, 9, replace = TRUE)

test_df <- data.frame(
id = rep(c("A", "B", "C"), each = 3),
Expand All @@ -69,6 +85,7 @@ test_that("ranktransform works with data frames (grouped data)", {
stringsAsFactors = FALSE
)

# nolint start: nested_pipe_linter
expect_identical(
test_df %>%
poorman::group_by(id) %>%
Expand All @@ -81,6 +98,7 @@ test_that("ranktransform works with data frames (grouped data)", {
stringsAsFactors = FALSE
)
)
# nolint end
})


Expand All @@ -99,6 +117,7 @@ test_that("ranktransform works with data frames containing NAs (grouped data)",
stringsAsFactors = FALSE
)

# nolint start: nested_pipe_linter
expect_identical(
test_df %>%
poorman::group_by(id) %>%
Expand All @@ -111,6 +130,7 @@ test_that("ranktransform works with data frames containing NAs (grouped data)",
stringsAsFactors = FALSE
)
)
# nolint end
})

# select helpers ------------------------------
Expand Down

0 comments on commit 60c2fa5

Please sign in to comment.