From 729f545fbed3df1744bc6c00a1a005c87e5dca44 Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Tue, 10 Dec 2024 07:31:36 -0700 Subject: [PATCH 01/10] Use `mutate(.keep = "none")` --- R/step-subset-transmute.R | 38 +------------------------------------- 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/R/step-subset-transmute.R b/R/step-subset-transmute.R index b2c6e20c..d19fc434 100644 --- a/R/step-subset-transmute.R +++ b/R/step-subset-transmute.R @@ -13,41 +13,5 @@ #' dt <- lazy_dt(dplyr::starwars) #' dt %>% transmute(name, sh = paste0(species, "/", homeworld)) transmute.dtplyr_step <- function(.data, ...) { - dots <- capture_new_vars(.data, ...) - dots_list <- process_new_vars(.data, dots) - dots <- dots_list$dots - - groups <- group_vars(.data) - if (!is_empty(groups)) { - # TODO could check if there is actually anything mutated, e.g. to avoid - # DT[, .(x = x)] - is_group_var <- names(dots) %in% groups - group_dots <- dots[is_group_var] - - .data <- mutate(ungroup(.data), !!!group_dots) - .data <- group_by(.data, !!!syms(groups)) - - dots <- dots[!is_group_var] - } - - if (is_empty(dots)) { - # grouping variables have been removed from `dots` so `select()` would - # produce a message "Adding grouping vars". - # As `dplyr::transmute()` doesn't generate a message when adding group vars - # we can also leave it away here - return(select(.data, !!!group_vars(.data))) - } - - if (!dots_list$use_braces) { - j <- call2(".", !!!dots) - } else { - j <- mutate_with_braces(dots)$expr - } - vars <- union(group_vars(.data), names(dots)) - out <- step_subset_j(.data, vars = vars, j = j) - if (dots_list$need_removal_step) { - out <- select(out, -tidyselect::all_of(dots_list$vars_removed)) - } - - out + mutate(.data, ..., .keep = "none") } From 34f4284f574eb739a3fcb73b74ff5466064cba6c Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Tue, 10 Dec 2024 07:32:56 -0700 Subject: [PATCH 02/10] Use a snapshot --- tests/testthat/_snaps/step-call.md | 7 +++++++ tests/testthat/test-step-call.R | 5 ++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/testthat/_snaps/step-call.md b/tests/testthat/_snaps/step-call.md index fc85d2d0..b1ff0514 100644 --- a/tests/testthat/_snaps/step-call.md +++ b/tests/testthat/_snaps/step-call.md @@ -17,6 +17,13 @@ Output setnames(copy(DT), c("a", "b", "c"), toupper) +# can compute distinct computed variables + + Code + dt %>% distinct(z = x + y) %>% show_query() + Output + unique(copy(dt)[, `:=`(z = x + y)][, `:=`(c("x", "y"), NULL)]) + # errors are raised Code diff --git a/tests/testthat/test-step-call.R b/tests/testthat/test-step-call.R index 2ff4e8d6..0b17dfad 100644 --- a/tests/testthat/test-step-call.R +++ b/tests/testthat/test-step-call.R @@ -136,9 +136,8 @@ test_that("keeps all variables if requested", { test_that("can compute distinct computed variables", { dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt") - expect_equal( - dt %>% distinct(z = x + y) %>% show_query(), - expr(unique(dt[, .(z = x + y)])) + expect_snapshot( + dt %>% distinct(z = x + y) %>% show_query() ) expect_equal( From 65470deb10300d192a19fb409981a8b2e7a17e61 Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Tue, 10 Dec 2024 07:33:19 -0700 Subject: [PATCH 03/10] Remove most tests as they're now covered by `mutate()` tests --- tests/testthat/test-step-subset-transmute.R | 247 +------------------- 1 file changed, 8 insertions(+), 239 deletions(-) diff --git a/tests/testthat/test-step-subset-transmute.R b/tests/testthat/test-step-subset-transmute.R index e892fb51..da6c88b9 100644 --- a/tests/testthat/test-step-subset-transmute.R +++ b/tests/testthat/test-step-subset-transmute.R @@ -1,249 +1,18 @@ -test_that("simple calls generate expected translations", { +test_that("works", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( - dt %>% transmute(x) %>% show_query(), - expr(DT[, .(x = x)]) + dt %>% transmute(x) %>% collect(), + dt %>% mutate(x, .keep = "none") %>% collect() ) }) -test_that("transmute generates compound expression if needed", { - dt <- lazy_dt(data.table(x = 1, y = 2), "DT") +test_that("empty dots preserves groups", { + dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") %>% + group_by(y) - expect_equal( - dt %>% transmute(x2 = x * 2, x4 = x2 * 2) %>% show_query(), - expr(DT[, { - x2 <- x * 2 - x4 <- x2 * 2 - .(x2, x4) - }]) - ) -}) - -test_that("allows multiple assignment to the same variable", { - dt <- lazy_dt(data.table(x = 1, y = 2), "DT") - - # when nested - expect_equal( - dt %>% transmute(x = x * 2, x = x * 2) %>% show_query(), - expr(DT[, { - x <- x * 2 - x <- x * 2 - .(x) - }]) - ) - - # when not nested - expect_equal( - dt %>% transmute(z = 2, y = 3) %>% show_query(), - expr(DT[, .(z = 2, y = 3)]) - ) -}) - - -test_that("groups are respected", { - dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(y = 2) - - expect_equal(dt$vars, c("x", "y")) - expect_equal( - dt %>% show_query(), - expr(DT[, .(y = 2), keyby = .(x)]) - ) -}) - -test_that("grouping vars can be transmuted", { - dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(x = x + 1, y = 2) - - expect_equal(dt$vars, c("x", "y")) - expect_equal(dt$groups, "x") - expect_equal( - dt %>% show_query(), - expr(copy(DT)[, `:=`(x = x + 1)][, .(y = 2), keyby = .(x)]) - ) - - skip("transmuting grouping vars with nesting is not supported") - dt <- lazy_dt(data.table(x = 1), "DT") %>% - group_by(x) %>% - transmute(x = x + 1, y = x + 1, x = y + 1) - - expect_equal(dt$vars, c("x", "y")) - expect_equal( - dt %>% collect(), - tibble(x = 4, y = 3) %>% group_by(x) - ) -}) - -test_that("empty transmute works", { - dt <- lazy_dt(data.frame(x = 1), "DT") - expect_equal(transmute(dt) %>% show_query(), expr(DT[, 0L])) - expect_equal(transmute(dt)$vars, character()) - expect_equal(transmute(dt, !!!list()) %>% show_query(), expr(DT[, 0L])) - - dt_grouped <- lazy_dt(data.frame(x = 1), "DT") %>% group_by(x) - expect_equal(transmute(dt_grouped)$vars, "x") -}) - -test_that("only transmuting groups works", { - dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x) - expect_equal(transmute(dt, x) %>% collect(), dt %>% collect()) - expect_equal(transmute(dt, x)$vars, "x") -}) - -test_that("across() can access previously created variables", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- transmute(dt, y = 2, across(y, sqrt)) - expect_equal( - collect(step), - tibble(y = sqrt(2)) - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - y <- sqrt(y) - .(y) - }]) - ) -}) - -test_that("new columns take precedence over global variables", { - dt <- lazy_dt(data.frame(x = 1), "DT") - y <- 'global var' - step <- transmute(dt, y = 2, z = y + 1) - expect_equal( - collect(step), - tibble(y = 2, z = 3) - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - z <- y + 1 - .(y, z) - }]) - ) -}) - -# var = NULL ------------------------------------------------------------- + res <- dt %>% transmute() %>% collect() -test_that("var = NULL when var is in original data", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- dt %>% transmute(x = 2, z = x*2, x = NULL) - expect_equal( - collect(step), - tibble(z = 4) - ) - expect_equal( - step$vars, - "z" - ) - expect_equal( - show_query(step), - expr(DT[, { - x <- 2 - z <- x * 2 - .(x, z) - }][, `:=`("x", NULL)]) - ) -}) - -test_that("var = NULL when var is in final output", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- transmute(dt, y = NULL, y = 3) - expect_equal( - collect(step), - tibble(y = 3) - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- NULL - y <- 3 - .(y) - }]) - ) -}) - -test_that("temp var with nested arguments", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- transmute(dt, y = 2, z = y*2, y = NULL) - expect_equal( - collect(step), - tibble(z = 4) - ) - expect_equal( - step$vars, - "z" - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - z <- y * 2 - .(y, z) - }][, `:=`("y", NULL)]) - ) -}) - -test_that("temp var with no new vars added", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- transmute(dt, y = 2, y = NULL) - expect_equal( - collect(step), - tibble() - ) - expect_equal( - step$vars, - character() - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - .(y) - }][, `:=`("y", NULL)]) - ) -}) - -test_that("var = NULL works when data is grouped", { - dt <- lazy_dt(data.frame(x = 1, g = 1), "DT") %>% group_by(g) - - # when var is in original data - step <- dt %>% transmute(x = 2, z = x*2, x = NULL) - expect_equal( - collect(step), - tibble(g = 1, z = 4) %>% group_by(g) - ) - expect_equal( - step$vars, - c("g", "z") - ) - expect_equal( - show_query(step), - expr(DT[, { - x <- 2 - z <- x * 2 - .(x, z) - }, keyby = .(g)][, `:=`("x", NULL)]) - ) - - # when var is not in original data - step <- transmute(dt, y = 2, z = y*2, y = NULL) - expect_equal( - collect(step), - tibble(g = 1, z = 4) %>% group_by(g) - ) - expect_equal( - step$vars, - c("g", "z") - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - z <- y * 2 - .(y, z) - }, keyby = .(g)][, `:=`("y", NULL)]) - ) + expect_equal(names(res), "y") }) From 61b026196031b1b9c2edae7daa59609505b21568 Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Tue, 10 Dec 2024 07:33:37 -0700 Subject: [PATCH 04/10] Remove `transmute()` test from wrong file --- tests/testthat/test-step-subset-summarise.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/testthat/test-step-subset-summarise.R b/tests/testthat/test-step-subset-summarise.R index fae4eba3..6e7921da 100644 --- a/tests/testthat/test-step-subset-summarise.R +++ b/tests/testthat/test-step-subset-summarise.R @@ -5,11 +5,6 @@ test_that("simple calls generate expected translations", { dt %>% summarise(x = mean(x)) %>% show_query(), expr(DT[, .(x = mean(x))]) ) - - expect_equal( - dt %>% transmute(x) %>% show_query(), - expr(DT[, .(x = x)]) - ) }) test_that("can use with across", { From 15c1af853d4a0d1df3c57fd8cd2ab31d75a6e099 Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Tue, 10 Dec 2024 07:33:48 -0700 Subject: [PATCH 05/10] Remove `transmute()` test --- tests/testthat/test-step-mutate.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/testthat/test-step-mutate.R b/tests/testthat/test-step-mutate.R index 172c8e52..14756c41 100644 --- a/tests/testthat/test-step-mutate.R +++ b/tests/testthat/test-step-mutate.R @@ -59,11 +59,6 @@ test_that("generates single calls as expect", { dt %>% group_by(x) %>% mutate(x2 = x * 2) %>% show_query(), expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)]) ) - - expect_equal( - dt %>% transmute(x2 = x * 2) %>% show_query(), - expr(DT[, .(x2 = x * 2)]) - ) }) test_that("mutate generates compound expression if needed", { From 1baf2f421fe605813af651489961b966ae0e5027 Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Tue, 10 Dec 2024 07:34:00 -0700 Subject: [PATCH 06/10] News bullet --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index e6eeda24..4932e252 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,8 @@ * `print.dtplyr_step()` gains `n`, `max_extra_cols`, and `max_footer_lines` args (#464) +* `transmute()` preserves row count and avoids unnecessary copies (#470) + # dtplyr 1.3.1 * Fix for failing R CMD check. From 8bb7056823092b8655b0cc65f76d4f1dd3aaf53b Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Thu, 26 Dec 2024 11:41:39 -0700 Subject: [PATCH 07/10] Preserve column order --- R/step-subset-transmute.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/step-subset-transmute.R b/R/step-subset-transmute.R index d19fc434..0faaeafa 100644 --- a/R/step-subset-transmute.R +++ b/R/step-subset-transmute.R @@ -13,5 +13,9 @@ #' dt <- lazy_dt(dplyr::starwars) #' dt %>% transmute(name, sh = paste0(species, "/", homeworld)) transmute.dtplyr_step <- function(.data, ...) { - mutate(.data, ..., .keep = "none") + out <- mutate(.data, ..., .keep = "none") + old_vars <- intersect(.data$vars, out$vars) + new_vars <- setdiff(out$vars, .data$vars) + vars <- c(old_vars, new_vars) + select(out, all_of(vars)) } From ec1dd71292bfc0723bfac5bc39c0d265ab30a472 Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Thu, 26 Dec 2024 11:41:47 -0700 Subject: [PATCH 08/10] Add test about column order --- tests/testthat/test-step-subset-transmute.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-step-subset-transmute.R b/tests/testthat/test-step-subset-transmute.R index da6c88b9..b2742ee9 100644 --- a/tests/testthat/test-step-subset-transmute.R +++ b/tests/testthat/test-step-subset-transmute.R @@ -16,3 +16,11 @@ test_that("empty dots preserves groups", { expect_equal(names(res), "y") }) +test_that("preserves column order", { + dt <- lazy_dt(data.table(x = 1, y = 1), "DT") + + res <- dt %>% transmute(y, x) %>% collect() + + expect_equal(names(res), c("x", "y")) +}) + From bb9a46bea25e46f3616ad2b620b499560ae4dc46 Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Thu, 26 Dec 2024 12:04:18 -0700 Subject: [PATCH 09/10] Get correct column order --- R/step-subset-transmute.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/step-subset-transmute.R b/R/step-subset-transmute.R index 0faaeafa..0e5f6754 100644 --- a/R/step-subset-transmute.R +++ b/R/step-subset-transmute.R @@ -14,8 +14,9 @@ #' dt %>% transmute(name, sh = paste0(species, "/", homeworld)) transmute.dtplyr_step <- function(.data, ...) { out <- mutate(.data, ..., .keep = "none") - old_vars <- intersect(.data$vars, out$vars) - new_vars <- setdiff(out$vars, .data$vars) - vars <- c(old_vars, new_vars) - select(out, all_of(vars)) + cols_expr <- names(capture_new_vars(.data, ...)) + cols_group <- group_vars(.data) + cols_group <- setdiff(cols_group, cols_expr) + cols_retain <- c(cols_group, cols_expr) + select(out, all_of(cols_retain)) } From dfd6772a01922e6a6ecf2d7ff88d90bf7ec9b7bc Mon Sep 17 00:00:00 2001 From: markfairbanks Date: Thu, 26 Dec 2024 12:04:24 -0700 Subject: [PATCH 10/10] Fix test --- tests/testthat/test-step-subset-transmute.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-step-subset-transmute.R b/tests/testthat/test-step-subset-transmute.R index b2742ee9..e4e51d31 100644 --- a/tests/testthat/test-step-subset-transmute.R +++ b/tests/testthat/test-step-subset-transmute.R @@ -21,6 +21,6 @@ test_that("preserves column order", { res <- dt %>% transmute(y, x) %>% collect() - expect_equal(names(res), c("x", "y")) + expect_equal(names(res), c("y", "x")) })