diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index ee65ccb5..064677b4 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -10,7 +10,9 @@ on: pull_request: branches: [main, master] -name: R-CMD-check +name: R-CMD-check.yaml + +permissions: read-all jobs: R-CMD-check: @@ -25,24 +27,22 @@ jobs: - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - # Use 3.6 to trigger usage of RTools35 - - {os: windows-latest, r: '3.6'} - # use 4.1 to check with rtools40's older compiler - - {os: windows-latest, r: '4.1'} - - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} - - {os: ubuntu-latest, r: 'oldrel-2'} - - {os: ubuntu-latest, r: 'oldrel-3'} - - {os: ubuntu-latest, r: 'oldrel-4'} + # use 4.0 or 4.1 to check with rtools40's older compiler + - {os: windows-latest, r: 'oldrel-4'} + + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'oldrel-3'} + - {os: ubuntu-latest, r: 'oldrel-4'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -60,3 +60,4 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index ed7650c7..4bbce750 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -9,7 +9,9 @@ on: types: [published] workflow_dispatch: -name: pkgdown +name: pkgdown.yaml + +permissions: read-all jobs: pkgdown: @@ -22,7 +24,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -41,7 +43,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: clean: false branch: gh-pages diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml index 71f335b3..2edd93f2 100644 --- a/.github/workflows/pr-commands.yaml +++ b/.github/workflows/pr-commands.yaml @@ -4,7 +4,9 @@ on: issue_comment: types: [created] -name: Commands +name: pr-commands.yaml + +permissions: read-all jobs: document: @@ -13,8 +15,10 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: @@ -50,8 +54,10 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 27d45283..98822609 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -6,7 +6,9 @@ on: pull_request: branches: [main, master] -name: test-coverage +name: test-coverage.yaml + +permissions: read-all jobs: test-coverage: @@ -15,7 +17,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: @@ -23,28 +25,37 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr + extra-packages: any::covr, any::xml2 needs: coverage - name: Test coverage run: | - covr::codecov( + cov <- covr::package_coverage( quiet = FALSE, clean = FALSE, install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) + covr::to_cobertura(cov) shell: Rscript {0} + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + - name: Show testthat output if: always() run: | ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index cda5a8d3..c7dbf709 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,8 +15,8 @@ Description: Provides a data.table backend for 'dplyr'. The goal of License: MIT + file LICENSE URL: https://dtplyr.tidyverse.org, https://github.com/tidyverse/dtplyr BugReports: https://github.com/tidyverse/dtplyr/issues -Depends: - R (>= 3.6) +Depends: + R (>= 4.0) Imports: cli (>= 3.4.0), data.table (>= 1.13.0), @@ -35,10 +35,10 @@ Suggests: testthat (>= 3.1.2), tidyr (>= 1.1.0), waldo (>= 0.3.1) -VignetteBuilder: +VignetteBuilder: knitr Config/Needs/website: tidyverse/tidytemplate Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: {library(tidyr); list(markdown = TRUE)} -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 8f81a743..07d234ad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,9 +17,11 @@ S3method(distinct,dtplyr_step) S3method(do,dtplyr_step) S3method(dt_call,dtplyr_step) S3method(dt_call,dtplyr_step_assign) +S3method(dt_call,dtplyr_step_call) S3method(dt_call,dtplyr_step_first) S3method(dt_call,dtplyr_step_join) S3method(dt_call,dtplyr_step_modify) +S3method(dt_call,dtplyr_step_mutate) S3method(dt_call,dtplyr_step_set) S3method(dt_call,dtplyr_step_subset) S3method(dt_has_computation,dtplyr_step) diff --git a/R/step-call.R b/R/step-call.R index 7e9a01ff..0b130024 100644 --- a/R/step-call.R +++ b/R/step-call.R @@ -16,6 +16,7 @@ step_call <- function(parent, fun, args = list(), vars = parent$vars, in_place = ) } +#' @export dt_call.dtplyr_step_call <- function(x, needs_copy = x$needs_copy) { call2(x$fun, dt_call(x$parent, needs_copy), !!!x$args) } diff --git a/R/step-join.R b/R/step-join.R index 8f447abd..97877edb 100644 --- a/R/step-join.R +++ b/R/step-join.R @@ -82,7 +82,7 @@ dt_call.dtplyr_step_join <- function(x, needs_copy = x$needs_copy) { anti = call2("[", lhs, call2("!", rhs), on = on), semi = call2("[", lhs, call2("unique", call2("[", lhs, rhs, which = TRUE, nomatch = NULL, on = on))) ) - + if (x$style == "full") { default_suffix <- c(".x", ".y") if (!identical(x$suffix, default_suffix)) { @@ -133,7 +133,6 @@ right_join.dtplyr_step <- function(x, y, ..., by = NULL, copy = FALSE, suffix = step_join(x, y, by, style = "right", copy = copy, suffix = suffix) } - #' @importFrom dplyr inner_join #' @export inner_join.dtplyr_step <- function(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y")) { diff --git a/R/step-mutate.R b/R/step-mutate.R index 5906b132..c34100f0 100644 --- a/R/step-mutate.R +++ b/R/step-mutate.R @@ -26,6 +26,7 @@ step_mutate <- function(parent, new_vars = list(), use_braces = FALSE, by = new_ out } +#' @export dt_call.dtplyr_step_mutate <- function(x, needs_copy = x$needs_copy) { # i is always empty because we never mutate a subset if (is_empty(x$new_vars)) { diff --git a/README.Rmd b/README.Rmd index bdbc2650..ee2bfdbb 100644 --- a/README.Rmd +++ b/README.Rmd @@ -17,8 +17,8 @@ knitr::opts_chunk$set( [![CRAN status](https://www.r-pkg.org/badges/version/dtplyr)](https://cran.r-project.org/package=dtplyr) -[![R-CMD-check](https://github.com/tidyverse/dtplyr/workflows/R-CMD-check/badge.svg)](https://github.com/tidyverse/dtplyr/actions) -[![Codecov test coverage](https://codecov.io/gh/tidyverse/dtplyr/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/dtplyr?branch=main) +[![R-CMD-check](https://github.com/tidyverse/dtplyr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidyverse/dtplyr/actions/workflows/R-CMD-check.yaml) +[![Codecov test coverage](https://codecov.io/gh/tidyverse/dtplyr/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/dtplyr) ## Overview @@ -52,7 +52,7 @@ library(dtplyr) library(dplyr, warn.conflicts = FALSE) ``` -Then use `lazy_dt()` to create a "lazy" data table that tracks the operations performed on it. +Then use `lazy_dt()` to create a "lazy" data table that tracks the operations performed on it. ```{r} mtcars2 <- lazy_dt(mtcars) @@ -61,21 +61,21 @@ mtcars2 <- lazy_dt(mtcars) You can preview the transformation (including the generated data.table code) by printing the result: ```{r} -mtcars2 %>% - filter(wt < 5) %>% +mtcars2 %>% + filter(wt < 5) %>% mutate(l100k = 235.21 / mpg) %>% # liters / 100 km - group_by(cyl) %>% + group_by(cyl) %>% summarise(l100k = mean(l100k)) ``` But generally you should reserve this only for debugging, and use `as.data.table()`, `as.data.frame()`, or `as_tibble()` to indicate that you're done with the transformation and want to access the results: ```{r} -mtcars2 %>% - filter(wt < 5) %>% +mtcars2 %>% + filter(wt < 5) %>% mutate(l100k = 235.21 / mpg) %>% # liters / 100 km - group_by(cyl) %>% - summarise(l100k = mean(l100k)) %>% + group_by(cyl) %>% + summarise(l100k = mean(l100k)) %>% as_tibble() ``` @@ -83,13 +83,13 @@ mtcars2 %>% There are two primary reasons that dtplyr will always be somewhat slower than data.table: -* Each dplyr verb must do some work to convert dplyr syntax to data.table - syntax. This takes time proportional to the complexity of the input code, +* Each dplyr verb must do some work to convert dplyr syntax to data.table + syntax. This takes time proportional to the complexity of the input code, not the input _data_, so should be a negligible overhead for large datasets. - [Initial benchmarks][benchmark] suggest that the overhead should be under + [Initial benchmarks][benchmark] suggest that the overhead should be under 1ms per dplyr call. -* To match dplyr semantics, `mutate()` does not modify in place by default. +* To match dplyr semantics, `mutate()` does not modify in place by default. This means that most expressions involving `mutate()` must make a copy that would not be necessary if you were using data.table directly. (You can opt out of this behaviour in `lazy_dt()` with `immutable = FALSE`). diff --git a/README.md b/README.md index 7c7fb568..d5603a8d 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ [![CRAN status](https://www.r-pkg.org/badges/version/dtplyr)](https://cran.r-project.org/package=dtplyr) -[![R-CMD-check](https://github.com/tidyverse/dtplyr/workflows/R-CMD-check/badge.svg)](https://github.com/tidyverse/dtplyr/actions) +[![R-CMD-check](https://github.com/tidyverse/dtplyr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidyverse/dtplyr/actions/workflows/R-CMD-check.yaml) [![Codecov test -coverage](https://codecov.io/gh/tidyverse/dtplyr/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/dtplyr?branch=main) +coverage](https://codecov.io/gh/tidyverse/dtplyr/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/dtplyr) ## Overview @@ -47,6 +47,7 @@ other goodies that it provides: ``` r library(data.table) +#> Warning: package 'data.table' was built under R version 4.4.1 library(dtplyr) library(dplyr, warn.conflicts = FALSE) ``` @@ -62,10 +63,10 @@ You can preview the transformation (including the generated data.table code) by printing the result: ``` r -mtcars2 %>% - filter(wt < 5) %>% +mtcars2 %>% + filter(wt < 5) %>% mutate(l100k = 235.21 / mpg) %>% # liters / 100 km - group_by(cyl) %>% + group_by(cyl) %>% summarise(l100k = mean(l100k)) #> Source: local data table [3 x 2] #> Call: `_DT1`[wt < 5][, `:=`(l100k = 235.21/mpg)][, .(l100k = mean(l100k)), @@ -85,11 +86,11 @@ But generally you should reserve this only for debugging, and use you’re done with the transformation and want to access the results: ``` r -mtcars2 %>% - filter(wt < 5) %>% +mtcars2 %>% + filter(wt < 5) %>% mutate(l100k = 235.21 / mpg) %>% # liters / 100 km - group_by(cyl) %>% - summarise(l100k = mean(l100k)) %>% + group_by(cyl) %>% + summarise(l100k = mean(l100k)) %>% as_tibble() #> # A tibble: 3 × 2 #> cyl l100k diff --git a/tests/testthat/_snaps/step-call.md b/tests/testthat/_snaps/step-call.md index 48ffefc6..fc85d2d0 100644 --- a/tests/testthat/_snaps/step-call.md +++ b/tests/testthat/_snaps/step-call.md @@ -23,6 +23,6 @@ collect(drop_na(dt, "z")) Condition Error in `drop_na()`: - ! Can't subset columns that don't exist. + ! Can't select columns that don't exist. x Column `z` doesn't exist. diff --git a/tests/testthat/test-step-join.R b/tests/testthat/test-step-join.R index 03f67d37..65ff4d19 100644 --- a/tests/testthat/test-step-join.R +++ b/tests/testthat/test-step-join.R @@ -346,10 +346,10 @@ test_that("performs cartesian joins as needed", { test_that("performs cross join", { df1 <- data.frame(x = 1:2, y = "a", stringsAsFactors = FALSE) df2 <- data.frame(x = 3:4) + expected <- dplyr::cross_join(df1, df2) %>% as_tibble() dt1 <- lazy_dt(df1, "dt1") dt2 <- lazy_dt(df2, "dt2") - expected <- left_join(df1, df2, by = character()) %>% as_tibble() expect_snapshot(left_join(dt1, dt2, by = character())) expect_equal(left_join(dt1, dt2, by = character()) %>% collect(), expected)