Skip to content

Commit

Permalink
Merge branch 'master' into linter-ci
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico authored Apr 20, 2024
2 parents 0c8ec4e + d420afe commit 9091b36
Show file tree
Hide file tree
Showing 31 changed files with 782 additions and 292 deletions.
3 changes: 2 additions & 1 deletion .dev/cc.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ sourceImports = function(path=getwd(), quiet=FALSE) {
if (!quiet) warning("No NAMESPACE file found, required to guarantee imports resolve correctly")
return(invisible())
}
suppressWarnings(rm("getRversion", envir=.GlobalEnv)) # clean up from previous cc() because parseNamespaceFile() run getRversion() in NAMESPACE in .GlobalEnv
nsParsedImports = parseNamespaceFile(basename(path), "..")$imports # weird signature to this function
if (!quiet && length(nsParsedImports)) cat(sprintf("Ensuring objects from %d import entries in NAMESPACE resolve correctly\n", length(nsParsedImports)))
for (ii in seq_along(nsParsedImports)) {
Expand All @@ -51,7 +52,7 @@ sourceImports = function(path=getwd(), quiet=FALSE) {
return(invisible())
}

cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.getenv("PROJ_PATH"), CC="gcc", quiet=FALSE) {
cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.getenv("PROJ_PATH", unset="."), CC="gcc", quiet=FALSE) {
if (!missing(cc_dir)) {
warning("'cc_dir' arg is deprecated, use 'path' argument or 'PROJ_PATH' env var instead")
path = cc_dir
Expand Down
103 changes: 103 additions & 0 deletions .github/workflows/R-CMD-check-occasional.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
on:
schedule:
- cron: '18 13 8 * *' # 8th of month at 13:18 UTC

# A more complete suite of checks to run monthly; each PR/merge need not pass all these, but they should pass before CRAN release
name: R-CMD-check-occasional

jobs:
R-CMD-check-occasional:
runs-on: ${{ matrix.os }}

name: ${{ matrix.os }} (${{ matrix.r }})

strategy:
matrix:
os: [macOS-latest, windows-latest, ubuntu-latest]
r: ['devel', 'release', '3.2', '3.3', '3.4', '3.5', '3.6', '4.0', '4.1', '4.2', '4.3']
locale: ['en_US.utf8', 'zh_CN.utf8', 'lv_LV.utf8'] # Chinese for translations, Latvian for collate order (#3502)
exclude: # only run non-English locale CI on Ubuntu
- os: macOS-latest
locale: 'zh_CN.utf8'
- os: macOS-latest
locale: 'lv_LV.utf8'
- os: windows-latest
locale: 'zh_CN.utf8'
- os: windows-latest
locale: 'lv_LV.utf8'

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
TEST_DATA_TABLE_WITH_OTHER_PACKAGES: true
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- name: Set locale
if: matrix.locale == 'en_US.utf8'
run: |
sudo locale-gen en_US
echo "LC_ALL=en_US.utf8" >> $GITHUB_ENV
- name: Set locale
if: matrix.locale == 'zh_CN.utf8'
run: |
sudo locale-gen zh_CN
echo "LC_ALL=zh_CN.utf8" >> $GITHUB_ENV
echo "LANGUAGE=zh_CN" >> $GITHUB_ENV
- name: Set locale
if: matrix.locale == 'lv_LV.utf8'
run: |
sudo locale-gen lv_LV
echo "LC_ALL=lv_LV.utf8" >> $GITHUB_ENV
echo "LANGUAGE=lv_LV" >> $GITHUB_ENV
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.r }}


- name: Query dependencies
run: |
install.packages('remotes')
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}

- name: Restore R package cache
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-

- name: Install system dependencies
if: runner.os == 'Linux'
run: |
while read -r cmd
do
eval sudo $cmd
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
- name: Install dependencies
run: |
remotes::install_deps(dependencies = TRUE)
remotes::install_cran("rcmdcheck")
shell: Rscript {0}

- name: Check
env:
_R_CHECK_CRAN_INCOMING_REMOTE_: false
run: |
options(crayon.enabled = TRUE)
rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
shell: Rscript {0}

- name: Upload check results
if: failure()
uses: actions/upload-artifact@main
with:
name: ${{ runner.os }}-r${{ matrix.r }}-results
path: check
23 changes: 23 additions & 0 deletions .github/workflows/performance-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
name: Autocomment atime-based performance regression analysis on PRs

on:
pull_request:
branches:
- '*'
types:
- opened
- reopened
- synchronize
paths:
- 'R/**'
- 'src/**'

jobs:
comment:
runs-on: ubuntu-latest
container: ghcr.io/iterative/cml:0-dvc2-base1
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
repo_token: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: Anirban166/[email protected]
12 changes: 11 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@
8. Computations in `j` can return a matrix or array _if it is one-dimensional_, e.g. a row or column vector, when `j` is a list of columns during grouping, [#783](https://github.com/Rdatatable/data.table/issues/783). Previously a matrix could be provided `DT[, expr, by]` form, but not `DT[, list(expr), by]` form; this resolves that inconsistency. It is still an error to return a "true" array, e.g. a `2x3` matrix.
9. `fread` now supports automatic detection of `dec` (as either `.` or `,`, the latter being [common in many places in Europe, Africa, and South America](https://en.wikipedia.org/wiki/Decimal_separator)); this behavior is now the default, i.e. `dec='auto'`, [#2431](https://github.com/Rdatatable/data.table/issues/2431). This was our #2 most-requested issue. See [#3189](https://github.com/Rdatatable/data.table/issues/3189) and please do peruse this list and show support to the issues that would help you the most as we continue to use this metric to help prioritize development.
10. `measure` now supports user-specified `cols` argument, which can be useful to specify a subset of columns to `melt`, without having to use a regex, [#5063](https://github.com/Rdatatable/data.table/issues/5063). Thanks to @UweBlock and @Henrik-P for reporting, and @tdhock for the PR.
11. `split.data.table` recognizes `sep=` when splitting with `by=`, just like the default and data.frame methods, [#5417](https://github.com/Rdatatable/data.table/issues/5417).
## BUG FIXES
1. `unique()` returns a copy in the case when `nrow(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix.
Expand Down Expand Up @@ -68,10 +74,14 @@
9. `print.data.table` now handles combination multibyte characters correctly when truncating wide string entries, [#5096](https://github.com/Rdatatable/data.table/issues/5096). Thanks to @MichaelChirico for the report and @joshhwuu for the fix.
10. `test.data.table()` runs correctly in more sessions, in particular those where the `digits` or `warn` settings are not their defaults (`7` and `0`, respectively), [#5285](https://github.com/Rdatatable/data.table/issues/5285). Thanks @OfekShilon for the report and suggested fix and @MichaelChirico for the PR.
10. `test.data.table()` runs robustly:
+ In sessions where the `digits` or `warn` options are not their defaults (`7` and `0`, respectively), [#5285](https://github.com/Rdatatable/data.table/issues/5285). Thanks @OfekShilon for the report and suggested fix and @MichaelChirico for the PR.
+ In locales where `letters != sort(letters)`, e.g. Latvian, [#3502](https://github.com/Rdatatable/data.table/issues/3502). Thanks @minemR for the report and @MichaelChirico for the fix.
11. Using `print.data.table` when truncation is needed with `row.names = FALSE` prints the indicator `---` in every value column instead of adding a blank column where the `rownames` would have been just to include `---`, [#4083](https://github.com/Rdatatable/data.table/issues/4083). Thanks @MichaelChirico for the report and @joshhwuu for the fix.
12. `print.data.table` now honors `na.print`, as seen in `print.default`, allowing for string replacement of `NA` values when printing. Thanks @HughParsonage for the report and @joshhwuu for the fix.
# data.table [v1.15.0](https://github.com/Rdatatable/data.table/milestone/29) (30 Jan 2024)
## BREAKING CHANGE
Expand Down
6 changes: 4 additions & 2 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2452,9 +2452,11 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR
dtq[["i"]] = quote(levs)
join = TRUE
}
dots = list(...)
if (!"sep" %chin% names(dots)) dots$sep = "."
dtq[["j"]] = substitute(
list(.ll.tech.split=list(.expr), .ll.tech.split.names=paste(lapply(.BY, as.character), collapse=".")),
list(.expr = if (join) quote(if(.N == 0L) .SD[0L] else .SD) else as.name(".SD"))
list(.ll.tech.split=list(.expr), .ll.tech.split.names=paste(lapply(.BY, as.character), collapse=.sep)),
list(.expr = if (join) quote(if(.N == 0L) .SD[0L] else .SD) else as.name(".SD"), .sep = dots$sep)
)
dtq[["by"]] = substitute( # retain order, for `join` and `sorted` it will use order of `i` data.table instead of `keyby`.
.expr,
Expand Down
18 changes: 10 additions & 8 deletions R/fmelt.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,18 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
stopf("pattern must be character string")
}
match.vec = regexpr(pattern, cols, perl=TRUE)
measure.vec = which(0 < match.vec)
if (length(measure.vec) == 0L) {
measure.vec.i = which(0 < match.vec)
if (length(measure.vec.i) == 0L) {
stopf("pattern did not match any cols, so nothing would be melted; fix by changing pattern")
}
start = attr(match.vec, "capture.start")[measure.vec, , drop=FALSE]
start = attr(match.vec, "capture.start")[measure.vec.i, , drop=FALSE]
if (is.null(start)) {
stopf("pattern must contain at least one capture group (parenthesized sub-pattern)")
}
err.args.groups("number of capture groups in pattern", ncol(start))
end = attr(match.vec, "capture.length")[measure.vec,]+start-1L
names.mat = matrix(cols[measure.vec], nrow(start), ncol(start))
end = attr(match.vec, "capture.length")[measure.vec.i,]+start-1L
measure.vec <- cols[measure.vec.i]
names.mat = matrix(measure.vec, nrow(start), ncol(start))
substr(names.mat, start, end)
} else { #pattern not specified, so split using sep.
if (!is.character(sep)) {
Expand All @@ -130,10 +131,11 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
stopf("each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification")
}
err.args.groups("max number of items after splitting column names", n.groups)
measure.vec = which(vector.lengths==n.groups)
do.call(rbind, list.of.vectors[measure.vec])
measure.vec.i = which(vector.lengths==n.groups)
measure.vec = cols[measure.vec.i]
do.call(rbind, list.of.vectors[measure.vec.i])
}
err.names.unique("measured columns", cols[measure.vec])
err.names.unique("measured columns", measure.vec)
uniq.mat = unique(group.mat)
if (nrow(uniq.mat) < nrow(group.mat)) {
stopf("number of unique column IDs =%d is less than number of melted columns =%d; fix by changing pattern/sep", nrow(uniq.mat), nrow(group.mat))
Expand Down
5 changes: 3 additions & 2 deletions R/fread.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
fread = function(
input="", file=NULL, text=NULL, cmd=NULL, sep="auto", sep2="auto", dec=".", quote="\"", nrows=Inf, header="auto",
input="", file=NULL, text=NULL, cmd=NULL, sep="auto", sep2="auto", dec="auto", quote="\"", nrows=Inf, header="auto",
na.strings=getOption("datatable.na.strings","NA"), stringsAsFactors=FALSE, verbose=getOption("datatable.verbose",FALSE),
skip="__auto__", select=NULL, drop=NULL, colClasses=NULL, integer64=getOption("datatable.integer64","integer64"),
col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, index=NULL,
Expand All @@ -16,7 +16,8 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC")
else if (sep=="auto") sep="" # sep=="" at C level means auto sep
else stopifnot( nchar(sep)==1L ) # otherwise an actual character to use as sep
}
stopifnot( is.character(dec), length(dec)==1L, nchar(dec)==1L )
stopifnot( is.character(dec), length(dec)==1L)
if (dec == "auto") dec = "" else stopifnot(nchar(dec) == 1L)
# handle encoding, #563
if (length(encoding) != 1L || !encoding %chin% c("unknown", "UTF-8", "Latin-1")) {
stopf("Argument 'encoding' must be 'unknown', 'UTF-8' or 'Latin-1'.")
Expand Down
30 changes: 11 additions & 19 deletions R/print.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
print.keys=getOption("datatable.print.keys"),
trunc.cols=getOption("datatable.print.trunc.cols"),
quote=FALSE,
na.print=NULL,
timezone=FALSE, ...) {
# topn - print the top topn and bottom topn rows with '---' inbetween (5)
# nrows - under this the whole (small) table is printed, unless topn is provided (100)
Expand Down Expand Up @@ -109,6 +110,13 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
# When nrow(toprint) = 1, attributes get lost in the subset,
# function below adds those back when necessary
toprint = toprint_subset(toprint, cols_to_print)
trunc.cols <- length(not_printed) > 0L
}
print_default = function(x) {
if (col.names != "none") cut_colnames = identity
cut_colnames(print(x, right=TRUE, quote=quote, na.print=na.print))
# prints names of variables not shown in the print
if (trunc.cols) trunc_cols_message(not_printed, abbs, class, col.names)
}
if (printdots) {
if (isFALSE(row.names)) {
Expand All @@ -117,30 +125,14 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
toprint = rbind(head(toprint, topn + isTRUE(class)), "---"="", tail(toprint, topn))
}
rownames(toprint) = format(rownames(toprint), justify="right")
if (col.names == "none") {
cut_colnames(print(toprint, right=TRUE, quote=quote))
} else {
print(toprint, right=TRUE, quote=quote)
}
if (trunc.cols && length(not_printed) > 0L)
# prints names of variables not shown in the print
trunc_cols_message(not_printed, abbs, class, col.names)

print_default(toprint)
return(invisible(x))
}
if (nrow(toprint)>20L && col.names == "auto")
# repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them
# option to shut this off per request of Oleg Bondar on SO, #1482
toprint=rbind(toprint, matrix(if (quote) old else colnames(toprint), nrow=1L)) # fixes bug #97
if (col.names == "none") {
cut_colnames(print(toprint, right=TRUE, quote=quote))
} else {
print(toprint, right=TRUE, quote=quote)
}
if (trunc.cols && length(not_printed) > 0L)
# prints names of variables not shown in the print
trunc_cols_message(not_printed, abbs, class, col.names)

toprint = rbind(toprint, matrix(if (quote) old else colnames(toprint), nrow=1L)) # fixes bug #97
print_default(toprint)
invisible(x)
}

Expand Down
Loading

0 comments on commit 9091b36

Please sign in to comment.