From 502d9f481e704958e48c42ee7741a589d32de63b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 3 Jan 2024 00:46:21 +0100 Subject: [PATCH 01/15] allow rbind for integer64 and character/complex --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 12 ++++++++++++ src/rbindlist.c | 8 +++++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index eed4b7899b..4fe43967dd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -560,6 +560,8 @@ 55. `fread(URL)` with `https:` and `ftps:` could timeout if proxy settings were not guessed right by `curl::curl_download`, [#1686](https://github.com/Rdatatable/data.table/issues/1686). `fread(URL)` now uses `download.file()` as default for downloading files from urls. Thanks to @cderv for the report and Benjamin Schwendinger for the fix. +56. `rbindlist` and `rbind` bind now `bit64::integer64` columns with `character`/`complex` columns, [#5504](https://github.com/Rdatatable/data.table/issues/5504). Thanks to @MichaelChirico for the request and @ben-schwen for the PR. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 48bb087c73..09bafec1f2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18164,3 +18164,15 @@ DT = data.table(id=1:2, x=1:2) r = copy(DT)[1L, x:= 5L] test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) + +# rbind ignore attributes and do not copy for integer64 and higher types #5504 +if (test_bit64) { + a = data.table(as.integer64(1)) + b = data.table("a") + c = data.table(as.complex(2)) + test(2242.41, rbind(a,b), data.table(c("1","a"))) + test(2242.42, rbind(b,a), data.table(c("a","1"))) + test(2242.43, rbind(a,c), data.table(as.complex(1:2))) + test(2242.44, rbind(c,a), data.table(as.complex(2:1))) + test(2242.45, rbind(a, data.table(list(1))), error="integer64 but maxType=='list'") +} diff --git a/src/rbindlist.c b/src/rbindlist.c index ba19d2c389..0183155f6a 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -321,11 +321,13 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (!foundName) { static char buff[12]; snprintf(buff,12,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) - if (int64 && maxType!=REALSXP) - error(_("Internal error: column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); // # nocov + if (int64 && maxType!=REALSXP && !(maxType==16||maxType==15)) + error(_("Column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); + if (date && INHERITS(firstCol, char_IDate)) maxType=INTSXP; // first encountered Date determines class and type #5309 SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list - if (!factor) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. + // #5504 do not copy class for mixing int64 and higher maxtypes CPLSXP and STRSXP + if (!factor && !(int64 && (maxType==16||maxType==15))) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. if (factor && anyNotStringOrFactor) { // in future warn, or use list column instead ... warning(_("Column %d contains a factor but not all items for the column are character or factor"), idcol+j+1); From 88da91583d4c51dcd0cf7666132b9b1e66ca9c3a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 3 Jan 2024 00:49:14 +0100 Subject: [PATCH 02/15] update comments --- inst/tests/tests.Rraw | 2 +- src/rbindlist.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 09bafec1f2..d3262963cc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18165,7 +18165,7 @@ r = copy(DT)[1L, x:= 5L] test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) -# rbind ignore attributes and do not copy for integer64 and higher types #5504 +# rbind do not copy class/attributes for integer64 and STRSXP/CPLSXP #5504 if (test_bit64) { a = data.table(as.integer64(1)) b = data.table("a") diff --git a/src/rbindlist.c b/src/rbindlist.c index 0183155f6a..fce30d88ef 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -326,7 +326,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (date && INHERITS(firstCol, char_IDate)) maxType=INTSXP; // first encountered Date determines class and type #5309 SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list - // #5504 do not copy class for mixing int64 and higher maxtypes CPLSXP and STRSXP + // #5504 do not copy class for mixing int64 and higher maxTypes CPLSXP and STRSXP if (!factor && !(int64 && (maxType==16||maxType==15))) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. if (factor && anyNotStringOrFactor) { From 61319680121ec04d8e6a3a7d97d1276c5b2b2482 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 3 Jan 2024 00:53:16 +0100 Subject: [PATCH 03/15] delete carried over line --- src/rbindlist.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index fce30d88ef..5e76ac3f24 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -323,7 +323,6 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) if (int64 && maxType!=REALSXP && !(maxType==16||maxType==15)) error(_("Column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); - if (date && INHERITS(firstCol, char_IDate)) maxType=INTSXP; // first encountered Date determines class and type #5309 SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list // #5504 do not copy class for mixing int64 and higher maxTypes CPLSXP and STRSXP From 510c95d9ea909690e2fc4d1834282c5d78717a99 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 3 Jan 2024 17:47:54 +0100 Subject: [PATCH 04/15] add vector support --- inst/tests/tests.Rraw | 6 ++++-- src/rbindlist.c | 14 +++++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d3262963cc..9859736bce 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18165,14 +18165,16 @@ r = copy(DT)[1L, x:= 5L] test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) -# rbind do not copy class/attributes for integer64 and STRSXP/CPLSXP #5504 +# rbind do not copy class/attributes for integer64 and CPLXSXP/STRSXP/VECSXP #5504 if (test_bit64) { a = data.table(as.integer64(1)) b = data.table("a") c = data.table(as.complex(2)) + d = data.table(list(2)) test(2242.41, rbind(a,b), data.table(c("1","a"))) test(2242.42, rbind(b,a), data.table(c("a","1"))) test(2242.43, rbind(a,c), data.table(as.complex(1:2))) test(2242.44, rbind(c,a), data.table(as.complex(2:1))) - test(2242.45, rbind(a, data.table(list(1))), error="integer64 but maxType=='list'") + test(2242.45, rbind(a,d), data.table(list(as.integer64(1), 2))) + test(2242.46, rbind(d,a), data.table(list(2, as.integer64(1)))) } diff --git a/src/rbindlist.c b/src/rbindlist.c index 5e76ac3f24..82c95fe275 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -321,12 +321,12 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (!foundName) { static char buff[12]; snprintf(buff,12,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) - if (int64 && maxType!=REALSXP && !(maxType==16||maxType==15)) - error(_("Column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); + if (int64 && !(maxType==REALSXP || maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP)) + error(_("Internal error: column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); // # nocov SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list - // #5504 do not copy class for mixing int64 and higher maxTypes CPLSXP and STRSXP - if (!factor && !(int64 && (maxType==16||maxType==15))) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. + // #5504 do not copy class for mixing int64 and higher maxTypes CPLXSXP/STRSXP/VECSXP + if (!factor && !(int64 && (maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP))) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. if (factor && anyNotStringOrFactor) { // in future warn, or use list column instead ... warning(_("Column %d contains a factor but not all items for the column are character or factor"), idcol+j+1); @@ -518,10 +518,14 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } else { if ((TYPEOF(target)==VECSXP || TYPEOF(target)==EXPRSXP) && TYPEOF(thisCol)!=TYPEOF(target)) { // do an as.list() on the atomic column; #3528 - thisCol = PROTECT(coerceVector(thisCol, TYPEOF(target))); nprotect++; + // use coerceAs for VECSXP and coerceVector for + thisCol = PROTECT(INHERITS(thisCol, char_integer64) ? coerceAs(thisCol, target, ScalarLogical(TRUE)) : coerceVector(thisCol, TYPEOF(target))); nprotect++; } // else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909) const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName); + if (TYPEOF(target)==VECSXP && INHERITS(thisCol, char_integer64)) { + for (int i=ansloc; i<=ansloc+thisnrow; ++i) copyMostAttrib(thisCol, VECTOR_ELT(target, i)); + } if (ret) warning(_("Column %d of item %d: %s"), w+1, i+1, ret); // e.g. when precision is lost like assigning 3.4 to integer64; test 2007.2 // TODO: but maxType should handle that and this should never warn From a743cd766eb1ebc151cb996a288ee7afead18ac9 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 3 Jan 2024 22:17:46 +0100 Subject: [PATCH 05/15] add coverage --- inst/tests/tests.Rraw | 20 ++++++++++---------- src/rbindlist.c | 5 +---- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9859736bce..831e47e891 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18167,14 +18167,14 @@ test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) # rbind do not copy class/attributes for integer64 and CPLXSXP/STRSXP/VECSXP #5504 if (test_bit64) { - a = data.table(as.integer64(1)) - b = data.table("a") - c = data.table(as.complex(2)) - d = data.table(list(2)) - test(2242.41, rbind(a,b), data.table(c("1","a"))) - test(2242.42, rbind(b,a), data.table(c("a","1"))) - test(2242.43, rbind(a,c), data.table(as.complex(1:2))) - test(2242.44, rbind(c,a), data.table(as.complex(2:1))) - test(2242.45, rbind(a,d), data.table(list(as.integer64(1), 2))) - test(2242.46, rbind(d,a), data.table(list(2, as.integer64(1)))) + a = data.table(as.integer64(c(1,2))) + b = data.table(c("a","b")) + c = data.table(as.complex(c(3,4))) + d = data.table(list(3.5, 4L)) + test(2242.41, rbind(a,b), data.table(c("1","2","a","b"))) + test(2242.42, rbind(b,a), data.table(c("a","b","1","2"))) + test(2242.43, rbind(a,c), data.table(as.complex(1:4))) + test(2242.44, rbind(c,a), data.table(as.complex(c(3,4,1,2)))) + test(2242.45, rbind(a,d), data.table(list(as.integer64(1), as.integer64(2), 3.5, 4L))) + test(2242.46, rbind(d,a), data.table(list(3.5, 4L, as.integer64(1), as.integer64(2)))) } diff --git a/src/rbindlist.c b/src/rbindlist.c index 82c95fe275..58cf2997a1 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -518,14 +518,11 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } else { if ((TYPEOF(target)==VECSXP || TYPEOF(target)==EXPRSXP) && TYPEOF(thisCol)!=TYPEOF(target)) { // do an as.list() on the atomic column; #3528 - // use coerceAs for VECSXP and coerceVector for + // coerceAs for int64 to copy attributes and coerceVector otherwise thisCol = PROTECT(INHERITS(thisCol, char_integer64) ? coerceAs(thisCol, target, ScalarLogical(TRUE)) : coerceVector(thisCol, TYPEOF(target))); nprotect++; } // else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909) const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName); - if (TYPEOF(target)==VECSXP && INHERITS(thisCol, char_integer64)) { - for (int i=ansloc; i<=ansloc+thisnrow; ++i) copyMostAttrib(thisCol, VECTOR_ELT(target, i)); - } if (ret) warning(_("Column %d of item %d: %s"), w+1, i+1, ret); // e.g. when precision is lost like assigning 3.4 to integer64; test 2007.2 // TODO: but maxType should handle that and this should never warn From b48166f6fcb375f292090f40038931b4baf0978e Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 3 Jan 2024 22:37:18 +0100 Subject: [PATCH 06/15] add CODEOWNER --- CODEOWNERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CODEOWNERS b/CODEOWNERS index 7d7a5ecaac..c59249d7b2 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -49,3 +49,7 @@ # utils /src/utils.c @HughParsonage + +# rbind +/src/rbindlist.c @ben-schwen +/man/rbindlist.Rd @ben-schwen From f4e45637334278042766af80143dc07c3d12f8a6 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 3 Jan 2024 22:39:48 +0100 Subject: [PATCH 07/15] update NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 4fe43967dd..8b8ab42575 100644 --- a/NEWS.md +++ b/NEWS.md @@ -560,7 +560,7 @@ 55. `fread(URL)` with `https:` and `ftps:` could timeout if proxy settings were not guessed right by `curl::curl_download`, [#1686](https://github.com/Rdatatable/data.table/issues/1686). `fread(URL)` now uses `download.file()` as default for downloading files from urls. Thanks to @cderv for the report and Benjamin Schwendinger for the fix. -56. `rbindlist` and `rbind` bind now `bit64::integer64` columns with `character`/`complex` columns, [#5504](https://github.com/Rdatatable/data.table/issues/5504). Thanks to @MichaelChirico for the request and @ben-schwen for the PR. +56. `rbindlist` and `rbind` binding `bit64::integer64` columns with `character`/`complex`/`list` columns now works, [#5504](https://github.com/Rdatatable/data.table/issues/5504). Thanks to @MichaelChirico for the request and @ben-schwen for the PR. ## NOTES From 6362c6cad643dd2d007250e5ca5959778b2e0d67 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 8 Sep 2024 19:06:28 +0200 Subject: [PATCH 08/15] restore merge cut --- inst/tests/tests.Rraw | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8a783e4559..d6bcf4b201 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18268,6 +18268,18 @@ r = copy(DT)[1L, x:= 5L] test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) +# .SDcols using binary '-' is evaluated correctly (#5826 exposed this) +DT = data.table(a=1, b=2, c=3) +test(2242.1, DT[, .SD, .SDcols=2-1], DT[, .(a)]) +test(2242.2, DT[, .SD, .SDcols=length(DT)-1], DT[, .SD, .SDcols=2]) + +# turn off GForce where arguments are calls but still allow variables, #5547 +options(datatable.optimize=2L) +dt = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) +i = c(0,1) +j = 1L +t = "lead" +f = shift test(2243.01, dt[, shift(x, i, type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") test(2243.02, dt[, shift(x, abs(c(0,1)), type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") test(2243.03, dt[, shift(x, abs(i), type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce FALSE") From a72657d9919410fef34da0719cf02da13c7e7249 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 8 Sep 2024 19:07:58 +0200 Subject: [PATCH 09/15] internal error --- src/rbindlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index fe517bb091..3aabac8829 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -339,7 +339,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor if (!foundName) { static char buff[12]; snprintf(buff,12,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) if (int64 && !(maxType==REALSXP || maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP)) - error(_("Internal error: column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); // # nocov + internal_error(__func__, "column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); // # nocov if (date && INHERITS(firstCol, char_IDate)) maxType=INTSXP; // first encountered Date determines class and type #5309 SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list From 631b0bc5603f78ecc3b8c70f3ef4c56c46020505 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 8 Sep 2024 19:09:15 +0200 Subject: [PATCH 10/15] trailing ws --- inst/tests/tests.Rraw | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d6bcf4b201..0a4abcaa47 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18268,17 +18268,17 @@ r = copy(DT)[1L, x:= 5L] test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) -# .SDcols using binary '-' is evaluated correctly (#5826 exposed this) -DT = data.table(a=1, b=2, c=3) -test(2242.1, DT[, .SD, .SDcols=2-1], DT[, .(a)]) -test(2242.2, DT[, .SD, .SDcols=length(DT)-1], DT[, .SD, .SDcols=2]) +# .SDcols using binary '-' is evaluated correctly (#5826 exposed this) +DT = data.table(a=1, b=2, c=3) +test(2242.1, DT[, .SD, .SDcols=2-1], DT[, .(a)]) +test(2242.2, DT[, .SD, .SDcols=length(DT)-1], DT[, .SD, .SDcols=2]) # turn off GForce where arguments are calls but still allow variables, #5547 -options(datatable.optimize=2L) -dt = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) -i = c(0,1) -j = 1L -t = "lead" +options(datatable.optimize=2L) +dt = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) +i = c(0,1) +j = 1L +t = "lead" f = shift test(2243.01, dt[, shift(x, i, type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") test(2243.02, dt[, shift(x, abs(c(0,1)), type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") From 39302d9a7fded0c166cd63c2fe2ab6fd79c97c3d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 8 Sep 2024 19:09:45 +0200 Subject: [PATCH 11/15] more trailing --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0a4abcaa47..30d8a20a8f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18273,7 +18273,7 @@ DT = data.table(a=1, b=2, c=3) test(2242.1, DT[, .SD, .SDcols=2-1], DT[, .(a)]) test(2242.2, DT[, .SD, .SDcols=length(DT)-1], DT[, .SD, .SDcols=2]) -# turn off GForce where arguments are calls but still allow variables, #5547 +# turn off GForce where arguments are calls but still allow variables, #5547 options(datatable.optimize=2L) dt = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) i = c(0,1) From 8c4afab5efe395c82f1d06b87aeffdaf9d681dfc Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 8 Sep 2024 19:13:39 +0200 Subject: [PATCH 12/15] refine comments --- src/rbindlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index 3aabac8829..7db61530a1 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -540,7 +540,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor bool listprotect = (TYPEOF(target)==VECSXP || TYPEOF(target)==EXPRSXP) && TYPEOF(thisCol)!=TYPEOF(target); // do an as.list() on the atomic column; #3528 if (listprotect) { - // coerceAs for int64 to copy attributes and coerceVector otherwise + // coerceAs for int64 to copy attributes (coerceVector does not copy atts) thisCol = PROTECT(INHERITS(thisCol, char_integer64) ? coerceAs(thisCol, target, ScalarLogical(TRUE)) : coerceVector(thisCol, TYPEOF(target))); // else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909) const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName); From 565f65093948d738341711d8579c8697d5ba6f65 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 8 Sep 2024 19:21:29 +0200 Subject: [PATCH 13/15] internal error --- src/rbindlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index 7db61530a1..e53e98c5b0 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -339,7 +339,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor if (!foundName) { static char buff[12]; snprintf(buff,12,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) if (int64 && !(maxType==REALSXP || maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP)) - internal_error(__func__, "column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); // # nocov + internal_error(__func__, "column %d of result is determined to be integer64 but maxType=='%s' != REALSXP", j+1, type2char(maxType)); // # nocov if (date && INHERITS(firstCol, char_IDate)) maxType=INTSXP; // first encountered Date determines class and type #5309 SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list From ea5fce2ab23dfc91162e333d9ffc7dba75b4bec4 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 8 Sep 2024 11:59:34 -0700 Subject: [PATCH 14/15] fix re-attach bit64 --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 30d8a20a8f..ef384afa8e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18736,7 +18736,7 @@ test(2264.8, print(DT, show.indices=TRUE), output=ans) if (test_bit64) local({ DT = data.table(a = 'abc', b = as.integer64(1)) suppressPackageStartupMessages(unloadNamespace("bit64")) - on.exit(suppressPackageStartupMessages(library(bit64))) + on.exit(suppressPackageStartupMessages(library(bit64, pos="package:base"))) test(2265, DT, output="abc\\s*1$") }) From 74d35e9ad8230a6a55a15eb0f0e841242499080f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 8 Sep 2024 12:05:51 -0700 Subject: [PATCH 15/15] test imaginary components in complex case --- inst/tests/tests.Rraw | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ef384afa8e..18e2dfea7b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19213,12 +19213,12 @@ test(2287.2, !identical(DT$A[[1L]], l[[1L]])) if (test_bit64) { a = data.table(as.integer64(c(1,2))) b = data.table(c("a","b")) - c = data.table(as.complex(c(3,4))) + c = data.table(complex(real = 3:4, imaginary = 5:6)) d = data.table(list(3.5, 4L)) test(2288.1, rbind(a,b), data.table(c("1","2","a","b"))) test(2288.2, rbind(b,a), data.table(c("a","b","1","2"))) - test(2288.3, rbind(a,c), data.table(as.complex(1:4))) - test(2288.4, rbind(c,a), data.table(as.complex(c(3,4,1,2)))) + test(2288.3, rbind(a,c), data.table(complex(real = 1:4, imaginary = c(0, 0, 5:6)))) + test(2288.4, rbind(c,a), data.table(complex(real = c(3:4, 1:2), imaginary = c(5:6, 0, 0)))) test(2288.5, rbind(a,d), data.table(list(as.integer64(1), as.integer64(2), 3.5, 4L))) test(2288.6, rbind(d,a), data.table(list(3.5, 4L, as.integer64(1), as.integer64(2)))) }