Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow rbind for integer64 and character/complex/vector #5874

Merged
merged 17 commits into from
Sep 8, 2024
Merged
4 changes: 4 additions & 0 deletions CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
/src/shift.c @ben-schwen @michaelchirico
/man/shift.Rd @ben-schwen @michaelchirico

# rbind
/src/rbindlist.c @ben-schwen
/man/rbindlist.Rd @ben-schwen

# translations
/inst/po/ @michaelchirico
/po/ @michaelchirico
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ rowwiseDT(

8. `dt[, col]` now returns a copy of `col` also when it is a list column, as in any other case, [#4877](https://github.com/Rdatatable/data.table/issues/4877). Thanks to @tlapak for reporting and the PR.

9. `rbindlist` and `rbind` binding `bit64::integer64` columns with `character`/`complex`/`list` columns now works, [#5504](https://github.com/Rdatatable/data.table/issues/5504). Thanks to @MichaelChirico for the request and @ben-schwen for the PR.

## NOTES

1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix.
Expand Down
16 changes: 15 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -18736,7 +18736,7 @@ test(2264.8, print(DT, show.indices=TRUE), output=ans)
if (test_bit64) local({
DT = data.table(a = 'abc', b = as.integer64(1))
suppressPackageStartupMessages(unloadNamespace("bit64"))
on.exit(suppressPackageStartupMessages(library(bit64)))
on.exit(suppressPackageStartupMessages(library(bit64, pos="package:base")))
test(2265, DT, output="abc\\s*1$")
})

Expand Down Expand Up @@ -19208,3 +19208,17 @@ test(2287.1, address(dt[, A]) != address(dt[, A]))
l = dt[, A]
dt[1L, A := .(list(1L))]
test(2287.2, !identical(DT$A[[1L]], l[[1L]]))

# rbind do not copy class/attributes for integer64 and CPLXSXP/STRSXP/VECSXP #5504
if (test_bit64) {
a = data.table(as.integer64(c(1,2)))
b = data.table(c("a","b"))
c = data.table(complex(real = 3:4, imaginary = 5:6))
d = data.table(list(3.5, 4L))
test(2288.1, rbind(a,b), data.table(c("1","2","a","b")))
test(2288.2, rbind(b,a), data.table(c("a","b","1","2")))
test(2288.3, rbind(a,c), data.table(complex(real = 1:4, imaginary = c(0, 0, 5:6))))
test(2288.4, rbind(c,a), data.table(complex(real = c(3:4, 1:2), imaginary = c(5:6, 0, 0))))
test(2288.5, rbind(a,d), data.table(list(as.integer64(1), as.integer64(2), 3.5, 4L)))
test(2288.6, rbind(d,a), data.table(list(3.5, 4L, as.integer64(1), as.integer64(2))))
}
8 changes: 5 additions & 3 deletions src/rbindlist.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,12 +338,13 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor

if (!foundName) { static char buff[12]; snprintf(buff,12,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; }
if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option)
if (int64 && maxType!=REALSXP)
if (int64 && !(maxType==REALSXP || maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP))
internal_error(__func__, "column %d of result is determined to be integer64 but maxType=='%s' != REALSXP", j+1, type2char(maxType)); // # nocov
if (date && INHERITS(firstCol, char_IDate)) maxType=INTSXP; // first encountered Date determines class and type #5309
SEXP target;
SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list
if (!factor) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB.
// #5504 do not copy class for mixing int64 and higher maxTypes CPLXSXP/STRSXP/VECSXP
if (!factor && !(int64 && (maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP))) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB.

if (factor && anyNotStringOrFactor) {
// in future warn, or use list column instead ... warning(_("Column %d contains a factor but not all items for the column are character or factor"), idcol+j+1);
Expand Down Expand Up @@ -539,7 +540,8 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor
bool listprotect = (TYPEOF(target)==VECSXP || TYPEOF(target)==EXPRSXP) && TYPEOF(thisCol)!=TYPEOF(target);
// do an as.list() on the atomic column; #3528
if (listprotect) {
thisCol = PROTECT(coerceVector(thisCol, TYPEOF(target)));
// coerceAs for int64 to copy attributes (coerceVector does not copy atts)
thisCol = PROTECT(INHERITS(thisCol, char_integer64) ? coerceAs(thisCol, target, ScalarLogical(TRUE)) : coerceVector(thisCol, TYPEOF(target)));
// else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909)
const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName);
UNPROTECT(1); // earlier unprotect rbindlist calls with lots of lists #4536
Expand Down
Loading