From b48649a855b97258563f9503bfdc20605001868b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 6 Jan 2025 22:41:46 +0800 Subject: [PATCH] setDT() works on S4 slots (again), and := works in under-allocated S4 slots (#6703) * setDT() works on S4 slots * Tweak test so that it would fail on master * typo * NEWS for separately-fixed bug --- NEWS.md | 2 ++ R/data.table.R | 13 ++++++++++--- inst/tests/S4.Rraw | 9 +++++++++ src/data.table.h | 1 + src/init.c | 1 + src/wrappers.c | 18 +++++++++++++----- 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index a8cc17b651..0d01c2d854 100644 --- a/NEWS.md +++ b/NEWS.md @@ -123,6 +123,8 @@ rowwiseDT( 17. `DT[order(...)]` better matches `base::order()` behavior by (1) recognizing the `method=` argument (and erroring since this is not supported) and (2) accepting a vector of `TRUE`/`FALSE` in `decreasing=` as an alternative to using `-a` to convey "sort `a` decreasing", [#4456](https://github.com/Rdatatable/data.table/issues/4456). Thanks @jangorecki for the FR and @MichaelChirico for the PR. +17. Assignment with `:=` to an S4 slot of an under-allocated data.table now works, [#6704](https://github.com/Rdatatable/data.table/issues/6704). Thanks @MichaelChirico for the report and fix. + ## NOTES 1. There is a new vignette on joins! See `vignette("datatable-joins")`. Thanks to Angel Feliz for authoring it! Feedback welcome. This vignette has been highly requested since 2017: [#2181](https://github.com/Rdatatable/data.table/issues/2181). diff --git a/R/data.table.R b/R/data.table.R index e04ad2d380..2709624214 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1221,7 +1221,7 @@ replace_dot_alias = function(e) { setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope if (is.name(name)) { assign(as.character(name),x,parent.frame(),inherits=TRUE) - } else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) { + } else if (.is_simple_extraction(name)) { # TODO(#6702): use a helper here as the code is very similar to setDT(). k = eval(name[[2L]], parent.frame(), parent.frame()) if (is.list(k)) { origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame()) @@ -1233,6 +1233,8 @@ replace_dot_alias = function(e) { .Call(Csetlistelt,k,as.integer(j), x) } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { assign(as.character(name[[3L]]), x, k, inherits=FALSE) + } else if (isS4(k)) { + .Call(CsetS4elt, k, as.character(name[[3L]]), x) } } # TO DO: else if env$<- or list$<- } @@ -2967,7 +2969,7 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { if (is.name(name)) { name = as.character(name) assign(name, x, parent.frame(), inherits=TRUE) - } else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) { + } else if (.is_simple_extraction(name)) { # common case is call from 'lapply()' k = eval(name[[2L]], parent.frame(), parent.frame()) if (is.list(k)) { @@ -2979,9 +2981,11 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { stopf("Item '%s' not found in names of input list", origj) } } - .Call(Csetlistelt,k,as.integer(j), x) + .Call(Csetlistelt, k, as.integer(j), x) } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { assign(as.character(name[[3L]]), x, k, inherits=FALSE) + } else if (isS4(k)) { + .Call(CsetS4elt, k, as.character(name[[3L]]), x) } } .Call(CexpandAltRep, x) # issue#2866 and PR#2882 @@ -3048,6 +3052,9 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) { !is.call(e[[2L]]) && !is.call(e[[3L]]) # e.g. V1:V2, but not min(V1):max(V2) or 1:max(V2) } +# for assignments like x[[1]][, a := 2] or setDT(x@DT) +.is_simple_extraction = function(e) e %iscall% c('$', '@', '[[') && is.name(e[[2L]]) + # GForce functions # to add a new function to GForce (from the R side -- the easy part!): # (1) add it to gfuns diff --git a/inst/tests/S4.Rraw b/inst/tests/S4.Rraw index 7e0b8111d6..19a595c08b 100644 --- a/inst/tests/S4.Rraw +++ b/inst/tests/S4.Rraw @@ -109,3 +109,12 @@ DT = data.table(a = rep(1:2, c(1, 100))) # Set the S4 bit on a simple object DT[, b := asS4(seq_len(.N))] test(6, DT[, b, by=a, verbose=TRUE][, isS4(b)], output="dogroups: growing") + +# setDT() works for a data.frame slot, #6701 +setClass("DataFrame", slots=c(x="data.frame")) +DF = new("DataFrame", x=data.frame(a=1)) +setDT(DF@x) +test(7.1, is.data.table(DF@x)) +# Similar code for under-allocated data.tables in S4 slots, #6704 +setClass("DataTable", slots=c(x="data.table")) +test(7.2, options=c(datatable.alloccol=0L), {DT = new("DataTable", x=data.table(a=1)); DT@x[, b := 2L]; DT@x$b}, 2L) # NB: requires assigning DT to test assignment back to that object diff --git a/src/data.table.h b/src/data.table.h index ae76a227f1..523f6682fa 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -299,6 +299,7 @@ SEXP freadR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SE SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); SEXP rbindlist(SEXP, SEXP, SEXP, SEXP, SEXP); SEXP setlistelt(SEXP, SEXP, SEXP); +SEXP setS4elt(SEXP, SEXP, SEXP); SEXP address(SEXP); SEXP expandAltRep(SEXP); SEXP fmelt(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); diff --git a/src/init.c b/src/init.c index 204dc1088d..0188cfaf65 100644 --- a/src/init.c +++ b/src/init.c @@ -72,6 +72,7 @@ R_CallMethodDef callMethods[] = { {"Crbindlist", (DL_FUNC) &rbindlist, -1}, {"Cvecseq", (DL_FUNC) &vecseq, -1}, {"Csetlistelt", (DL_FUNC) &setlistelt, -1}, +{"CsetS4elt", (DL_FUNC) &setS4elt, -1}, {"Caddress", (DL_FUNC) &address, -1}, {"CexpandAltRep", (DL_FUNC) &expandAltRep, -1}, {"Cfmelt", (DL_FUNC) &fmelt, -1}, diff --git a/src/wrappers.c b/src/wrappers.c index 6587caa97a..2cf46d9cfe 100644 --- a/src/wrappers.c +++ b/src/wrappers.c @@ -59,18 +59,26 @@ SEXP copy(SEXP x) return(duplicate(x)); } +// Internal use only. So that := can update elements of a list of data.table, #2204. Just needed to overallocate/grow the VECSXP. SEXP setlistelt(SEXP l, SEXP i, SEXP value) { - R_len_t i2; - // Internal use only. So that := can update elements of a list of data.table, #2204. Just needed to overallocate/grow the VECSXP. - if (!isNewList(l)) error(_("First argument to setlistelt must be a list()")); - if (!isInteger(i) || LENGTH(i)!=1) error(_("Second argument to setlistelt must a length 1 integer vector")); - i2 = INTEGER(i)[0]; + if (!isNewList(l)) internal_error(__func__, "First argument to setlistelt must be a list()"); + if (!isInteger(i) || LENGTH(i)!=1) internal_error(__func__, "Second argument to setlistelt must a length 1 integer vector"); + R_len_t i2 = INTEGER(i)[0]; if (LENGTH(l) < i2 || i2<1) error(_("i (%d) is outside the range of items [1,%d]"),i2,LENGTH(l)); SET_VECTOR_ELT(l, i2-1, value); return(R_NilValue); } +// Internal use only. So that := can update elements of a slot of data.table, #6701. +SEXP setS4elt(SEXP obj, SEXP name, SEXP value) +{ + if (!isS4(obj)) internal_error(__func__, "First argument to setS4elt must be an S4 object"); + if (!isString(name) || LENGTH(name)!=1) internal_error(__func__, "Second argument to setS4elt must be a character string"); + R_do_slot_assign(obj, name, value); + return(R_NilValue); +} + SEXP address(SEXP x) { // A better way than : http://stackoverflow.com/a/10913296/403310