From 87804e5931058065a5c705761ccdb6b12592b934 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Jan 2025 08:49:49 +0000 Subject: [PATCH 1/4] setDT() works on S4 slots --- R/data.table.R | 13 ++++++++++--- inst/tests/S4.Rraw | 8 ++++++++ src/data.table.h | 1 + src/init.c | 1 + src/wrappers.c | 18 +++++++++++++----- 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 2cc34ba849..ff15a55dde 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1221,7 +1221,7 @@ replace_dot_alias = function(e) { setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope if (is.name(name)) { assign(as.character(name),x,parent.frame(),inherits=TRUE) - } else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) { + } else if (.is_simple_extraction(name)) { # TODO(#6702): use a helper here as the code is very similar to setDT(). k = eval(name[[2L]], parent.frame(), parent.frame()) if (is.list(k)) { origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame()) @@ -1233,6 +1233,8 @@ replace_dot_alias = function(e) { .Call(Csetlistelt,k,as.integer(j), x) } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { assign(as.character(name[[3L]]), x, k, inherits=FALSE) + } else if (isS4(k)) { + .Call(CsetS4elt, k, as.character(name[[3L]]), x) } } # TO DO: else if env$<- or list$<- } @@ -2967,7 +2969,7 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { if (is.name(name)) { name = as.character(name) assign(name, x, parent.frame(), inherits=TRUE) - } else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) { + } else if (.is_simple_extraction(name)) { # common case is call from 'lapply()' k = eval(name[[2L]], parent.frame(), parent.frame()) if (is.list(k)) { @@ -2979,9 +2981,11 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { stopf("Item '%s' not found in names of input list", origj) } } - .Call(Csetlistelt,k,as.integer(j), x) + .Call(Csetlistelt, k, as.integer(j), x) } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { assign(as.character(name[[3L]]), x, k, inherits=FALSE) + } else if (isS4(k)) { + .Call(CsetS4elt, k, as.character(name[[3L]]), x) } } .Call(CexpandAltRep, x) # issue#2866 and PR#2882 @@ -3048,6 +3052,9 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) { is.name(e[[1L]]) && is.name(e[[2L]]) # e.g. V1:V2, but not min(V1):max(V2) or 1:max(V2) } +# for assignments like x[[1]][, a := 2] or setDT(x@DT) +.is_simple_extraction = function(e) e %iscall% c('$', '@', '[[') && is.name(e[[2L]]) + # GForce functions # to add a new function to GForce (from the R side -- the easy part!): # (1) add it to gfuns diff --git a/inst/tests/S4.Rraw b/inst/tests/S4.Rraw index 7e0b8111d6..2650a7c4a3 100644 --- a/inst/tests/S4.Rraw +++ b/inst/tests/S4.Rraw @@ -109,3 +109,11 @@ DT = data.table(a = rep(1:2, c(1, 100))) # Set the S4 bit on a simple object DT[, b := asS4(seq_len(.N))] test(6, DT[, b, by=a, verbose=TRUE][, isS4(b)], output="dogroups: growing") + +# setDT() works for a data.frame slot, #6701 +setClass("DataFrame", slots=c(x="data.frame")) +DF = new("DataFrame", x=data.frame(a=1)) +setDT(DF@x) +test(7.1, is.data.table(DF@x)) +setClass("DataTable", slots=c(x="data.table")) +test(7.2, options=c(datatable.alloccol=0L), new("DataTable", x=data.table(a=1))@x[, b := 2L]$b, 2L) diff --git a/src/data.table.h b/src/data.table.h index ae76a227f1..523f6682fa 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -299,6 +299,7 @@ SEXP freadR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SE SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); SEXP rbindlist(SEXP, SEXP, SEXP, SEXP, SEXP); SEXP setlistelt(SEXP, SEXP, SEXP); +SEXP setS4elt(SEXP, SEXP, SEXP); SEXP address(SEXP); SEXP expandAltRep(SEXP); SEXP fmelt(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); diff --git a/src/init.c b/src/init.c index 204dc1088d..0188cfaf65 100644 --- a/src/init.c +++ b/src/init.c @@ -72,6 +72,7 @@ R_CallMethodDef callMethods[] = { {"Crbindlist", (DL_FUNC) &rbindlist, -1}, {"Cvecseq", (DL_FUNC) &vecseq, -1}, {"Csetlistelt", (DL_FUNC) &setlistelt, -1}, +{"CsetS4elt", (DL_FUNC) &setS4elt, -1}, {"Caddress", (DL_FUNC) &address, -1}, {"CexpandAltRep", (DL_FUNC) &expandAltRep, -1}, {"Cfmelt", (DL_FUNC) &fmelt, -1}, diff --git a/src/wrappers.c b/src/wrappers.c index 6587caa97a..2cf46d9cfe 100644 --- a/src/wrappers.c +++ b/src/wrappers.c @@ -59,18 +59,26 @@ SEXP copy(SEXP x) return(duplicate(x)); } +// Internal use only. So that := can update elements of a list of data.table, #2204. Just needed to overallocate/grow the VECSXP. SEXP setlistelt(SEXP l, SEXP i, SEXP value) { - R_len_t i2; - // Internal use only. So that := can update elements of a list of data.table, #2204. Just needed to overallocate/grow the VECSXP. - if (!isNewList(l)) error(_("First argument to setlistelt must be a list()")); - if (!isInteger(i) || LENGTH(i)!=1) error(_("Second argument to setlistelt must a length 1 integer vector")); - i2 = INTEGER(i)[0]; + if (!isNewList(l)) internal_error(__func__, "First argument to setlistelt must be a list()"); + if (!isInteger(i) || LENGTH(i)!=1) internal_error(__func__, "Second argument to setlistelt must a length 1 integer vector"); + R_len_t i2 = INTEGER(i)[0]; if (LENGTH(l) < i2 || i2<1) error(_("i (%d) is outside the range of items [1,%d]"),i2,LENGTH(l)); SET_VECTOR_ELT(l, i2-1, value); return(R_NilValue); } +// Internal use only. So that := can update elements of a slot of data.table, #6701. +SEXP setS4elt(SEXP obj, SEXP name, SEXP value) +{ + if (!isS4(obj)) internal_error(__func__, "First argument to setS4elt must be an S4 object"); + if (!isString(name) || LENGTH(name)!=1) internal_error(__func__, "Second argument to setS4elt must be a character string"); + R_do_slot_assign(obj, name, value); + return(R_NilValue); +} + SEXP address(SEXP x) { // A better way than : http://stackoverflow.com/a/10913296/403310 From 8d2e9581c0432e12a3c894e102abbb49c7063ac0 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Jan 2025 09:05:02 +0000 Subject: [PATCH 2/4] Tweak test so that it would fail on master --- inst/tests/S4.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/S4.Rraw b/inst/tests/S4.Rraw index 2650a7c4a3..c5abbd4d3b 100644 --- a/inst/tests/S4.Rraw +++ b/inst/tests/S4.Rraw @@ -116,4 +116,4 @@ DF = new("DataFrame", x=data.frame(a=1)) setDT(DF@x) test(7.1, is.data.table(DF@x)) setClass("DataTable", slots=c(x="data.table")) -test(7.2, options=c(datatable.alloccol=0L), new("DataTable", x=data.table(a=1))@x[, b := 2L]$b, 2L) +test(7.2, options=c(datatable.alloccol=0L), {DT = new("DataTable", x=data.table(a=1)); DT@x[, b := 2L]; DT$b}, 2L) # NB: requires assigning DT to test assignment back to that object From 23c195b6d32d2cc7018b469f370e9e76aad11427 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Jan 2025 09:06:09 +0000 Subject: [PATCH 3/4] typo --- inst/tests/S4.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/S4.Rraw b/inst/tests/S4.Rraw index c5abbd4d3b..3bd821bdb8 100644 --- a/inst/tests/S4.Rraw +++ b/inst/tests/S4.Rraw @@ -116,4 +116,4 @@ DF = new("DataFrame", x=data.frame(a=1)) setDT(DF@x) test(7.1, is.data.table(DF@x)) setClass("DataTable", slots=c(x="data.table")) -test(7.2, options=c(datatable.alloccol=0L), {DT = new("DataTable", x=data.table(a=1)); DT@x[, b := 2L]; DT$b}, 2L) # NB: requires assigning DT to test assignment back to that object +test(7.2, options=c(datatable.alloccol=0L), {DT = new("DataTable", x=data.table(a=1)); DT@x[, b := 2L]; DT@x$b}, 2L) # NB: requires assigning DT to test assignment back to that object From eeeb61cab6c394c988c3d0e53b06259c717febf2 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Jan 2025 09:14:27 +0000 Subject: [PATCH 4/4] NEWS for separately-fixed bug --- NEWS.md | 2 ++ inst/tests/S4.Rraw | 1 + 2 files changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 6d0e0c97f5..4b613f1de8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -123,6 +123,8 @@ rowwiseDT( 16. Joins of `integer64` and `double` columns succeed when the `double` column has lossless `integer64` representation, [#4167](https://github.com/Rdatatable/data.table/issues/4167) and [#6625](https://github.com/Rdatatable/data.table/issues/6625). Previously, this only worked when the double column had lossless _32-bit_ integer representation. Thanks @MichaelChirico for the reports and fix. +17. Assignment with `:=` to an S4 slot of an under-allocated data.table now works, [#6704](https://github.com/Rdatatable/data.table/issues/6704). Thanks @MichaelChirico for the report and fix. + ## NOTES 1. There is a new vignette on joins! See `vignette("datatable-joins")`. Thanks to Angel Feliz for authoring it! Feedback welcome. This vignette has been highly requested since 2017: [#2181](https://github.com/Rdatatable/data.table/issues/2181). diff --git a/inst/tests/S4.Rraw b/inst/tests/S4.Rraw index 3bd821bdb8..19a595c08b 100644 --- a/inst/tests/S4.Rraw +++ b/inst/tests/S4.Rraw @@ -115,5 +115,6 @@ setClass("DataFrame", slots=c(x="data.frame")) DF = new("DataFrame", x=data.frame(a=1)) setDT(DF@x) test(7.1, is.data.table(DF@x)) +# Similar code for under-allocated data.tables in S4 slots, #6704 setClass("DataTable", slots=c(x="data.table")) test(7.2, options=c(datatable.alloccol=0L), {DT = new("DataTable", x=data.table(a=1)); DT@x[, b := 2L]; DT@x$b}, 2L) # NB: requires assigning DT to test assignment back to that object