From 73d4e7b8d3f8616d9ecf48d43953d1978867ce02 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 16 Sep 2022 15:43:04 +0200 Subject: [PATCH 1/4] add fix for fill=TRUE and usenames=FALSE --- src/rbindlist.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index 366902883..a787ddc4f 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -282,7 +282,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) for (int i=0; i Date: Sat, 17 Sep 2022 14:12:22 +0200 Subject: [PATCH 2/4] add tests --- inst/tests/tests.Rraw | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e05f52281..943d5def1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14694,7 +14694,7 @@ test(2003.3, rbindlist(list(data.table(a=1:2), data.table(b=3:4)), fill=TRUE, us test(2003.4, rbindlist(list(data.table(a=1:2,c=5:6), data.table(b=3:4)), fill=TRUE, use.names=FALSE), data.table(a=c(1:4), c=INT(5,6,NA,NA))) test(2003.5, rbindlist(list(data.table(a=1:2), data.table(b=3:4, c=5:6)), fill=TRUE, use.names=FALSE), - data.table(a=c(1:4), V1=INT(NA,NA,5,6))) + data.table(a=c(1:4), c=INT(NA,NA,5,6))) # chmatch coverage for two different non-ascii encodings matching; issues mentioned in comments in chmatch.c #69 #2538 #111 x1 = "fa\xE7ile" @@ -18814,3 +18814,7 @@ test(2238.6, "a" %notin% integer(), TRUE) test(2238.7, "a" %notin% NULL, TRUE) test(2238.8, NA %notin% 1:5, TRUE) test(2238.9, NA %notin% c(1:5, NA), FALSE) + +# rbindlist segfault with fill=TRUE and usenames=FALSE #5444 +test(2239.1, rbindlist(list(list(1), list(2,3)), fill=TRUE, use.names=FALSE), data.table(c(1,2), c(NA, 3))) +test(2239.2, rbindlist(list(list(1), list(2,factor(3))), fill=TRUE, use.names=FALSE), data.table(c(1,2), factor(c(NA, 3)))) From efe9538d5c89ec63fbd566ce74ef939c517ac5fa Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 28 Sep 2022 21:28:19 +0200 Subject: [PATCH 3/4] update tests and news --- NEWS.md | 8 ++++---- inst/tests/tests.Rraw | 4 ---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4f4a2f417..0ebacba82 100644 --- a/NEWS.md +++ b/NEWS.md @@ -207,7 +207,7 @@ # v1.14.4 0.4826 0.5586 0.6586 0.6329 0.7348 1.318 100 ``` -31. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.` +31. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`, [#5444](https://github.com/Rdatatable/data.table/issues/5444). Thanks to @sindribaldur for testing dev and filing a bug report which was fixed before release. ```R DT1 @@ -251,7 +251,7 @@ # 3: 3 NA # 4: 4 NA ``` - + 32. `fread()` already made a good guess as to whether column names are present by comparing the type of the fields in row 1 to the type of the fields in the sample. This guess is now improved when a column contains a string in row 1 (i.e. a potential column name) but all blank in the sample rows, [#2526](https://github.com/Rdatatable/data.table/issues/2526). Thanks @st-pasha for reporting, and @ben-schwen for the PR. 33. `fread()` can now read `.zip` and `.tar` directly, [#3834](https://github.com/Rdatatable/data.table/issues/3834). Moreover, if a compressed file name is missing its extension, `fread()` now attempts to infer the correct filetype from its magic bytes. Thanks to Michael Chirico for the idea, and Benjamin Schwendinger for the PR. @@ -267,7 +267,7 @@ # 1: 1 3 a # 2: 2 4 b ``` - + 35. `weighted.mean()` is now optimised by group, [#3977](https://github.com/Rdatatable/data.table/issues/3977). Thanks to @renkun-ken for requesting, and Benjamin Schwendinger for the PR. 36. `as.xts.data.table()` now supports non-numeric xts coredata matrixes, [5268](https://github.com/Rdatatable/data.table/issues/5268). Existing numeric only functionality is supported by a new `numeric.only` parameter, which defaults to `TRUE` for backward compatability and the most common use case. To convert non-numeric columns, set this parameter to `FALSE`. Conversions of `data.table` columns to a `matrix` now uses `data.table::as.matrix`, with all its performance benefits. Thanks to @ethanbsmith for the report and fix. @@ -284,7 +284,7 @@ # # 1: 3 5 # 2: 4 6 - + DT[, sum(.SD), by=.I] # I V1 # diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 943d5def1..2b59ba02b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18814,7 +18814,3 @@ test(2238.6, "a" %notin% integer(), TRUE) test(2238.7, "a" %notin% NULL, TRUE) test(2238.8, NA %notin% 1:5, TRUE) test(2238.9, NA %notin% c(1:5, NA), FALSE) - -# rbindlist segfault with fill=TRUE and usenames=FALSE #5444 -test(2239.1, rbindlist(list(list(1), list(2,3)), fill=TRUE, use.names=FALSE), data.table(c(1,2), c(NA, 3))) -test(2239.2, rbindlist(list(list(1), list(2,factor(3))), fill=TRUE, use.names=FALSE), data.table(c(1,2), factor(c(NA, 3)))) From c7c5ce2f20a7ad8deeebc40b2e337942b973d3f8 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 28 Sep 2022 21:31:21 +0200 Subject: [PATCH 4/4] move tests --- inst/tests/tests.Rraw | 3 +++ 1 file changed, 3 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2b59ba02b..c5b7ba603 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14695,6 +14695,9 @@ test(2003.4, rbindlist(list(data.table(a=1:2,c=5:6), data.table(b=3:4)), fill=TR data.table(a=c(1:4), c=INT(5,6,NA,NA))) test(2003.5, rbindlist(list(data.table(a=1:2), data.table(b=3:4, c=5:6)), fill=TRUE, use.names=FALSE), data.table(a=c(1:4), c=INT(NA,NA,5,6))) +# rbindlist segfault with fill=TRUE and usenames=FALSE #5444 +test(2003.6, rbindlist(list(list(1), list(2,3)), fill=TRUE, use.names=FALSE), data.table(c(1,2), c(NA, 3))) +test(2003.7, rbindlist(list(list(1), list(2,factor(3))), fill=TRUE, use.names=FALSE), data.table(c(1,2), factor(c(NA, 3)))) # chmatch coverage for two different non-ascii encodings matching; issues mentioned in comments in chmatch.c #69 #2538 #111 x1 = "fa\xE7ile"