From 73d4e7b8d3f8616d9ecf48d43953d1978867ce02 Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>
Date: Fri, 16 Sep 2022 15:43:04 +0200
Subject: [PATCH 1/4] add fix for fill=TRUE and usenames=FALSE

---
 src/rbindlist.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/rbindlist.c b/src/rbindlist.c
index 366902883..a787ddc4f 100644
--- a/src/rbindlist.c
+++ b/src/rbindlist.c
@@ -282,7 +282,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg)
     for (int i=0; i<LENGTH(l); ++i) {
       SEXP li = VECTOR_ELT(l, i);
       if (!length(li)) continue;
-      int w = usenames ? colMap[i*ncol + j] : j;  // colMap tells us which item to fetch for each of the final result columns, so we can stack column-by-column
+      int w = usenames ? colMap[i*ncol + j] : (j<length(li) ? j : -1);  // colMap tells us which item to fetch for each of the final result columns, so we can stack column-by-column // check if j exceeds length for fill=TRUE and usenames=FALSE #5444
       if (w==-1) continue;  // column j of final result has no input from this item (fill must be true)
       if (!foundName) {
         SEXP cn=PROTECT(getAttrib(li, R_NamesSymbol));
@@ -333,9 +333,10 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg)
       // before the savetl_init() because we have no hook to clean up tl if coerceVector fails.
       if (coercedForFactor==NULL) { coercedForFactor=PROTECT(allocVector(VECSXP, LENGTH(l))); nprotect++; }
       for (int i=0; i<LENGTH(l); ++i) {
-        int w = usenames ? colMap[i*ncol + j] : j;
+        SEXP li = VECTOR_ELT(l, i);
+        int w = usenames ? colMap[i*ncol + j] : (j<length(li) ? j : -1); // check if j exceeds length for fill=TRUE and usenames=FALSE #5444
         if (w==-1) continue;
-        SEXP thisCol = VECTOR_ELT(VECTOR_ELT(l, i), w);
+        SEXP thisCol = VECTOR_ELT(li, w);
         if (!isFactor(thisCol) && !isString(thisCol)) {
           SET_VECTOR_ELT(coercedForFactor, i, coerceVector(thisCol, STRSXP));
         }
@@ -366,9 +367,10 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg)
           SET_TRUELENGTH(s,-k-1);
         }
         for (int i=0; i<LENGTH(l); ++i) {
-          int w = usenames ? colMap[i*ncol + j] : j;
+          SEXP li = VECTOR_ELT(l, i);
+          int w = usenames ? colMap[i*ncol + j] : (j<length(li) ? j : -1); // check if j exceeds length for fill=TRUE and usenames=FALSE #5444
           if (w==-1) continue;
-          SEXP thisCol = VECTOR_ELT(VECTOR_ELT(l, i), w);
+          SEXP thisCol = VECTOR_ELT(li, w);
           if (isOrdered(thisCol)) {
             SEXP levels = getAttrib(thisCol, R_LevelsSymbol);
             const SEXP *levelsD = STRING_PTR(levels);
@@ -403,7 +405,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg)
         const int thisnrow = eachMax[i];
         SEXP li = VECTOR_ELT(l, i);
         if (!length(li)) continue;  // NULL items in the list() of DT/DF; not if thisnrow==0 because we need to retain (unused) factor levels (#3508)
-        int w = usenames ? colMap[i*ncol + j] : j;
+        int w = usenames ? colMap[i*ncol + j] : (j<length(li) ? j : -1); // check if j exceeds length for fill=TRUE and usenames=FALSE #5444
         if (w==-1) {
           writeNA(target, ansloc, thisnrow, false);
         } else {
@@ -508,7 +510,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg)
         const int thisnrow = eachMax[i];
         if (thisnrow==0) continue;
         SEXP li = VECTOR_ELT(l, i);
-        int w = usenames ? colMap[i*ncol + j] : j;
+        int w = usenames ? colMap[i*ncol + j] : (j<length(li) ? j : -1); // check if j exceeds length for fill=TRUE and usenames=FALSE #5444
         SEXP thisCol;
         if (w==-1 || !length(thisCol=VECTOR_ELT(li, w))) {  // !length for zeroCol warning above; #1871
           writeNA(target, ansloc, thisnrow, false);  // writeNA is integer64 aware and writes INT64_MIN

From 834d583e445ef0bbd1cdf7760b329f5aafe84095 Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>
Date: Sat, 17 Sep 2022 14:12:22 +0200
Subject: [PATCH 2/4] add tests

---
 inst/tests/tests.Rraw | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e05f52281..943d5def1 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -14694,7 +14694,7 @@ test(2003.3, rbindlist(list(data.table(a=1:2), data.table(b=3:4)), fill=TRUE, us
 test(2003.4, rbindlist(list(data.table(a=1:2,c=5:6), data.table(b=3:4)), fill=TRUE, use.names=FALSE),
              data.table(a=c(1:4), c=INT(5,6,NA,NA)))
 test(2003.5, rbindlist(list(data.table(a=1:2), data.table(b=3:4, c=5:6)), fill=TRUE, use.names=FALSE),
-             data.table(a=c(1:4), V1=INT(NA,NA,5,6)))
+             data.table(a=c(1:4), c=INT(NA,NA,5,6)))
 
 # chmatch coverage for two different non-ascii encodings matching; issues mentioned in comments in chmatch.c #69 #2538 #111
 x1 = "fa\xE7ile"
@@ -18814,3 +18814,7 @@ test(2238.6, "a" %notin% integer(), TRUE)
 test(2238.7, "a" %notin% NULL, TRUE)
 test(2238.8, NA %notin% 1:5, TRUE)
 test(2238.9, NA %notin% c(1:5, NA), FALSE)
+
+# rbindlist segfault with fill=TRUE and usenames=FALSE #5444
+test(2239.1, rbindlist(list(list(1), list(2,3)), fill=TRUE, use.names=FALSE), data.table(c(1,2), c(NA, 3)))
+test(2239.2, rbindlist(list(list(1), list(2,factor(3))), fill=TRUE, use.names=FALSE), data.table(c(1,2), factor(c(NA, 3))))

From efe9538d5c89ec63fbd566ce74ef939c517ac5fa Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>
Date: Wed, 28 Sep 2022 21:28:19 +0200
Subject: [PATCH 3/4] update tests and news

---
 NEWS.md               | 8 ++++----
 inst/tests/tests.Rraw | 4 ----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 4f4a2f417..0ebacba82 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -207,7 +207,7 @@
     #  v1.14.4  0.4826  0.5586  0.6586  0.6329  0.7348  1.318   100
     ```
 
-31. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`
+31. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`, [#5444](https://github.com/Rdatatable/data.table/issues/5444). Thanks to @sindribaldur for testing dev and filing a bug report which was fixed before release.
 
     ```R
     DT1
@@ -251,7 +251,7 @@
     # 3:     3    NA
     # 4:     4    NA
     ```
-    
+
 32. `fread()` already made a good guess as to whether column names are present by comparing the type of the fields in row 1 to the type of the fields in the sample. This guess is now improved when a column contains a string in row 1 (i.e. a potential column name) but all blank in the sample rows, [#2526](https://github.com/Rdatatable/data.table/issues/2526). Thanks @st-pasha for reporting, and @ben-schwen for the PR.
 
 33. `fread()` can now read `.zip` and `.tar` directly, [#3834](https://github.com/Rdatatable/data.table/issues/3834). Moreover, if a compressed file name is missing its extension, `fread()` now attempts to infer the correct filetype from its magic bytes. Thanks to Michael Chirico for the idea, and Benjamin Schwendinger for the PR.
@@ -267,7 +267,7 @@
     # 1:     1     3      a
     # 2:     2     4      b
     ```
-    
+
 35. `weighted.mean()` is now optimised by group, [#3977](https://github.com/Rdatatable/data.table/issues/3977). Thanks to @renkun-ken for requesting, and Benjamin Schwendinger for the PR.
 
 36. `as.xts.data.table()` now supports non-numeric xts coredata matrixes, [5268](https://github.com/Rdatatable/data.table/issues/5268). Existing numeric only functionality is supported by a new `numeric.only` parameter, which defaults to `TRUE` for backward compatability and the most common use case. To convert non-numeric columns, set this parameter to `FALSE`. Conversions of `data.table` columns to a `matrix` now uses `data.table::as.matrix`, with all its performance benefits. Thanks to @ethanbsmith for the report and fix.
@@ -284,7 +284,7 @@
     #    <int> <int>
     # 1:     3     5
     # 2:     4     6
-    
+
     DT[, sum(.SD), by=.I]
     #        I    V1
     #    <int> <int>
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 943d5def1..2b59ba02b 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -18814,7 +18814,3 @@ test(2238.6, "a" %notin% integer(), TRUE)
 test(2238.7, "a" %notin% NULL, TRUE)
 test(2238.8, NA %notin% 1:5, TRUE)
 test(2238.9, NA %notin% c(1:5, NA), FALSE)
-
-# rbindlist segfault with fill=TRUE and usenames=FALSE #5444
-test(2239.1, rbindlist(list(list(1), list(2,3)), fill=TRUE, use.names=FALSE), data.table(c(1,2), c(NA, 3)))
-test(2239.2, rbindlist(list(list(1), list(2,factor(3))), fill=TRUE, use.names=FALSE), data.table(c(1,2), factor(c(NA, 3))))

From c7c5ce2f20a7ad8deeebc40b2e337942b973d3f8 Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>
Date: Wed, 28 Sep 2022 21:31:21 +0200
Subject: [PATCH 4/4] move tests

---
 inst/tests/tests.Rraw | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 2b59ba02b..c5b7ba603 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -14695,6 +14695,9 @@ test(2003.4, rbindlist(list(data.table(a=1:2,c=5:6), data.table(b=3:4)), fill=TR
              data.table(a=c(1:4), c=INT(5,6,NA,NA)))
 test(2003.5, rbindlist(list(data.table(a=1:2), data.table(b=3:4, c=5:6)), fill=TRUE, use.names=FALSE),
              data.table(a=c(1:4), c=INT(NA,NA,5,6)))
+# rbindlist segfault with fill=TRUE and usenames=FALSE #5444
+test(2003.6, rbindlist(list(list(1), list(2,3)), fill=TRUE, use.names=FALSE), data.table(c(1,2), c(NA, 3)))
+test(2003.7, rbindlist(list(list(1), list(2,factor(3))), fill=TRUE, use.names=FALSE), data.table(c(1,2), factor(c(NA, 3))))
 
 # chmatch coverage for two different non-ascii encodings matching; issues mentioned in comments in chmatch.c #69 #2538 #111
 x1 = "fa\xE7ile"