Rdatatable · mattdowle · Nov 23, 2021 · Nov 19, 2021 · Nov 19, 2021 · Nov 19, 2021
@@ -81,9 +81,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
       yy = y[missingyidx]
       othercolsx = setdiff(nm_x, by)
       if (length(othercolsx)) {
-        tmp = rep.int(NA_integer_, length(missingyidx))
-        # TO DO: use set() here instead..
-        yy = cbind(yy, x[tmp, othercolsx, with = FALSE])
+        nx = make.unique(c(names(yy), othercolsx))
+        set(yy, NULL, tail(nx, length(othercolsx)), rep(list(NA), length(othercolsx)))
       }
       # empty data.tables (nrow =0, ncol>0) doesn't skip names anymore in new rbindlist
       # takes care of #24 without having to save names. This is how it should be, IMHO.

@@ -1863,6 +1863,8 @@ test(628.2, rbind(data.table(a=1:3,b=factor(letters[1:3]),c=factor("foo")), list
 # Test merge with common names and all.y=TRUE, #2011
 DT1 = data.table(a=c(1,3,4,5), total=c(2,1,3,1), key="a")
 DT2 = data.table(a=c(2,3,5), total=c(5,1,2), key="a")
+DT3 = data.table(a=c(2), total=c(5), key="a")
+DT4 = data.table(a=c(3), total=c(1), key="a")
 # 629+630 worked before anyway.  631+632 test the bug fix.
 adf=as.data.frame
 adt=as.data.table
@@ -1875,6 +1877,16 @@ test(630.1, merge(DT1,DT2,all.x=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a"
 
 test(631, merge(DT1,DT2,all.y=TRUE), data.table(a=c(2,3,5),total.x=c(NA,1,1),total.y=c(5,1,2),key="a"))
 test(631.1, merge(DT1,DT2,all.y=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a",all.y=TRUE)),a))
+# ensure merge(x, y, all.y=TRUE) does not alter its input y
+# case: no row of y matches x, so the unmatched-row index (missingyidx) is 1:nrow(y)
+test(631.2, merge(DT1[c(1,3)],DT2,all.y=TRUE), data.table(a=c(2,3,5),total.x=NA_real_,total.y=c(5,1,2),key="a"))
+test(631.3, DT2, data.table(a=c(2,3,5), total=c(5,1,2), key="a"))
+# nrow(y)=1 and no match with x
+test(631.4, merge(DT1,DT3,all.y=TRUE), data.table(a=c(2),total.x=NA_real_,total.y=c(5),key="a"))
+test(631.5, DT3, data.table(a=c(2), total=c(5), key="a"))
+# nrow(y)=1 and match with x
+test(631.6, merge(DT1,DT4,all.y=TRUE), data.table(a=c(3),total.x=c(1),total.y=c(1),key="a"))
+test(631.7, DT4, data.table(a=c(3), total=c(1), key="a"))
 
 test(632, merge(DT1,DT2,all=TRUE), data.table(a=c(1,2,3,4,5),total.x=c(2,NA,1,3,1),total.y=c(NA,5,1,NA,2),key="a"))
 test(632.1, merge(DT1,DT2,all=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a",all=TRUE)),a))