From 4fafb87da20ddcfe47e07e28827812b964e3f181 Mon Sep 17 00:00:00 2001
From: Michael Chirico <MichaelChirico4@gmail.com>
Date: Sun, 17 Feb 2019 13:33:30 +0800
Subject: [PATCH] Add tests of forder NA/NaN behavior (#2572); also purge tests
 of TODOs (ported to issue tracker)

---
 inst/tests/tests.Rraw | 41 +++++++++++++++--------------------------
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index be172bae1..4e3aa73c0 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -2391,7 +2391,7 @@ test(863, after < before+0.5)
 # rbindlist should look for the first non-empty data.table - New changes (from Arun). Explanation below:
 # Even if data.table is empty, as long as there are column names, they should be considered.
 # Ex: What if all data.tables are empty? What'll be the column name then?
-# If there are no names, then the first non-empty set of names will be allocated. I think this is the way to do it.. TODO: Should write to Matt about it.
+# If there are no names, then the first non-empty set of names will be allocated.
 test(864.1, rbindlist(list(data.table(foo=logical(0),bar=logical(0)), DT<-data.table(baz=letters[1:3],qux=4:6))), setnames(DT, c("foo", "bar")))
 test(864.2, rbindlist(list(list(logical(0),logical(0)), DT<-data.table(baz=letters[1:3],qux=4:6))), DT)
 test(864.3, rbindlist(list(data.table(logical(0),logical(0)), DT<-data.table(baz=letters[1:3],qux=4:6))), setnames(DT, c("V1", "V2")))
@@ -2843,7 +2843,6 @@ DT = data.table(a=c(NA,NA,FALSE,FALSE), b=c(1,1,2,2))
 test(1009, DT[,list(mean(a), sum(a)),by=b], data.table(b=c(1,2),V1=c(NA,0),V2=c(NA_integer_,0L))) # sum(logical()) should be integer, not real
 
 # an fread error shouldn't hold a lock on the file on Windows
-# TODO: now that these are warnings and not errors, we need another way to trigger a STOP() inside fread.c. options(warn=2) isn't enough.
 cat('A,B\n1,2\n3\n5,6\n', file=(f<-tempfile()))
 test(1010.1, fread(f,logical01=TRUE), ans<-data.table(A=TRUE, B=2L), warning=(txt<-"Stopped early on line 3.*Expected 2 fields but found 1.*fill.*TRUE.*<<3>>"))
 test(1010.2, fread(f,logical01=TRUE), ans, warning=txt)
@@ -4101,7 +4100,7 @@ test(1221, DT[.(1),b], c("a","c","e"))
 # - But save the seed so that we can generate the same data back if any error occurs
 seed = as.integer(Sys.time()) # sample(9999L, 1L) temporary fix, because all the set.seed(.) used above makes this sample() step deterministic (always seed=9107)
 seedInfo = paste("forder decreasing argument test: seed = ", seed,"  ", sep="")
-# no NaN (because it's hard to match with base::order)                             ## TODO: add tests with NaN
+# no NaN (because it's hard to match with base::order); tested below in 1988.4-8
 set.seed(seed)
 foo <- function(n) apply(matrix(sample(letters, n*8L, TRUE), ncol=8L), 1, paste, sep="")
 i1 = as.integer(sample(c(-100:100), 1e3, TRUE))
@@ -4648,8 +4647,6 @@ test(1288.15, rbindlist(ll, fill=TRUE), error="fill=TRUE, but names of input lis
 ll <- list(list(1:3, 4:6), list(a=5:7, b=8:10))
 test(1288.16, rbindlist(ll, fill=TRUE), error="fill=TRUE, but names of input list at position 1")
 
-# TO DO: TODO: think of and add more tests for rbindlist
-
 # fix for #5647
 dt = data.table(x=1L, y=1:10)
 test(1289.1, dt[, z := c(rep(NA,5), y), by=x], error="Supplied 15 items to be assigned to group 1 of size 10 in column 'z'")
@@ -5602,7 +5599,7 @@ test(1382.4, DT[c("c", "b"), list(id, check = any(var > 3)), nomatch=0L], data.t
 # Fix for #742 - allow.cartesian should be ignored if `i` has no duplicates.
 DT <- data.table(id=rep(letters[1:2], 2), var = rnorm(4), key="id")
 test(1383.1, DT[letters[1:3], list(var)], DT[1:5, list(var)])
-# Fix for #800 - allow.cartesian should be ignored if jsub[1L] has `:=`. TODO: maybe still warn if `i` has duplicates?
+# Fix for #800 - allow.cartesian should be ignored if jsub[1L] has `:=`.
 DT=data.table(id=c(1,1), date=c(1992,1991), value=c(4.1,4.5), key="id")
 test(1383.2, copy(DT)[DT, a:=1], DT[, a := 1])
 
@@ -6138,7 +6135,6 @@ test(1452, fread("notexist.csv"), error="File 'notexist.csv' does not exist.*get
 test(1453, fread(testDir("fread_line_error.csv"))[c(1,.N), c("V1","V24")],
            data.table(V1=INT(3,32), V24=c(".Q8_2_0W_8_1_7_L-4-U-5_1YSV-S-3-5.X",".U5_5_8H_7_6_0_U-5-J-7_2GNY-J-3-5.X")),
            warning=c("resolved improper quoting", "Stopped.*line 12. Expected 24 fields but found 47.*First discarded non-empty line: <<31,3-0-7 4:1:7.5 HVV,"))
-# TODO: add comment=="#".   Ensure only after last field is observed.
 
 # no-sep-found => sep="\n", use case for this in #738
 test(1454.1, fread('"Foo"`"Bar"\n5`2\n',sep="`"), data.table(Foo=5L,Bar=2L))
@@ -6476,7 +6472,7 @@ test(1475.17, uniqueN(logical(), na.rm=TRUE),            0L)
 DT <- data.table(x = rep(1:3, each = 3), y = as.Date(seq(Sys.Date(), (Sys.Date() + 8), by = "day")))
 test(1476.1, DT[, .(y=mean(y)), x], setDT(aggregate(y ~ x, DT, mean)))
 
-# test for 'transpose' of a list, TODO: integer64 support.
+# test for 'transpose' of a list
 ll = lapply(1:12, function(x) {
   if (x <= 3) sample(10, sample(5:10, 1L))
   else if (x > 3 & x <= 6) as.numeric(sample(101:115, sample(7:12, 1L)))
@@ -7774,15 +7770,6 @@ test(1585.6, f1(testDir("536_fread_fill_3_extreme.txt"), b=TRUE), f2(testDir("53
 test(1585.7, f1(testDir("536_fread_fill_4.txt")), f2(testDir("536_fread_fill_4.txt"))[-29,])
 test(1585.8, f1(testDir("536_fread_fill_4.txt"), b=TRUE), f2(testDir("536_fread_fill_4.txt"), b=TRUE))
 
-# TODO: add a test when fill=FALSE, but blank.lines.skip=TRUE, when the same effect should happen
-# TODO: fix and add test for cases like this:
-# a,b,c
-# 1,2,3
-# 4,5,6
-# 7,8,9,6 # extra column, but we've only detected 3 cols
-# 1,2,3
-# ...
-
 # fix for #721
 text="x,y\n1,a\n2,b\n"
 test(1586.1, fread(text, colClasses=c("integer", "factor")), data.table(x=1:2, y=factor(letters[1:2])))
@@ -9148,7 +9135,6 @@ if (.Machine$sizeof.pointer>4) {
   nqjoin_test(dt1, dt2, 2L, 1652, mult="last")
 }
 
-# TODO: add tests for nomatch=NA..
 # tested, but takes quite some time.. so commenting for now
 # nqjoin_test(x, y, 3L,1643.0)
 # nqjoin_test(dt1,dt2,3L,1652.0)
@@ -9314,7 +9300,6 @@ test(1658.27, fwrite(ok_dt, col.names="foobar"), error="isLOGICAL(col.names)")
 # null data table (no columns)
 test(1658.28, fwrite(data.table(NULL)), NULL, warning="Nothing to write")
 
-# 0.0 written as 0, but TODO #2398, probably related to the 2 lines after l==0 missing coverage in writeFloat64
 test(1658.29, fwrite(data.table(id=c("A","B","C"), v=c(1.1,0.0,9.9))), output="id,v\nA,1.1\nB,0\nC,9.9")
 
 # logical NA as "NA" when logical01=TRUE, instead of the default na="" which writes all types including <NA> in character column as ,, consistently.
@@ -9824,8 +9809,6 @@ test(1703.15, fread("."), error="File '.' is a directory. Not yet implemented.")
 test(1704, all.equal(data.table( a=1:3, b=4:6 ), data.table( A=1:3, B=4:6 ), check.attributes=FALSE))
 
 # all.equal.data.table should consider modes equal like base R (detected via Bioc's flowWorkspace tests)
-# If strict testing is required, then use identical().
-# TODO: add strict.numeric (default FALSE) to all.equal.data.table() ?
 test(1707.1, all.equal( data.frame(a=0L), data.frame(a=0) ) )
 test(1707.2, all.equal( data.table(a=0L), data.table(a=0) ) )
 test(1708.1, !isTRUE(all.equal( data.frame(a=0L), data.frame(a=FALSE) )))
@@ -10137,7 +10120,6 @@ test(1736.3, fwrite(DT, sep2=c("",",","")), error="sep.*,.*sep2.*,.*must all be
 test(1736.4, fwrite(DT, sep2=c("","||","")), error="nchar.*sep2.*2")
 test(1736.5, capture.output(fwrite(DT, sep='|', sep2=c("c(",",",")"), logical01=FALSE)), c("A|B|C", "1|c(1,2,3,4,5,6,7,8,9,10)|c(s,t,u,v,w)",
  "2|c(15,16,17,18)|c(1.2,2.3,3.4,3.14159265358979,-9)", "3|c(7)|c(foo,bar)", "4|c(9,10)|c(TRUE,TRUE,FALSE)"))
-# Aside: logicalAsInt tested in 1736.6 to continue to work without warning, currently. TODO: warning, deprecate and remove
 test(1736.6, capture.output(fwrite(DT, sep='|', sep2=c("{",",","}"), logicalAsInt=TRUE)),
  c("A|B|C", "1|{1,2,3,4,5,6,7,8,9,10}|{s,t,u,v,w}",
  "2|{15,16,17,18}|{1.2,2.3,3.4,3.14159265358979,-9}", "3|{7}|{foo,bar}", "4|{9,10}|{1,1,0}"))
@@ -11581,7 +11563,7 @@ DT = data.table(x=rep(c("a","b","c"),each=3), y=c(1L,3L,6L), v=10:18)
 write.table(DT, file = (f<-tempfile()), sep = "\t")
 test(1867.14, fread(f), data.table(V1=1:9, x=DT$x, y=DT$y, v=DT$v), warning="Added 1 extra default column name")
 unlink(f)
-# test(1867.15, fread(testDir("iterations.txt"))) # #1416 TODO (trailing tabs on most but not at the beginning and a "-" intended to mean missing but taken as text column name)
+# test(1867.15, fread(testDir("iterations.txt")))
 
 # non equi joins bug fix #2313
 dt <- data.table(
@@ -11921,9 +11903,9 @@ test(1894.9, DT[, sum(z)*..z], error="Variable 'z' is not found in calling scope
 z = 3L
 test(1894.11, DT[, sum(z)*..z], 72L)
 setnames(DT, "z", "..z")
-test(1894.12, DT[, sum(y)*..z], INT(105,120,135))  # TODO warning/error in future as per NEWS item in v1.11.0
+test(1894.12, DT[, sum(y)*..z], INT(105,120,135))
 rm(z)
-test(1894.13, DT[, sum(y)*..z], INT(105,120,135))  # TODO warning/error in future as per NEWS item in v1.11.0
+test(1894.13, DT[, sum(y)*..z], INT(105,120,135))
 setnames(DT, "..z", "z")
 test(1894.14, DT[, sum(y)*..z], error="Variable 'z' is not found in calling scope")
 ..z = 4L
@@ -13435,14 +13417,21 @@ test(1987.2, dcast.data.table(dt, x + y ~ z, fun=sum, value.var=vars[[1]]),
              myFun1(dt, vars[[1]]))
 test(1987.3, dcast.data.table(dt, x + y ~ z, fun=list(f1=sum, first=function(x) x[1L]), value.var=vars), myFun2(dt, vars))
 
-# testing frankv behavior with NA/NaN; earlier tests compare consistency with base::rank,
+# testing frankv/forder behavior with NA/NaN; earlier tests compare consistency with base::rank,
 # but we intentionally break from base w.r.t. ranking NAs (we consider NAs to be tied, ditto NaN)
 x = data.table(r = c(6, 4, 2, NA, 1, NaN, 5, NaN, 9, 10, NA))
+## frankv
 test(1988.1, frankv(x, cols='r', order=1L, ties.method='average'), c(5, 3, 2, 10.5, 1, 8.5, 4, 8.5, 6, 7, 10.5))
 test(1988.2, frankv(x, cols='r', order=1L, ties.method='max'),     c(5L, 3L, 2L, 11L, 1L, 9L, 4L, 9L, 6L, 7L, 11L))
 test(1988.3, frankv(x, cols='r', order=1L, ties.method='min'),     c(5L, 3L, 2L, 10L, 1L, 8L, 4L, 8L, 6L, 7L, 10L))
 test(1988.4, frankv(x, cols='r', order=1L, ties.method='dense'),   c(5L, 3L, 2L, 9L, 1L, 8L, 4L, 8L, 6L, 7L, 9L))
 
+## forderv
+test(1988.5, forderv(x, by='r', order=1L, na.last=FALSE), c(4L, 11L, 6L, 8L, 5L, 3L, 2L, 7L, 1L, 9L, 10L))
+test(1988.6, forderv(x, by='r', order=-1L, na.last=FALSE), c(4L, 11L, 6L, 8L, 10L, 9L, 1L, 7L, 2L, 3L, 5L))
+test(1988.7, forderv(x, by='r', order=1L, na.last=TRUE), c(5L, 3L, 2L, 7L, 1L, 9L, 10L, 6L, 8L, 4L, 11L))
+test(1988.8, forderv(x, by='r', order=-1L, na.last=TRUE), c(10L, 9L, 1L, 7L, 2L, 3L, 5L, 6L, 8L, 4L, 11L))
+
 # Test should not segfault, #3401 fix:
 set.seed(1L)
 foo <- function(n) apply(matrix(sample(letters, 4*n, TRUE), ncol=4L), 1, paste, collapse="")