Skip to content

Commit

Permalink
Add tests of forder NA/NaN behavior (#2572); also purge tests of TODO…
Browse files Browse the repository at this point in the history
…s (ported to issue tracker)
  • Loading branch information
MichaelChirico committed Feb 17, 2019
1 parent c4b195f commit 4fafb87
Showing 1 changed file with 15 additions and 26 deletions.
41 changes: 15 additions & 26 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -2391,7 +2391,7 @@ test(863, after < before+0.5)
# rbindlist should look for the first non-empty data.table - New changes (from Arun). Explanation below:
# Even if data.table is empty, as long as there are column names, they should be considered.
# Ex: What if all data.tables are empty? What'll be the column name then?
# If there are no names, then the first non-empty set of names will be allocated. I think this is the way to do it.. TODO: Should write to Matt about it.
# If there are no names, then the first non-empty set of names will be allocated.
test(864.1, rbindlist(list(data.table(foo=logical(0),bar=logical(0)), DT<-data.table(baz=letters[1:3],qux=4:6))), setnames(DT, c("foo", "bar")))
test(864.2, rbindlist(list(list(logical(0),logical(0)), DT<-data.table(baz=letters[1:3],qux=4:6))), DT)
test(864.3, rbindlist(list(data.table(logical(0),logical(0)), DT<-data.table(baz=letters[1:3],qux=4:6))), setnames(DT, c("V1", "V2")))
Expand Down Expand Up @@ -2843,7 +2843,6 @@ DT = data.table(a=c(NA,NA,FALSE,FALSE), b=c(1,1,2,2))
test(1009, DT[,list(mean(a), sum(a)),by=b], data.table(b=c(1,2),V1=c(NA,0),V2=c(NA_integer_,0L))) # sum(logical()) should be integer, not real

# an fread error shouldn't hold a lock on the file on Windows
# TODO: now that these are warnings and not errors, we need another way to trigger a STOP() inside fread.c. options(warn=2) isn't enough.
cat('A,B\n1,2\n3\n5,6\n', file=(f<-tempfile()))
test(1010.1, fread(f,logical01=TRUE), ans<-data.table(A=TRUE, B=2L), warning=(txt<-"Stopped early on line 3.*Expected 2 fields but found 1.*fill.*TRUE.*<<3>>"))
test(1010.2, fread(f,logical01=TRUE), ans, warning=txt)
Expand Down Expand Up @@ -4101,7 +4100,7 @@ test(1221, DT[.(1),b], c("a","c","e"))
# - But save the seed so that we can generate the same data back if any error occurs
seed = as.integer(Sys.time()) # sample(9999L, 1L) temporary fix, because all the set.seed(.) used above makes this sample() step deterministic (always seed=9107)
seedInfo = paste("forder decreasing argument test: seed = ", seed," ", sep="")
# no NaN (because it's hard to match with base::order) ## TODO: add tests with NaN
# no NaN (because it's hard to match with base::order); forderv NA/NaN behavior is tested below in 1988.5-8
set.seed(seed)
# Helper producing random character data: draws n*8 letters with replacement and lays them
# out as an n x 8 matrix, then applies paste() to each row.
# NOTE(review): paste(..., sep="") without collapse= returns each row's 8 letters unchanged,
# so the result is an 8 x n matrix of single letters rather than n 8-character strings.
# The similarly named helper used for the #3401 segfault test passes collapse="" instead;
# confirm which behavior was intended here before changing it (it would alter the seeded data).
foo <- function(n) apply(matrix(sample(letters, n*8L, TRUE), ncol=8L), 1, paste, sep="")
i1 = as.integer(sample(c(-100:100), 1e3, TRUE))
Expand Down Expand Up @@ -4648,8 +4647,6 @@ test(1288.15, rbindlist(ll, fill=TRUE), error="fill=TRUE, but names of input lis
ll <- list(list(1:3, 4:6), list(a=5:7, b=8:10))
test(1288.16, rbindlist(ll, fill=TRUE), error="fill=TRUE, but names of input list at position 1")

# TO DO: TODO: think of and add more tests for rbindlist

# fix for #5647
dt = data.table(x=1L, y=1:10)
test(1289.1, dt[, z := c(rep(NA,5), y), by=x], error="Supplied 15 items to be assigned to group 1 of size 10 in column 'z'")
Expand Down Expand Up @@ -5602,7 +5599,7 @@ test(1382.4, DT[c("c", "b"), list(id, check = any(var > 3)), nomatch=0L], data.t
# Fix for #742 - allow.cartesian should be ignored if `i` has no duplicates.
DT <- data.table(id=rep(letters[1:2], 2), var = rnorm(4), key="id")
test(1383.1, DT[letters[1:3], list(var)], DT[1:5, list(var)])
# Fix for #800 - allow.cartesian should be ignored if jsub[1L] has `:=`. TODO: maybe still warn if `i` has duplicates?
# Fix for #800 - allow.cartesian should be ignored if jsub[1L] has `:=`.
DT=data.table(id=c(1,1), date=c(1992,1991), value=c(4.1,4.5), key="id")
test(1383.2, copy(DT)[DT, a:=1], DT[, a := 1])

Expand Down Expand Up @@ -6138,7 +6135,6 @@ test(1452, fread("notexist.csv"), error="File 'notexist.csv' does not exist.*get
test(1453, fread(testDir("fread_line_error.csv"))[c(1,.N), c("V1","V24")],
data.table(V1=INT(3,32), V24=c(".Q8_2_0W_8_1_7_L-4-U-5_1YSV-S-3-5.X",".U5_5_8H_7_6_0_U-5-J-7_2GNY-J-3-5.X")),
warning=c("resolved improper quoting", "Stopped.*line 12. Expected 24 fields but found 47.*First discarded non-empty line: <<31,3-0-7 4:1:7.5 HVV,"))
# TODO: add comment=="#". Ensure only after last field is observed.

# no-sep-found => sep="\n", use case for this in #738
test(1454.1, fread('"Foo"`"Bar"\n5`2\n',sep="`"), data.table(Foo=5L,Bar=2L))
Expand Down Expand Up @@ -6476,7 +6472,7 @@ test(1475.17, uniqueN(logical(), na.rm=TRUE), 0L)
DT <- data.table(x = rep(1:3, each = 3), y = as.Date(seq(Sys.Date(), (Sys.Date() + 8), by = "day")))
test(1476.1, DT[, .(y=mean(y)), x], setDT(aggregate(y ~ x, DT, mean)))

# test for 'transpose' of a list, TODO: integer64 support.
# test for 'transpose' of a list
ll = lapply(1:12, function(x) {
if (x <= 3) sample(10, sample(5:10, 1L))
else if (x > 3 & x <= 6) as.numeric(sample(101:115, sample(7:12, 1L)))
Expand Down Expand Up @@ -7774,15 +7770,6 @@ test(1585.6, f1(testDir("536_fread_fill_3_extreme.txt"), b=TRUE), f2(testDir("53
test(1585.7, f1(testDir("536_fread_fill_4.txt")), f2(testDir("536_fread_fill_4.txt"))[-29,])
test(1585.8, f1(testDir("536_fread_fill_4.txt"), b=TRUE), f2(testDir("536_fread_fill_4.txt"), b=TRUE))

# TODO: add a test when fill=FALSE, but blank.lines.skip=TRUE, when the same effect should happen
# TODO: fix and add test for cases like this:
# a,b,c
# 1,2,3
# 4,5,6
# 7,8,9,6 # extra column, but we've only detected 3 cols
# 1,2,3
# ...

# fix for #721
text="x,y\n1,a\n2,b\n"
test(1586.1, fread(text, colClasses=c("integer", "factor")), data.table(x=1:2, y=factor(letters[1:2])))
Expand Down Expand Up @@ -9148,7 +9135,6 @@ if (.Machine$sizeof.pointer>4) {
nqjoin_test(dt1, dt2, 2L, 1652, mult="last")
}

# TODO: add tests for nomatch=NA..
# tested, but takes quite some time.. so commenting for now
# nqjoin_test(x, y, 3L,1643.0)
# nqjoin_test(dt1,dt2,3L,1652.0)
Expand Down Expand Up @@ -9314,7 +9300,6 @@ test(1658.27, fwrite(ok_dt, col.names="foobar"), error="isLOGICAL(col.names)")
# null data table (no columns)
test(1658.28, fwrite(data.table(NULL)), NULL, warning="Nothing to write")

# 0.0 written as 0, but TODO #2398, probably related to the 2 lines after l==0 missing coverage in writeFloat64
test(1658.29, fwrite(data.table(id=c("A","B","C"), v=c(1.1,0.0,9.9))), output="id,v\nA,1.1\nB,0\nC,9.9")

# logical NA as "NA" when logical01=TRUE, instead of the default na="" which writes all types including <NA> in character column as ,, consistently.
Expand Down Expand Up @@ -9824,8 +9809,6 @@ test(1703.15, fread("."), error="File '.' is a directory. Not yet implemented.")
test(1704, all.equal(data.table( a=1:3, b=4:6 ), data.table( A=1:3, B=4:6 ), check.attributes=FALSE))

# all.equal.data.table should consider modes equal like base R (detected via Bioc's flowWorkspace tests)
# If strict testing is required, then use identical().
# TODO: add strict.numeric (default FALSE) to all.equal.data.table() ?
test(1707.1, all.equal( data.frame(a=0L), data.frame(a=0) ) )
test(1707.2, all.equal( data.table(a=0L), data.table(a=0) ) )
test(1708.1, !isTRUE(all.equal( data.frame(a=0L), data.frame(a=FALSE) )))
Expand Down Expand Up @@ -10137,7 +10120,6 @@ test(1736.3, fwrite(DT, sep2=c("",",","")), error="sep.*,.*sep2.*,.*must all be
test(1736.4, fwrite(DT, sep2=c("","||","")), error="nchar.*sep2.*2")
test(1736.5, capture.output(fwrite(DT, sep='|', sep2=c("c(",",",")"), logical01=FALSE)), c("A|B|C", "1|c(1,2,3,4,5,6,7,8,9,10)|c(s,t,u,v,w)",
"2|c(15,16,17,18)|c(1.2,2.3,3.4,3.14159265358979,-9)", "3|c(7)|c(foo,bar)", "4|c(9,10)|c(TRUE,TRUE,FALSE)"))
# Aside: logicalAsInt tested in 1736.6 to continue to work without warning, currently. TODO: warning, deprecate and remove
test(1736.6, capture.output(fwrite(DT, sep='|', sep2=c("{",",","}"), logicalAsInt=TRUE)),
c("A|B|C", "1|{1,2,3,4,5,6,7,8,9,10}|{s,t,u,v,w}",
"2|{15,16,17,18}|{1.2,2.3,3.4,3.14159265358979,-9}", "3|{7}|{foo,bar}", "4|{9,10}|{1,1,0}"))
Expand Down Expand Up @@ -11581,7 +11563,7 @@ DT = data.table(x=rep(c("a","b","c"),each=3), y=c(1L,3L,6L), v=10:18)
write.table(DT, file = (f<-tempfile()), sep = "\t")
test(1867.14, fread(f), data.table(V1=1:9, x=DT$x, y=DT$y, v=DT$v), warning="Added 1 extra default column name")
unlink(f)
# test(1867.15, fread(testDir("iterations.txt"))) # #1416 TODO (trailing tabs on most but not at the beginning and a "-" intended to mean missing but taken as text column name)
# test(1867.15, fread(testDir("iterations.txt")))

# non equi joins bug fix #2313
dt <- data.table(
Expand Down Expand Up @@ -11921,9 +11903,9 @@ test(1894.9, DT[, sum(z)*..z], error="Variable 'z' is not found in calling scope
z = 3L
test(1894.11, DT[, sum(z)*..z], 72L)
setnames(DT, "z", "..z")
test(1894.12, DT[, sum(y)*..z], INT(105,120,135)) # TODO warning/error in future as per NEWS item in v1.11.0
test(1894.12, DT[, sum(y)*..z], INT(105,120,135))
rm(z)
test(1894.13, DT[, sum(y)*..z], INT(105,120,135)) # TODO warning/error in future as per NEWS item in v1.11.0
test(1894.13, DT[, sum(y)*..z], INT(105,120,135))
setnames(DT, "..z", "z")
test(1894.14, DT[, sum(y)*..z], error="Variable 'z' is not found in calling scope")
..z = 4L
Expand Down Expand Up @@ -13435,14 +13417,21 @@ test(1987.2, dcast.data.table(dt, x + y ~ z, fun=sum, value.var=vars[[1]]),
myFun1(dt, vars[[1]]))
test(1987.3, dcast.data.table(dt, x + y ~ z, fun=list(f1=sum, first=function(x) x[1L]), value.var=vars), myFun2(dt, vars))

# testing frankv behavior with NA/NaN; earlier tests compare consistency with base::rank,
# testing frankv/forder behavior with NA/NaN; earlier tests compare consistency with base::rank,
# but we intentionally break from base w.r.t. ranking NAs (we consider NAs to be tied, ditto NaN)
# Fixture: 7 finite values plus two NA (positions 4, 11) and two NaN (positions 6, 8).
x = data.table(r = c(6, 4, 2, NA, 1, NaN, 5, NaN, 9, 10, NA))
## frankv: with order=1L the finite values take ranks 1-7; the two NaN are tied with each
## other and rank next, and the two NA are tied with each other and rank last. Each test
## pins how one ties.method resolves those two tied pairs.
test(1988.1, frankv(x, cols='r', order=1L, ties.method='average'), c(5, 3, 2, 10.5, 1, 8.5, 4, 8.5, 6, 7, 10.5))
test(1988.2, frankv(x, cols='r', order=1L, ties.method='max'), c(5L, 3L, 2L, 11L, 1L, 9L, 4L, 9L, 6L, 7L, 11L))
test(1988.3, frankv(x, cols='r', order=1L, ties.method='min'), c(5L, 3L, 2L, 10L, 1L, 8L, 4L, 8L, 6L, 7L, 10L))
test(1988.4, frankv(x, cols='r', order=1L, ties.method='dense'), c(5L, 3L, 2L, 9L, 1L, 8L, 4L, 8L, 6L, 7L, 9L))

## forderv: expected values are row indices of x in sorted order.
## na.last=FALSE puts NA rows (4, 11) first, then NaN rows (6, 8), then the finite values;
## na.last=TRUE puts the finite values first, then NaN rows, then NA rows.
## In both cases the NA/NaN placement is the same for order=1L and order=-1L; only the
## finite values reverse.
test(1988.5, forderv(x, by='r', order=1L, na.last=FALSE), c(4L, 11L, 6L, 8L, 5L, 3L, 2L, 7L, 1L, 9L, 10L))
test(1988.6, forderv(x, by='r', order=-1L, na.last=FALSE), c(4L, 11L, 6L, 8L, 10L, 9L, 1L, 7L, 2L, 3L, 5L))
test(1988.7, forderv(x, by='r', order=1L, na.last=TRUE), c(5L, 3L, 2L, 7L, 1L, 9L, 10L, 6L, 8L, 4L, 11L))
test(1988.8, forderv(x, by='r', order=-1L, na.last=TRUE), c(10L, 9L, 1L, 7L, 2L, 3L, 5L, 6L, 8L, 4L, 11L))

# Test should not segfault, #3401 fix:
set.seed(1L)
# Build n random 4-letter strings: draw 4*n letters with replacement, arrange them
# as an n x 4 matrix, and paste each row's letters together into one string.
foo <- function(n) {
  pool <- sample(letters, 4*n, TRUE)
  grid <- matrix(pool, ncol=4L)
  apply(grid, 1, paste, collapse="")
}
Expand Down

0 comments on commit 4fafb87

Please sign in to comment.