From 0f4492bfe7868f5ab0ec4b67a35bccf8a9672070 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 3 Feb 2018 01:35:47 +0800 Subject: [PATCH 1/5] Progress towards #2571 --- inst/tests/tests.Rraw | 21 ++++++++++++++------- man/assign.Rd | 26 +++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2d76a826c..aaed1d4ce 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3075,8 +3075,7 @@ test(1042, DT[-5, mean(x), by = group], data.table(group=c(1,2), V1=c(1.5, 3.5)) # Test when abs(negative index) > nrow(dt) - should warn test(1042.1, DT[-10], DT, warning="Item 1 of i is -10 but there are only 5 rows. Ignoring this and 0 more like it out of 1.") test(1042.2, DT[c(-5, -10), mean(x), by = group], data.table(group=c(1,2),V1=c(1.5,3.5)), warning="Item 2 of i is -10 but there are only 5 rows. Ignoring this and 0 more like it out of 2.") -# Test #1043 TO DO - mixed negatives -test(1043, DT[c(1, -5)], error="Item 2 of i is -5 and item 1 is 1. 
Cannot mix positives and negatives.") +test(1043, DT[c(1, -5)], error="Cannot mix positives and negatives.") # crash (floating point exception), when assigning null data.table() to multiple cols, #4731 DT = data.table(x=1:5,y=6:10) @@ -11520,6 +11519,19 @@ test(1870.3, fread("A,B,\n,,\n,500,3.4"), data.table(A=NA, B=c(NA,500L), V3=c(NA # fread(lines, nrows=i) # } +# assortment of tests from #2572 +## negative indexing should retain key +DT = data.table(a = c(5, 5, 7, 2, 2), + b = 1:5, key = 'a') +test(1871.1, key(DT[-c(2, 3)]), 'a') +test(1871.2, key(DT[-(1:5)]), 'a') +test(1871.3, key(DT[-2, sum(b), by = a]), 'a') +## behavior of out-of-bound subsets +## (mixed +/- already covered in 1043) +test(1871.4, DT[3:6], + data.table(a = c(5, 5, 7, NA), + b = c(1L, 2L, 3L, NA))) +test(1871.5, DT[0:5], DT) ########################## @@ -11531,14 +11543,9 @@ test(1870.3, fread("A,B,\n,,\n,500,3.4"), data.table(A=NA, B=c(NA,500L), V3=c(NA # TO DO: tests of freading classes like Date and the verbose messages there. # TO DO: Test mid read bump of logical T/F to character, collapse back to T and F. -# TO DO: add examples of multiple LHS (name and position) and multiple RHS to example(":=") # TO DO: tests on double in add hoc by -# TO DO: test on -i that retain key e.g. DT[-4] and DT[-4,sum(v),by=b] should both retain key -# test on out of bound i subsets e.g. 6:10 when DT has 7 rows, and mixed negative and positive i integer is error. -# test that ordered subsets when i is unkeyed now retain x's key (using is.sorted(f__)) # TO DO: add FAQ that eval() is evaled in calling frame so don't need a, then update SO question of 14 March. See the test using variable name same as column name. Actually, is that true? Need "..J". 
-# TO DO: why did SO answer using eval twice in j need .SD in lapply(f,eval,.SD) on 19 Apr # TO DO: change all 1 to 1L internally (done in data.table.R, other .R to do) # TO DO: check the "j is named list could be inefficient" message from verbose than Chris N showed recently to 15 May diff --git a/man/assign.Rd b/man/assign.Rd index 5006708d3..61a7fed1a 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -27,7 +27,7 @@ set(x, i = NULL, j, value) \item{i}{ Optional. Indicates the rows on which the values must be updated with. If not provided, implies \emph{all rows}. The \code{:=} form is more powerful as it allows \emph{subsets} and \code{joins} based add/update columns by reference. See \code{Details}. In \code{set}, only integer type is allowed in \code{i} indicating which rows \code{value} should be assigned to. \code{NULL} represents all rows more efficiently than creating a vector such as \code{1:nrow(x)}. } -\item{j}{ Column name(s) (character) or number(s) (integer) to be assigned \code{value} when column(s) already exist, and only column name(s) if they are to be added newly. } +\item{j}{ Column name(s) (character) or number(s) (integer) to be assigned \code{value} when column(s) already exist, and only column name(s) if they are to be created. } \item{value}{ A list of replacement values to assign by reference to \code{x[i, j]}. 
} } \details{ @@ -100,6 +100,30 @@ setkey(DT, a) DT["A", b := 0L] # binary search for group "A" and set column b using keys DT["B", f := mean(d)] # subassign to new column, NA initialized +# Adding multiple columns +## by name +DT[ , c('sin_d', 'log_e', 'cos_d') := + .(sin(d), log(e), cos(d))] +## by patterned name +DT[ , paste(c('sin', 'cos'), 'b', sep = '_') := + .(sin(b), cos(b))] +## using lapply & .SD +DT[ , paste0('tan_', c('b', 'd', 'e')) := + lapply(.SD, tan), .SDcols = c('b', 'd', 'e')] +## using forced evaluation to disambiguate a vector of names +## and overwrite existing columns with their squares +sq_cols = c('b', 'd', 'e') +DT[ , (sq_cols) := lapply(.SD, `^`, 2L), .SDcols = sq_cols] +## by integer (NB: for robustness, it is not recommended +## to use explicit integers to update/define columns) +DT[ , c(2L, 3L, 4L) := .(sqrt(b), sqrt(d), sqrt(e))] +## by implicit integer +DT[ , grep('a$', names(DT)) := tolower(a)] +## by implicit integer, using forced evaluation +sq_col_idx = grep('d$', names(DT)) +DT[ , (sq_col_idx) := lapply(.SD, dnorm), + .SDcols = sq_col_idx] + \dontrun{ # Speed example ... 
From 2b7e78f58ae468db7cf39674ee471b2ef5ac2463 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 9 Feb 2018 10:23:41 +0800 Subject: [PATCH 2/5] new test via Pasha --- inst/tests/tests.Rraw | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6b9dbf21f..0508f27ac 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11599,7 +11599,16 @@ test(1873.4, DT[3:6], data.table(a = c(5, 5, 7, NA), b = c(1L, 2L, 3L, NA))) test(1873.5, DT[0:5], DT) - +## if fread bumps logical to character, +## the original string representation should be kept +DT = data.table(A=rep("True", 2200), B="FALSE", C='0') +DT[111, LETTERS[1:3] := .("here", "be", "dragons")] +fwrite(DT, f<-tempfile()) +test(1873.6, fread(f, verbose=TRUE), DT, + output=paste("Column 1.*bumped from 'bool8' to 'string'", + "Column 2.*bumped from 'bool8' to 'string'", + "Column 3.*bumped from 'bool8' to 'string'", + sep = '.*')) ########################## # TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time. @@ -11608,7 +11617,6 @@ test(1873.5, DT[0:5], DT) # TO DO: test and highlight in docs that negatives are fine and fast in forderv (ref R wish #15644) # TO DO: tests of freading classes like Date and the verbose messages there. -# TO DO: Test mid read bump of logical T/F to character, collapse back to T and F. # TO DO: tests on double in add hoc by From 88a93d79c07c63acf097d60fbb56649fe5ffebfe Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Wed, 21 Feb 2018 17:08:09 -0800 Subject: [PATCH 3/5] No write to testDir() (not writeable on CRAN). tempfile() instead. 
--- inst/tests/tests.Rraw | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ba12a4121..ff45125c7 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11709,13 +11709,13 @@ test(1879.5, DT[0:5], DT) ## the original string representation should be kept DT = data.table(A=rep("True", 2200), B="FALSE", C='0') DT[111, LETTERS[1:3] := .("here", "be", "dragons")] -outf = testDir('test1873.6.csv') -fwrite(DT, outf) -test(1879.6, fread(outf, verbose=TRUE), DT, +fwrite(DT, f<-tempfile()) +test(1879.6, fread(f, verbose=TRUE), DT, output=paste("Column 1.*bumped from 'bool8' to 'string'", "Column 2.*bumped from 'bool8' to 'string'", "Column 3.*bumped from 'bool8' to 'string'", sep = '.*')) +unlink(f) ########################## From d6daed0a93ebbbd0891fd91edc5558a4461ed1b5 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Thu, 22 Feb 2018 12:02:17 -0800 Subject: [PATCH 4/5] Removed TODO at the end of tests.Rraw as they are now tracked in issue #2572. Thanks to Michael C. --- inst/tests/tests.Rraw | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ff45125c7..221b2d12f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11717,34 +11717,17 @@ test(1879.6, fread(f, verbose=TRUE), DT, sep = '.*')) unlink(f) -########################## - -# TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time. - -# TO DO: Add test for fixed bug #5519 - dcast returned error when a package imported data.table, but dint happen when "depends" on data.table. This is fixed (commit 1263 v1.9.3), but not sure how to add test. - -# TO DO: test and highlight in docs that negatives are fine and fast in forderv (ref R wish #15644) -# TO DO: tests of freading classes like Date and the verbose messages there. 
- -# TO DO: tests on double in add hoc by -# TO DO: add FAQ that eval() is evaled in calling frame so don't need a, then update SO question of 14 March. See the test using variable name same as column name. Actually, is that true? Need "..J". +################################### +# Add new tests above this line # +################################### -# TO DO: check the "j is named list could be inefficient" message from verbose than Chris N showed recently to 15 May -# TO DO: !make sure explicitly that unnamed lists are being executed by dogroups! -# TO DO: Add to warning about a previous copy that class<-, levels<- can also copy whole vector. *Any* fun<- form basically. -# TO DO: use looped := vs set test in example(":=") or example(setnames) to test overhead in [.data.table is tested to stay low in future. - -# TO DO: add tests on smaller examples with NAs for 'frankv', even though can't compare to base::rank. -## See test-* for more tests - -########################## options(warn=0) setDTthreads(0) options(oldalloccol) # set at top of this file - -plat = paste("endian==",.Platform$endian,", sizeof(long double)==",.Machine$sizeof.longdouble, - ", sizeof(pointer)==",.Machine$sizeof.pointer, sep="") +plat = paste0("endian==", .Platform$endian, + ", sizeof(long double)==", .Machine$sizeof.longdouble, + ", sizeof(pointer)==", .Machine$sizeof.pointer) if (nfail > 0) { if (nfail>1) {s1="s";s2="s: "} else {s1="";s2=" "} cat("\r") @@ -11752,7 +11735,6 @@ if (nfail > 0) { # important to stop() here, so that 'R CMD check' fails } cat("\n",plat,"\n\nAll ",ntest," tests in inst/tests/tests.Rraw completed ok in ",timetaken(started.at)," on ",date(),"\n",sep="") -# date() is included so we can tell when CRAN checks were run (in particular if they have been rerun since -# an update to Rdevel itself; data.table doesn't have any other dependency) since there appears to be no other -# way to see the timestamp that CRAN checks were run. Some CRAN machines lag by several days. 
+# date() is included so we can tell exactly when these tests ran on CRAN. Sometimes a CRAN log can show error but that can be just +# stale due to not updating yet since a fix in R-devel, for example. From 2d25df08d747e6558194a0e99f6f4abf37095156 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Thu, 22 Feb 2018 12:30:30 -0800 Subject: [PATCH 5/5] Wording change as the dragons were slain there. --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 221b2d12f..31bc4ec14 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11708,7 +11708,7 @@ test(1879.5, DT[0:5], DT) ## if fread bumps logical to character, ## the original string representation should be kept DT = data.table(A=rep("True", 2200), B="FALSE", C='0') -DT[111, LETTERS[1:3] := .("here", "be", "dragons")] +DT[111, LETTERS[1:3] := .("fread", "is", "faithful")] fwrite(DT, f<-tempfile()) test(1879.6, fread(f, verbose=TRUE), DT, output=paste("Column 1.*bumped from 'bool8' to 'string'",