Skip to content

Commit

Permalink
fifelse - Issue #3657 (#3678)
Browse files Browse the repository at this point in the history
  • Loading branch information
2005m authored and mattdowle committed Aug 2, 2019
1 parent 2986736 commit a8e0230
Show file tree
Hide file tree
Showing 10 changed files with 357 additions and 13 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ Authors@R: c(
person("@javrucebo","", role="ctb"),
person("@marc-outins","", role="ctb"),
person("Roy","Storey", role="ctb"),
person("Manish","Saraswat", role="ctb"))
person("Manish","Saraswat", role="ctb"),
person("Morgan","Jacob", role="ctb"))
Depends: R (>= 3.1.0)
Imports: methods
Suggests: bit64, curl, R.utils, knitr, xts, nanotime, zoo, yaml
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(setorder, setorderv)
export(setNumericRounding, getNumericRounding)
export(chmatch, "%chin%", chorder, chgroup)
export(rbindlist)
export(fifelse)
export(fread)
export(fwrite)
export(foverlaps)
Expand Down
22 changes: 22 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,28 @@
20. `setkey`, `[key]by=` and `on=` in verbose mode (`options(datatable.verbose=TRUE)`) now detect any columns inheriting from `Date` which are stored as 8 byte double, test if any fractions are present, and if not suggest using a 4 byte integer instead (such as `data.table::IDate`) to save space and time, [#1738](https://github.com/Rdatatable/data.table/issues/1738). In future this could be upgraded to `message` or `warning` depending on feedback.
21. New function `fifelse(test, yes, no)` has been implemented in C by Morgan Jacob, [#3657](https://github.com/Rdatatable/data.table/issues/3657). It is comparable to `base::ifelse`, `dplyr::if_else`, `hutils::if_else`, and (forthcoming) [`vctrs::if_else()`](https://vctrs.r-lib.org/articles/stability.html#ifelse). It returns a vector of the same length as `test` but unlike `base::ifelse` the output type is consistent with those of `yes` and `no`. Please see `?data.table::fifelse` for more details.
```R
# default 4 threads on a laptop with 16GB RAM and 8 logical CPU
x = sample(c(TRUE,FALSE), 3e8, replace=TRUE) # 1GB
microbenchmark::microbenchmark(
base::ifelse(x, 7L, 11L),
dplyr::if_else(x, 7L, 11L),
hutils::if_else(x, 7L, 11L),
data.table::fifelse(x, 7L, 11L),
times = 5L, unit="s"
)
# Unit: seconds
# expr min med max neval
# base::ifelse(x, 7L, 11L) 8.5 8.6 8.8 5
# dplyr::if_else(x, 7L, 11L) 9.4 9.5 9.7 5
# hutils::if_else(x, 7L, 11L) 2.6 2.6 2.7 5
# data.table::fifelse(x, 7L, 11L) 1.5 1.5 1.6 5 # setDTthreads(1)
# data.table::fifelse(x, 7L, 11L) 0.8 0.8 0.9 5 # setDTthreads(2)
# data.table::fifelse(x, 7L, 11L) 0.4 0.4 0.5 5 # setDTthreads(4)
```
#### BUG FIXES
1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
Expand Down
4 changes: 0 additions & 4 deletions R/coalesce.R

This file was deleted.

20 changes: 14 additions & 6 deletions R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -368,12 +368,20 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,output=NULL,message=NULL) {
# For test 617 on r-prerel-solaris-sparc on 7 Mar 2013
# nocov start
if (!fail) {
cat("Test",numStr,"ran without errors but failed check that x equals y:\n")
cat("> x =",deparse(xsub),"\n")
if (is.data.table(x)) compactprint(x) else {cat("First 6 of ", length(x)," (type '", typeof(x), "'): ", sep=""); print(head(x))}
cat("> y =",deparse(ysub),"\n")
if (is.data.table(y)) compactprint(y) else {cat("First 6 of ", length(y)," (type '", typeof(y), "'): ", sep=""); print(head(y))}
if (!isTRUE(all.equal.result)) cat(all.equal.result,sep="\n")
cat("Test", numStr, "ran without errors but failed check that x equals y:\n")
failPrint = function(x, xsub) {
cat(">", substitute(x), "=", xsub, "\n")
if (is.data.table(x)) compactprint(x) else {
nn = length(x)
cat(sprintf("First %d of %d (type '%s'): \n", min(nn, 6L), length(x), typeof(x)))
# head.matrix doesn't restrict columns
if (length(d <- dim(x))) do.call(`[`, c(list(x, drop = FALSE), lapply(pmin(d, 6L), seq_len)))
else print(head(x))
}
}
failPrint(x, deparse(xsub))
failPrint(y, deparse(ysub))
if (!isTRUE(all.equal.result)) cat(all.equal.result, sep="\n")
fail = TRUE
}
# nocov end
Expand Down
9 changes: 9 additions & 0 deletions R/wrappers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

# Very small (e.g. one line) R functions that just call C.
# One file wrappers.R to avoid creating lots of small .R files.

coalesce = function(...) .Call(Ccoalesce, list(...), FALSE)
setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE)

fifelse = function(test, yes, no) .Call(CfifelseR,test, yes, no)

85 changes: 83 additions & 2 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -15424,9 +15424,90 @@ test(2071.07, any_na(data.table(as.raw(0L))), FALSE)
test(2071.08, any_na(data.table(c(1+1i, NA))))
test(2071.09, any_na(data.table(expression(1))), error="Unsupported column type 'expression'")
test(2071.10, dcast(data.table(a=1, b=1, l=list(list(1))), a ~ b, value.var='l'),
data.table(a=1, `1`=list(list(1)), key='a'))
data.table(a=1, `1`=list(list(1)), key='a'))
test(2071.11, dcast(data.table(a = 1, b = 2, c = 3), a ~ b, value.var = 'c', fill = '2'),
data.table(a=1, `2`=3, key='a'))
data.table(a=1, `2`=3, key='a'))

# fifelse, #3657
test_vec = -5L:5L < 0L
test_vec_na = c(test_vec, NA)
out_vec = rep(1:0, 5:6)
out_vec_na = c(out_vec, NA_integer_)
test(2072.001, fifelse(test_vec, 1L, 0L), out_vec)
test(2072.002, fifelse(test_vec, 1, 0), as.numeric(out_vec))
test(2072.003, fifelse(test_vec, TRUE, FALSE), as.logical(out_vec))
test(2072.004, fifelse(test_vec, "1", "0"), as.character(out_vec))
test(2072.005, fifelse(test_vec_na, TRUE, NA), c(rep(TRUE,5L), rep(NA,7L)))
test(2072.006, fifelse(test_vec, rep(1L,11L), rep(0L,11L)), out_vec)
test(2072.007, fifelse(test_vec, rep(1L,11L), 0L), out_vec)
test(2072.008, fifelse(test_vec, 1L, rep(0L,11L)), out_vec)
test(2072.009, fifelse(test_vec, rep(1L,11L), rep(0L,10L)), error="Length of 'no' is 10 but must be 1 or length of 'test' (11).")
test(2072.010, fifelse(test_vec, rep(1,10L), rep(0,11L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).")
test(2072.011, fifelse(test_vec, rep(TRUE,10L), rep(FALSE,10L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).")
test(2072.012, fifelse(0:1, rep(TRUE,2L), rep(FALSE,2L)), error="Argument 'test' must be logical.")
test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'yes' is of type logical but 'no' is of type character. Please")
test(2072.014, fifelse(test_vec, list(1),list(2,4)), error="Length of 'no' is 2 but must be 1 or length of 'test' (11).")
test(2072.015, fifelse(test_vec, list(1,3),list(2,4)), error="Length of 'yes' is 2 but must be 1 or length of 'test' (11).")
test(2072.016, fifelse(test_vec, list(1), 0), as.list(as.numeric(out_vec)))
test(2072.017, fifelse(test_vec, 1, list(0)), as.list(as.numeric(out_vec)))
## Jan 1 - 5, 2011
date_vec = as.Date(14975:14979, origin = '1970-01-01')
test(2072.018, fifelse(date_vec == "2011-01-01", date_vec - 1L, date_vec),
c(date_vec[1L] - 1L, date_vec[2:5]))
test(2072.019, fifelse(c(TRUE,FALSE,TRUE,TRUE,FALSE), factor(letters[1:5]), factor("a", levels=letters[1:5])),
factor(c("a","a","c","d","a"), levels=letters[1:5]))
test(2072.020, fifelse(test_vec_na, 1L, 0L), out_vec_na)
test(2072.021, fifelse(test_vec_na, rep(1L,12L), 0L), out_vec_na)
test(2072.022, fifelse(test_vec_na, rep(1L,12L), rep(0L,12L)), out_vec_na)
test(2072.023, fifelse(test_vec_na, 1L, rep(0L,12L)), out_vec_na)
test(2072.024, fifelse(test_vec_na, 1, 0), as.numeric(out_vec_na))
test(2072.025, fifelse(test_vec_na, rep(1,12L), 0), as.numeric(out_vec_na))
test(2072.026, fifelse(test_vec_na, rep(1,12L), rep(0,12L)), as.numeric(out_vec_na))
test(2072.027, fifelse(test_vec_na, 1, rep(0,12L)), as.numeric(out_vec_na))
test(2072.028, fifelse(test_vec_na, TRUE, rep(FALSE,12L)), as.logical(out_vec_na))
test(2072.029, fifelse(test_vec_na, rep(TRUE,12L), FALSE), as.logical(out_vec_na))
test(2072.030, fifelse(test_vec_na, rep(TRUE,12L), rep(FALSE,12L)), as.logical(out_vec_na))
test(2072.031, fifelse(test_vec_na, "1", rep("0",12L)), as.character(out_vec_na))
test(2072.032, fifelse(test_vec_na, rep("1",12L), "0"), as.character(out_vec_na))
test(2072.033, fifelse(test_vec_na, rep("1",12L), rep("0",12L)), as.character(out_vec_na))
test(2072.034, fifelse(test_vec_na, "1", "0"), as.character(out_vec_na))
test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'yes' is of type double but 'no' is of type logical. Please")
test(2072.036, fifelse(test_vec_na, 1+0i, 0+0i), as.complex(out_vec_na))
test(2072.037, fifelse(test_vec_na, rep(1+0i,12L), 0+0i), as.complex(out_vec_na))
test(2072.038, fifelse(test_vec_na, rep(1+0i,12L), rep(0+0i,12L)), as.complex(out_vec_na))
test(2072.039, fifelse(test_vec_na, 1+0i, rep(0+0i,12L)), as.complex(out_vec_na))
test(2072.040, fifelse(test_vec, as.raw(0), as.raw(1)), error="Type raw is not supported.")
test(2072.041, fifelse(TRUE,1,as.Date("2019-07-07")), error="'yes' has different class than 'no'. Please")
test(2072.042, fifelse(TRUE,1L,factor(letters[1])), error="'yes' has different class than 'no'. Please")
test(2072.043, fifelse(TRUE, list(1:5), list(5:1)), list(1:5))
test(2072.044, fifelse(as.logical(NA), list(1:5), list(5:1)), list(NULL))
test(2072.045, fifelse(FALSE, list(1:5), list(5:1)), list(5:1))
test(2072.046, fifelse(TRUE, data.table(1:5), data.table(5:1)), data.table(1:5))
test(2072.047, fifelse(FALSE, data.table(1:5), data.table(5:1)), data.table(5:1))
test(2072.048, fifelse(TRUE, data.frame(1:5), data.frame(5:1)), data.frame(1:5))
test(2072.049, fifelse(FALSE, data.frame(1:5), data.frame(5:1)), data.frame(5:1))
test(2072.050, fifelse(c(TRUE,FALSE), list(1:5,6:10), list(10:6,5:1)), list(1:5,5:1))
test(2072.051, fifelse(c(NA,TRUE), list(1:5,6:10), list(10:6,5:1)), list(NULL,6:10))
test(2072.052, fifelse(c(FALSE,TRUE), list(1:5,6:10), list(10:6,5:1)), list(10:6,6:10))
test(2072.053, fifelse(c(NA,TRUE), list(1:5), list(10:6,5:1)), list(NULL,1:5))
test(2072.054, fifelse(c(NA,TRUE), list(1:5,6:10), list(5:1)), list(NULL,6:10))
test(2072.055, fifelse(c(FALSE,TRUE), list(TRUE), list(10:6,5:1)), list(10:6,TRUE))
test(2072.056, fifelse(c(FALSE,TRUE), list(as.Date("2019-07-07")), list(10:6,5:1)), list(10:6,as.Date("2019-07-07")))
test(2072.057, fifelse(c(FALSE,TRUE), list(factor(letters[1:5])), list(10:6,5:1)), list(10:6,factor(letters[1:5])))
test(2072.058, fifelse(c(NA,FALSE), list(1:5), list(10:6,5:1)), list(NULL,5:1))
test(2072.059, fifelse(c(NA,FALSE), list(1:5,6:10), list(5:1)), list(NULL,5:1))
test(2072.060, fifelse(c(NA,FALSE), list(1:5), list(5:1)), list(NULL,5:1))
test(2072.061, fifelse(c(TRUE,FALSE), list(1L), 0L), list(1L,0L))
test(2072.062, fifelse(c(TRUE,FALSE), 1L, list(0L)), list(1L,0L))
test(2072.063, fifelse(c(TRUE,FALSE), factor(c("a","b")), factor(c("a","c"))), error="'yes' and 'no' are both type factor but their levels are different")
test(2072.064, fifelse(c(TRUE, TRUE, TRUE, FALSE, FALSE), factor(NA, levels=letters[1:5]), factor(letters[1:5])),
factor(c(NA,NA,NA,"d","e"),levels=letters[1:5]))
test(2072.065, fifelse(c(TRUE, TRUE, TRUE, FALSE, NA, FALSE), factor(NA, levels=letters[1:6]), factor(letters[1:6])),
factor(c(NA,NA,NA,"d",NA,"f"),levels=letters[1:6]))
test(2072.066, fifelse(c(TRUE, TRUE, TRUE, FALSE, NA, FALSE), factor(letters[1:6]), factor(NA, levels=letters[1:6])),
factor(c("a","b","c",NA,NA,NA), levels=letters[1:6]))
test(2072.067, fifelse(c(TRUE, NA, TRUE, FALSE, FALSE, FALSE), factor(NA), factor(NA)),
factor(c(NA,NA,NA,NA,NA,NA)))

###################################
# Add new tests above this line #
Expand Down
88 changes: 88 additions & 0 deletions man/fifelse.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
\name{fifelse}
\alias{fifelse}
\title{ Fast ifelse }
\description{
\code{data.table::fifelse} is comparable to \code{base::ifelse}, \code{dplyr::if_else} and \code{hutils::if_else}.
Like these functions, it returns a value with the same length as \code{test} and filled with value from \code{yes} or \code{no}.
This function is mostly written in C for speed and unlike \code{base::ifelse} the output type is consistent with those of \code{yes} and \code{no}.
}
\usage{
fifelse(test, yes, no)
}
\arguments{
\item{test}{ A logical vector }
\item{yes, no}{ Values to return depending on \code{TRUE}/\code{FALSE} element of \code{test}. They must be the same type and be either length \code{1} or the same length of \code{test}. Attributes are copied from \code{yes} to the output.}
}
\value{
A vector of the same length as \code{test} and attributes as \code{yes}. Data values are taken from the values of \code{yes} and \code{no}.
}
\examples{
# In the following 2 examples, one can see the contrast between ifelse and
# fifelse. Unlike ifelse, fifelse preserves the
# type and class of the inputs. Attributes are are taken from the "yes" argument.

### Example 1 - Date class preserved
dates = as.Date(c("2011-01-01","2011-01-02","2011-01-03","2011-01-04","2011-01-05"))
ifelse(dates == "2011-01-01", dates - 1, dates)
# [1] 14974 14976 14977 14978 14979
fifelse(dates == "2011-01-01", dates - 1, dates)
# [1] "2010-12-31" "2011-01-02" "2011-01-03" "2011-01-04" "2011-01-05"

### Example 2 - Factor class preserved
v = factor(letters[1:3])
base::ifelse(c(TRUE,FALSE,TRUE), v, factor("a",levels=levels(v)))
# [1] 1 1 3
fifelse(c(TRUE,FALSE,TRUE), v, factor("a",levels=levels(v)))
# [1] a a c
# Levels: a b c

\dontrun{
# Example 3:
# Unlike dplyr::if_else and hutils::if_else, fifelse and ifelse
# allow singleton replacements to be en-listed ( i.e. wrapped correctly in list()).

ifelse(c(TRUE, FALSE), 1L, list(0L))
# [[1]]
# [1] 1
#
# [[2]]
# [1] 0

data.table::fifelse(c(TRUE, FALSE), 1L, list(0L))
# [[1]]
# [1] 1
#
# [[2]]
# [1] 0

dplyr::if_else(c(TRUE, FALSE), 1L, list(0L))
# Error: `false` must be an integer vector, not a list
# Call `rlang::last_error()` to see a backtrace

hutils::if_else(c(TRUE, FALSE), 1L, list(0L))
# Error in hutils::if_else(c(TRUE, FALSE), 1L, list(0L)) :
# typeof(false) == list, yet typeof(true) == integer. Both true and false must have the same type.

# The above is useful given that data.table allows list columns as shown below:
DT1 = data.table(a = 0:5, b = list(1, list(2:3), list(4:6), list(6:4), list(3:2), 1))
DT2 = data.table(a = 0:5, b = list(1, list(2:3), list(4:6), list(6:4), list(3:2), 1))
all.equal(DT1[ , b := fifelse(a > 2, b, 0)],DT2[ , b := ifelse(a > 2, b, 0)])
# [1] TRUE

# Example 4 :
# data.table:fifelse is fast...
# The below code has been run on a desktop with 32GB RAM and 8 logical CPU
x <- -3e8L:3e8 < 0 # 2.4GB
system.time(y1 <- fifelse(x, 1L, 0L))
system.time(y2 <- hutils::if_else(x, 1L, 0L))
system.time(y3 <- ifelse(x, 1L, 0L))
system.time(y4 <- dplyr::if_else(x, 1L, 0L))
identical(y1,y2) && identical(y1,y3) && identical(y1,y4)
# user system elapsed (seconds)
# 0.55 0.78 1.33 # data.table v1.12.3
# 2.52 1.44 3.95 # hutils v1.5.0
# 9.46 5.80 15.26 # base v3.6.0
# 11.11 9.25 20.38 # dplyr v0.8.3
}
}
\keyword{ data }
Loading

0 comments on commit a8e0230

Please sign in to comment.