Skip to content

Commit

Permalink
better match base::order(method=) and decreasing= (#6655)
Browse files Browse the repository at this point in the history
* better match base::order(method=) and decreasing=

* another 0 for consistency

* coverage

* bad copy-paste
  • Loading branch information
MichaelChirico authored Jan 6, 2025
1 parent b150ab5 commit ba5773d
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ rowwiseDT(

15. Joins of `integer64` and `double` columns succeed when the `double` column has lossless `integer64` representation, [#4167](https://github.com/Rdatatable/data.table/issues/4167) and [#6625](https://github.com/Rdatatable/data.table/issues/6625). Previously, this only worked when the double column had lossless _32-bit_ integer representation. Thanks @MichaelChirico for the reports and fix.

17. `DT[order(...)]` better matches `base::order()` behavior by (1) recognizing the `method=` argument (and erroring since this is not supported) and (2) accepting a vector of `TRUE`/`FALSE` in `decreasing=` as an alternative to using `-a` to convey "sort `a` decreasing", [#4456](https://github.com/Rdatatable/data.table/issues/4456). Thanks @jangorecki for the FR and @MichaelChirico for the PR.

## NOTES

1. There is a new vignette on joins! See `vignette("datatable-joins")`. Thanks to Angel Feliz for authoring it! Feedback welcome. This vignette has been highly requested since 2017: [#2181](https://github.com/Rdatatable/data.table/issues/2181).
Expand Down
20 changes: 15 additions & 5 deletions R/setkey.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,17 +155,19 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, retStats=retGrp, sort=TRUE,
.Call(CforderReuseSorting, x, by, retGrp, retStats, sort, order, na.last, reuseSorting) # returns integer() if already sorted, regardless of sort=TRUE|FALSE
}

forder = function(..., na.last=TRUE, decreasing=FALSE)
forder = function(..., na.last=TRUE, decreasing=FALSE, method="radix")
{
if (method != "radix") stopf("data.table has no support for sorting by method='%s'. Use base::order(), not order(), if you really need this.", method)
stopifnot(is.logical(decreasing), length(decreasing) > 0L, !is.na(decreasing))
sub = substitute(list(...))
tt = vapply_1b(sub, function(x) is.null(x) || (is.symbol(x) && !nzchar(x)))
if (any(tt)) sub[tt] = NULL # remove any NULL or empty arguments; e.g. test 1962.052: forder(DT, NULL) and forder(DT, )
if (length(sub)<2L) return(NULL) # forder() with no arguments returns NULL consistent with base::order
asc = rep.int(1L, length(sub)-1L) # ascending (1) or descending (-1) per column
# the idea here is to intercept - (and unusual --+ deriving from built expressions) before vectors in forder(DT, -colA, colB) so that :
# 1) - on character vector works; ordinarily in R that fails with type error
# 2) each column/expression can have its own +/- more easily that having to use a separate decreasing=TRUE/FALSE
# 3) we can pass the decreasing (-) flag to C and avoid what normally happens in R; i.e. allocate a new vector and apply - to every element first
# 2) each column/expression can have its own +/- more easily than having to use a separate decreasing=TRUE/FALSE
# 3) we can pass the decreasing (-) flag to C and avoid what normally happens in R; i.e. allocate a new vector and negate every element first
# We intercept the unevaluated expressions and massage them before evaluating in with(DT) scope or not depending on the first item.
for (i in seq.int(2L, length(sub))) {
v = sub[[i]]
Expand All @@ -188,8 +190,16 @@ forder = function(..., na.last=TRUE, decreasing=FALSE)
} else {
data = eval(sub, parent.frame(), parent.frame())
}
stopifnot(isTRUEorFALSE(decreasing))
o = forderv(data, seq_along(data), retGrp=FALSE, retStats=FALSE, sort=TRUE, order=if (decreasing) -asc else asc, na.last=na.last)
if (length(decreasing) > 1L) {
if (any(asc < 0L)) stopf("Mixing '-' with vector decreasing= is not supported.")
if (length(decreasing) != length(asc)) stopf("decreasing= has length %d applied to sorting %d columns.", length(decreasing), length(asc))
orderArg = fifelse(decreasing, -asc, asc)
} else if (decreasing) {
orderArg = -asc
} else {
orderArg = asc
}
o = forderv(data, seq_along(data), retGrp=FALSE, retStats=FALSE, sort=TRUE, order=orderArg, na.last=na.last)
if (!length(o) && length(data)>=1L) o = seq_along(data[[1L]]) else o
o
}
Expand Down
12 changes: 10 additions & 2 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -13607,8 +13607,9 @@ test(1962.0482, forder(L), 3:1)
test(1962.0483, forder(), NULL)
setDT(DT)
test(1962.049, forder(DT[ , 0L]), error = 'Attempting to order a 0-column')
test(1962.050, forder(DT, decreasing = NA), error = base_messages$stopifnot('isTRUEorFALSE(decreasing)'))
test(1962.051, forder(DT, decreasing = 1.4), error = base_messages$stopifnot('isTRUEorFALSE(decreasing)'))
test(1962.0500, forder(DT, decreasing = NA), error = base_messages$stopifnot('!is.na(decreasing)'))
test(1962.0510, forder(DT, decreasing = 1.4), error = base_messages$stopifnot('is.logical(decreasing)'))
test(1962.0511, forder(DT, decreasing=logical()), error=base_messages$stopifnot('length(decreasing) > 0L'))
test(1962.052, forder(DT, NULL), 3:1)
test(1962.053, forder(DT), 3:1)
test(1962.054, forder(DT, ), 3:1)
Expand Down Expand Up @@ -20702,3 +20703,10 @@ if (test_bit64) {
test(2300.3, DT1[DT2, on='id'], error="Incompatible join types")
test(2300.4, DT2[DT1, on='id'], error="Incompatible join types")
}

# interpret more arguments to order() correctly when translating to forder(), #4456
DT = data.table(a=rep(1:3, 4), b=rep(1:2, 6))
test(2301.1, DT[order(a, method="auto")], error="no support for sorting by method='auto'")
test(2301.2, DT[order(a, b, decreasing=c(TRUE, FALSE))], DT[order(-a, b)])
test(2301.3, DT[order(a, -b, decreasing=c(TRUE, TRUE))], error="Mixing '-' with vector decreasing")
test(2301.4, DT[order(a, b, decreasing=c(TRUE, TRUE, FALSE))], error="decreasing= has length 3")

0 comments on commit ba5773d

Please sign in to comment.