Skip to content

Commit

Permalink
simplify verbose timing calls (including some undiscovered typos)
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Chirico committed Jan 23, 2018
1 parent 6afb0ee commit 4c8162d
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 42 deletions.
8 changes: 4 additions & 4 deletions R/between.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@ inrange <- function(x,lower,upper,incbounds=TRUE) {
subject = setDT(list(l=lower, u=upper))
ops = if (incbounds) c(4L, 2L) else c(5L, 3L) # >=,<= and >,<
verbose = getOption("datatable.verbose")
if (verbose) {last.started.at=proc.time()[3L];cat("forderv(query) took ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat("forderv(query) took ... ");flush.console()}
xo = forderv(query)
if (verbose) {cat(round(proc.time()[3L]-last.started.at, 3L),"secs\n");flush.console}
if (verbose) {cat(timetaken(last.started.at)); flush.console()}
ans = bmerge(shallow(subject), query, 1L:2L, c(1L,1L), FALSE, xo,
0, c(FALSE, TRUE), 0L, "all", ops, integer(0L),
1L, verbose) # fix for #1819, turn on verbose messages
options(datatable.verbose=FALSE)
setDT(ans[c("starts", "lens")], key=c("starts", "lens"))
options(datatable.verbose=verbose)
if (verbose) {last.started.at=proc.time()[3L];cat("Generating final logical vector ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat("Generating final logical vector ... ");flush.console()}
.Call(Cinrange, idx <- vector("logical", length(x)), xo, ans[["starts"]], ans[["lens"]])
if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console}
if (verbose) {cat("done in", timetaken(last.started.at)); flush.console}
idx
}

Expand Down
4 changes: 2 additions & 2 deletions R/bmerge.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ bmerge <- function(i, x, leftcols, rightcols, io, xo, roll, rollends, nomatch, m
set(i, j=lc, value=newval)
}
}
if (verbose) {last.started.at=proc.time()[3L];cat("Starting bmerge ...");flush.console()}
if (verbose) {last.started.at=proc.time();cat("Starting bmerge ...");flush.console()}
ans = .Call(Cbmerge, i, x, as.integer(leftcols), as.integer(rightcols), io<-haskey(i), xo, roll, rollends, nomatch, mult, ops, nqgrp, nqmaxgrp)
# NB: io<-haskey(i) necessary for test 579 where the := above change the factor to character and remove i's key
if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()}
if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()}

# in the caller's shallow copy, see comment at the top of this function for usage
# We want to leave the coercions to i in place otherwise, since the caller depends on that to build the result
Expand Down
56 changes: 28 additions & 28 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -561,30 +561,30 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
# non-equi operators present.. investigate groups..
if (verbose) cat("Non-equi join operators detected ... \n")
if (!missingroll) stop("roll is not implemented for non-equi joins yet.")
if (verbose) {last.started.at=proc.time()[3L];cat(" forder took ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat(" forder took ... ");flush.console()}
# TODO: could check/reuse secondary indices, but we need 'starts' attribute as well!
xo = forderv(x, rightcols, retGrp=TRUE)
if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console}
if (verbose) {cat(timetaken(last.started.at)); flush.console()}
xg = attr(xo, 'starts')
resetcols = head(rightcols, non_equi-1L)
if (length(resetcols)) {
# TODO: can we get around having to reorder twice here?
# or at least reuse previous order?
if (verbose) {last.started.at=proc.time()[3L];cat(" Generating group lengths ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat(" Generating group lengths ... ");flush.console()}
resetlen = attr(forderv(x, resetcols, retGrp=TRUE), 'starts')
resetlen = .Call(Cuniqlengths, resetlen, nrow(x))
if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console}
if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()}
} else resetlen = integer(0L)
if (verbose) {last.started.at=proc.time()[3L];cat(" Generating non-equi group ids ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat(" Generating non-equi group ids ... ");flush.console()}
nqgrp = .Call(Cnestedid, x, rightcols[non_equi:length(rightcols)], xo, xg, resetlen, mult)
if (verbose) {cat("done in", round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console}
if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()}
if (length(nqgrp)) nqmaxgrp = max(nqgrp) # fix for #1986, when 'x' is 0-row table max(.) returns -Inf.
if (nqmaxgrp > 1L) { # got some non-equi join work to do
if ("_nqgrp_" %in% names(x)) stop("Column name '_nqgrp_' is reserved for non-equi joins.")
if (verbose) {last.started.at=proc.time()[3L];cat(" Recomputing forder with non-equi ids ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat(" Recomputing forder with non-equi ids ... ");flush.console()}
set(nqx<-shallow(x), j="_nqgrp_", value=nqgrp)
xo = forderv(nqx, c(ncol(nqx), rightcols))
if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console}
if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()}
} else nqgrp = integer(0L)
if (verbose) cat(" Found", nqmaxgrp, "non-equi group(s) ...\n")
}
Expand All @@ -600,9 +600,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
if (verbose && !is.null(xo)) cat("on= matches existing index, using index\n")
}
if (is.null(xo)) {
last.started.at=proc.time()[3L]
if (verbose) {last.started.at=proc.time(); flush.console()}
xo = forderv(x, by = rightcols)
if (verbose) cat("Calculated ad hoc index in", round(proc.time()[3L]-last.started.at,3L), "secs\n")
if (verbose) {cat("Calculated ad hoc index in", timetaken(last.started.at)); flush.console()}
# TODO: use setindex() instead, so it's cached for future reuse
}
}
Expand All @@ -620,10 +620,10 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
# Implementation for not-join along with by=.EACHI, #604
if (notjoin && (byjoin || mult != "all")) { # mult != "all" needed for #1571 fix
notjoin = FALSE
if (verbose) {last.started.at=proc.time()[3L];cat("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff(x,i) ...");flush.console()}
if (verbose) {last.started.at=proc.time();cat("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff(x,i) ...");flush.console()}
orignames = copy(names(i))
i = setdiff_(x, i, rightcols, leftcols) # part of #547
if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console}
if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()}
setnames(i, orignames[leftcols])
setattr(i, 'sorted', names(i)) # since 'x' has key set, this'll always be sorted
}
Expand Down Expand Up @@ -1447,7 +1447,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {

if (length(byval) && length(byval[[1L]])) {
if (!bysameorder) {
if (verbose) {last.started.at=proc.time()[3L];cat("Finding groups using forderv ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat("Finding groups using forderv ... ");flush.console()}
o__ = forderv(byval, sort=!missing(keyby), retGrp=TRUE)
# The sort= argument is called sortStr at C level. It's just about saving the sort of unique strings at
# C level for efficiency (cgroup vs csort) when by= not keyby=. All other types are always sorted. Getting
Expand All @@ -1457,37 +1457,37 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
# forderv() returns empty integer() if already ordered to save allocating 1:xnrow
bysameorder = orderedirows && !length(o__)
if (verbose) {
cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n")
last.started.at=proc.time()[3L]
cat(timetaken(last.started.at))
last.started.at=proc.time()
cat("Finding group sizes from the positions (can be avoided to save RAM) ... ")
flush.console() # for windows
}
f__ = attr(o__, "starts")
len__ = uniqlengths(f__, xnrow)
if (verbose) { cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n");flush.console()}
if (verbose) {cat(timetaken(last.started.at)); flush.console()}
if (!bysameorder && missing(keyby)) {
# TO DO: lower this into forder.c
if (verbose) {last.started.at=proc.time()[3L];cat("Getting back original order ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat("Getting back original order ... ");flush.console()}
firstofeachgroup = o__[f__]
if (length(origorder <- forderv(firstofeachgroup))) {
f__ = f__[origorder]
len__ = len__[origorder]
}
if (verbose) {cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n")}
if (verbose) {cat(timetaken(last.started.at)); flush.console()}
}
if (!orderedirows && !length(o__)) o__ = seq_len(xnrow) # temp fix. TODO: revist orderedirows
} else {
if (verbose) {last.started.at=proc.time()[3L];cat("Finding groups using uniqlist ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat("Finding groups using uniqlist ... ");flush.console()}
f__ = uniqlist(byval)
if (verbose) {
cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n")
last.started.at=proc.time()[3L]
cat(timetaken(last.started.at))
last.started.at=proc.time()
cat("Finding group sizes from the positions (can be avoided to save RAM) ... ")
flush.console() # for windows
}
len__ = uniqlengths(f__, xnrow)
# TO DO: combine uniqlist and uniquelengths into one call. Or, just set len__ to NULL when dogroups infers that.
if (verbose) { cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n");flush.console() }
if (verbose) { cat(timetaken(last.started.at)); flush.console() }
}
} else {
f__=NULL
Expand Down Expand Up @@ -1777,7 +1777,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
# for consistency of empty case in test 184
f__=len__=0L
}
if (verbose) {last.started.at=proc.time()[3L];cat("Making each group and running j (GForce ",GForce,") ... ",sep="");flush.console()}
if (verbose) {last.started.at=proc.time();cat("Making each group and running j (GForce ",GForce,") ... ",sep="");flush.console()}
if (GForce) {
thisEnv = new.env() # not parent=parent.frame() so that gsum is found
for (ii in ansvars) assign(ii, x[[ii]], thisEnv)
Expand All @@ -1791,7 +1791,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
} else {
ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose)
}
if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()}
if (verbose) {cat(timetaken(last.started.at)); flush.console()}
# TO DO: xrows would be a better name for irows: irows means the rows of x that i joins to
# Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x
# Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__)
Expand All @@ -1811,9 +1811,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
if (!missing(keyby)) {
cnames = as.character(bysubl)[-1L]
if (all(cnames %chin% names(x))) {
if (verbose) {last.started.at=proc.time()[3L];cat("setkey() after the := with keyby= ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat("setkey() after the := with keyby= ... ");flush.console()}
setkeyv(x,cnames) # TO DO: setkey before grouping to get memcpy benefit.
if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()}
if (verbose) {cat(timetaken(last.started.at)); flush.console()}
}
else warning(":= keyby not straightforward character column names or list() of column names, treating as a by:",paste(cnames,collapse=","),"\n")
}
Expand All @@ -1838,9 +1838,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
setnames(ans,seq_along(bynames),bynames) # TO DO: reinvestigate bynames flowing from dogroups here and simplify
}
if (byjoin && !missing(keyby) && !bysameorder) {
if (verbose) {last.started.at=proc.time()[3L];cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()}
if (verbose) {last.started.at=proc.time();cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()}
setkeyv(ans,names(ans)[seq_along(byval)])
if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()}
if (verbose) {cat(timetaken(last.started.at)); flush.console()}
} else if (!missing(keyby) || (haskey(x) && bysameorder)) {
setattr(ans,"sorted",names(ans)[seq_along(grpcols)])
}
Expand Down
8 changes: 4 additions & 4 deletions R/foverlaps.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by.
end = yintervals[2L], any =,
within =, equal = yintervals)
call = construct(head(ynames, -2L), uycols, type)
if (verbose) {last.started.at=proc.time()[3L];cat("unique() + setkey() operations done in ...");flush.console()}
if (verbose) {last.started.at=proc.time();cat("unique() + setkey() operations done in ...");flush.console()}
uy = unique(y[, eval(call)])
setkey(uy)[, `:=`(lookup = list(list(integer(0L))), type_lookup = list(list(integer(0L))), count=0L, type_count=0L)]
if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console}
if (verbose) {cat(timetaken(last.started.at)); flush.console()}
matches <- function(ii, xx, del, ...) {
cols = setdiff(names(xx), del)
xx = .shallow(xx, cols, retain.key = FALSE)
Expand All @@ -130,9 +130,9 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by.
.Call(Clookup, uy, nrow(y), indices(uy, y, yintervals, nomatch=0L, roll=roll), maxgap, minoverlap, mult, type, verbose)
if (maxgap == 0L && minoverlap == 1L) {
iintervals = tail(names(x), 2L)
if (verbose) {last.started.at=proc.time()[3L];cat("binary search(es) done in ...");flush.console()}
if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()}
xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll)
if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console}
if (verbose) {cat(timetaken(last.started.at));flush.console}
olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose)
} else if (maxgap == 0L && minoverlap > 1L) {
stop("Not yet implemented")
Expand Down
4 changes: 2 additions & 2 deletions R/timetaken.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ timetaken <- function(started.at)
hrs = hrs - 24*days
if (secs >= 60) {
if (days >= 1) res = sprintf("%d days ", as.integer(days)) else res=""
paste(res,sprintf("%02.0f:%02.0f:%02.0f", hrs, mins, secs %% 60),sep="")
paste(res,sprintf("%02.0f:%02.0f:%02.0f\n", hrs, mins, secs %% 60),sep="")
} else {
sprintf(if (secs>=10) "%.1fsec" else "%.3fsec", secs)
sprintf(if (secs>=10) "%.1fsec\n" else "%.3fsec\n", secs)
}
}
9 changes: 7 additions & 2 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -4042,7 +4042,12 @@ test(1187.4, DT[, list(sum(y,na.rm=MyVar), mean(y,na.rm=MyVar)), by=x, verbose=T
test(1187.5, DT[, mean(y,na.rm=MyVar), by=x, verbose=TRUE], output="GForce optimized j to",
DT[, mean(y,na.rm=FALSE), by=x])
# GForce should not turn on when the .ok function isn't triggered
test(1187.6, DT[ , mean(y, trim = .2), by = x, verbose = TRUE], output = 'GForce FALSE')
test(1187.6, DT[ , mean(y, trim = .2),
by = x, verbose = TRUE],
data.table(x = c("a", "b", "c", "d"),
V1 = c(NA, 3.33333333333333, NA, NA)),
output = 'j unchanged',
warning = "'trim' is not yet optimized")



Expand Down Expand Up @@ -9264,7 +9269,7 @@ test(1672.2, DT[ , .(.I[1L], V2[1L]), by = V1, verbose = TRUE],
output = "GForce optimized j")
#make sure GForce not operating for inversion
test(1672.3, DT[ , .(.I[-1L], V2[1L]), by = V1, verbose = TRUE],
output = "GForce FALSE)
output = "GForce FALSE")
#make sure works on .I by itself
test(1672.4, DT[ , .I[1L], by = V1],
output = " V1 V11: 1 12: 2 2")
Expand Down

0 comments on commit 4c8162d

Please sign in to comment.