From 93dd105707d9a7e681254c3913d22b3ff547b8cc Mon Sep 17 00:00:00 2001 From: Marvin Wright Date: Wed, 6 Dec 2023 20:47:58 +0100 Subject: [PATCH 1/6] default to 2 threads but give a startup message --- R/onAttach.R | 18 ++++++++++++++++++ R/predict.R | 2 +- R/ranger.R | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 R/onAttach.R diff --git a/R/onAttach.R b/R/onAttach.R new file mode 100644 index 00000000..3f098998 --- /dev/null +++ b/R/onAttach.R @@ -0,0 +1,18 @@ + +.onAttach = function(libname, pkgname) { + if (!interactive()) { + return() + } + + threads_option <- getOption("ranger.num.threads") + threads_env <- Sys.getenv("R_RANGER_NUM_THREADS") + if (!is.null(threads_option)) { + thread_string <- paste(threads_option, "threads (set by options(ranger.num.threads = N).") + } else if (threads_env != "") { + thread_string <- paste(threads_env, "threads (set by environment variable 'R_RANGER_NUM_THREADS').") + } else { + thread_string <- "2 threads (default). Change with num.threads in ranger() and predict(), options(ranger.num.threads = N) or environment variable 'R_RANGER_NUM_THREADS'." + } + + packageStartupMessage(paste("ranger", packageVersion("ranger"), "using", thread_string)) +} diff --git a/R/predict.R b/R/predict.R index 82599ab0..0530654b 100644 --- a/R/predict.R +++ b/R/predict.R @@ -193,7 +193,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE, ## Num threads ## Default 0 -> detect from system in C++. if (is.null(num.threads)) { - num.threads = 0 + num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", 2L))) } else if (!is.numeric(num.threads) || num.threads < 0) { stop("Error: Invalid value for num.threads") } diff --git a/R/ranger.R b/R/ranger.R index 6d56d4d4..dc4dd823 100644 --- a/R/ranger.R +++ b/R/ranger.R @@ -514,7 +514,7 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL, ## Num threads ## Default 0 -> detect from system in C++. if (is.null(num.threads)) { - num.threads = 0 + num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", 2L))) } else if (!is.numeric(num.threads) || num.threads < 0) { stop("Error: Invalid value for num.threads") } From a0e8fcd4464fff30267903e0cc3ca7ae71c2542e Mon Sep 17 00:00:00 2001 From: Marvin Wright Date: Wed, 6 Dec 2023 20:58:12 +0100 Subject: [PATCH 2/6] improve startup message --- R/onAttach.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/onAttach.R b/R/onAttach.R index 3f098998..e956271e 100644 --- a/R/onAttach.R +++ b/R/onAttach.R @@ -7,11 +7,11 @@ threads_option <- getOption("ranger.num.threads") threads_env <- Sys.getenv("R_RANGER_NUM_THREADS") if (!is.null(threads_option)) { - thread_string <- paste(threads_option, "threads (set by options(ranger.num.threads = N).") + thread_string <- paste(threads_option, "threads as set by options(ranger.num.threads = N). Can be overwritten with num.threads.") } else if (threads_env != "") { - thread_string <- paste(threads_env, "threads (set by environment variable 'R_RANGER_NUM_THREADS').") + thread_string <- paste(threads_env, "threads as set by environment variable R_RANGER_NUM_THREADS. Can be overwritten with num.threads.") } else { - thread_string <- "2 threads (default). Change with num.threads in ranger() and predict(), options(ranger.num.threads = N) or environment variable 'R_RANGER_NUM_THREADS'." + thread_string <- "2 threads (default). Change with num.threads in ranger() and predict(), options(ranger.num.threads = N) or environment variable R_RANGER_NUM_THREADS." } packageStartupMessage(paste("ranger", packageVersion("ranger"), "using", thread_string)) From 33ea5da1d3bbe2d20a187c67ffad7d08b270f90d Mon Sep 17 00:00:00 2001 From: Marvin Wright Date: Wed, 6 Dec 2023 21:14:11 +0100 Subject: [PATCH 3/6] also use Ncpus option --- R/onAttach.R | 12 ++++++++---- R/predict.R | 2 +- R/ranger.R | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/R/onAttach.R b/R/onAttach.R index e956271e..972d2edf 100644 --- a/R/onAttach.R +++ b/R/onAttach.R @@ -4,12 +4,16 @@ return() } - threads_option <- getOption("ranger.num.threads") threads_env <- Sys.getenv("R_RANGER_NUM_THREADS") - if (!is.null(threads_option)) { - thread_string <- paste(threads_option, "threads as set by options(ranger.num.threads = N). Can be overwritten with num.threads.") - } else if (threads_env != "") { + threads_option1 <- getOption("ranger.num.threads") + threads_option2 <- getOption("Ncpus") + + if (threads_env != "") { thread_string <- paste(threads_env, "threads as set by environment variable R_RANGER_NUM_THREADS. Can be overwritten with num.threads.") + } else if (!is.null(threads_option1)) { + thread_string <- paste(threads_option1, "threads as set by options(ranger.num.threads = N). Can be overwritten with num.threads.") + } else if (!is.null(threads_option2)) { + thread_string <- paste(threads_option2, "threads as set by options(Ncpus = N). Can be overwritten with num.threads.") } else { thread_string <- "2 threads (default). Change with num.threads in ranger() and predict(), options(ranger.num.threads = N) or environment variable R_RANGER_NUM_THREADS." } diff --git a/R/predict.R b/R/predict.R index 0530654b..0b45c3ec 100644 --- a/R/predict.R +++ b/R/predict.R @@ -193,7 +193,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE, ## Num threads ## Default 0 -> detect from system in C++. if (is.null(num.threads)) { - num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", 2L))) + num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", getOption("Ncpus", 2L)))) } else if (!is.numeric(num.threads) || num.threads < 0) { stop("Error: Invalid value for num.threads") } diff --git a/R/ranger.R b/R/ranger.R index dc4dd823..eeaf80bf 100644 --- a/R/ranger.R +++ b/R/ranger.R @@ -514,7 +514,7 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL, ## Num threads ## Default 0 -> detect from system in C++. if (is.null(num.threads)) { - num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", 2L))) + num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", getOption("Ncpus", 2L)))) } else if (!is.numeric(num.threads) || num.threads < 0) { stop("Error: Invalid value for num.threads") } From 8ba5fe8af303263aa61b396e524c80b759113cec Mon Sep 17 00:00:00 2001 From: Marvin Wright Date: Wed, 6 Dec 2023 21:19:33 +0100 Subject: [PATCH 4/6] add Ncpus to startup message --- R/onAttach.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/onAttach.R b/R/onAttach.R index 972d2edf..61b69dda 100644 --- a/R/onAttach.R +++ b/R/onAttach.R @@ -15,7 +15,7 @@ } else if (!is.null(threads_option2)) { thread_string <- paste(threads_option2, "threads as set by options(Ncpus = N). Can be overwritten with num.threads.") } else { - thread_string <- "2 threads (default). Change with num.threads in ranger() and predict(), options(ranger.num.threads = N) or environment variable R_RANGER_NUM_THREADS." + thread_string <- "2 threads (default). Change with num.threads in ranger() and predict(), options(Ncpus = N), options(ranger.num.threads = N) or environment variable R_RANGER_NUM_THREADS." } packageStartupMessage(paste("ranger", packageVersion("ranger"), "using", thread_string)) From 553aa9a35e34ff77848ea39c42b492fc3e21c69b Mon Sep 17 00:00:00 2001 From: Marvin Wright Date: Wed, 6 Dec 2023 21:29:17 +0100 Subject: [PATCH 5/6] add documentation for threads --- R/predict.R | 10 ++++++++-- R/ranger.R | 8 ++++---- man/predict.ranger.Rd | 5 ++++- man/predict.ranger.forest.Rd | 5 ++++- man/ranger.Rd | 8 ++++---- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/R/predict.R b/R/predict.R index 0b45c3ec..7ed7b0e5 100644 --- a/R/predict.R +++ b/R/predict.R @@ -36,6 +36,9 @@ ##' ##' For classification and \code{predict.all = TRUE}, a factor levels are returned as numerics. ##' To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object. +##' +##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. ##' ##' @title Ranger prediction ##' @param object Ranger \code{ranger.forest} object. @@ -45,7 +48,7 @@ ##' @param type Type of prediction. One of 'response', 'se', 'terminalNodes', 'quantiles' with default 'response'. See below for details. ##' @param se.method Method to compute standard errors. One of 'jack', 'infjack' with default 'infjack'. Only applicable if type = 'se'. See below for details. ##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode. -##' @param num.threads Number of threads. Default is number of CPUs available. +##' @param num.threads Number of threads. Default is 2 if not set by options/environment variables (see below). ##' @param verbose Verbose output on or off. ##' @param inbag.counts Number of times the observations are in-bag in the trees. ##' @param ... further arguments passed to or from other methods. @@ -433,6 +436,9 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE, ##' ##' For classification and \code{predict.all = TRUE}, a factor levels are returned as numerics. ##' To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object. +##' +##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. ##' ##' @title Ranger prediction ##' @param object Ranger \code{ranger} object. @@ -444,7 +450,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE, ##' @param quantiles Vector of quantiles for quantile prediction. Set \code{type = 'quantiles'} to use. ##' @param what User specified function for quantile prediction used instead of \code{quantile}. Must return numeric vector, see examples. ##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode. -##' @param num.threads Number of threads. Default is number of CPUs available. +##' @param num.threads Number of threads. Default is 2 if not set by options/environment variables (see below). ##' @param verbose Verbose output on or off. ##' @param ... further arguments passed to or from other methods. ##' @return Object of class \code{ranger.prediction} with elements diff --git a/R/ranger.R b/R/ranger.R index eeaf80bf..028616c2 100644 --- a/R/ranger.R +++ b/R/ranger.R @@ -96,10 +96,10 @@ ##' To use only the SNPs without sex or other covariates from the phenotype file, use \code{0} on the right hand side of the formula. ##' Note that missing values are treated as an extra category while splitting. ##' -##' See \url{https://github.com/imbs-hl/ranger} for the development version. +##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. ##' -##' With recent R versions, multithreading on Windows platforms should just work. -##' If you compile yourself, the new RTools toolchain is required. +##' See \url{https://github.com/imbs-hl/ranger} for the development version. ##' ##' @title Ranger ##' @param formula Object of class \code{formula} or \code{character} describing the model to fit. Interaction terms supported only for numerical variables. @@ -133,7 +133,7 @@ ##' @param quantreg Prepare quantile prediction as in quantile regression forests (Meinshausen 2006). Regression only. Set \code{keep.inbag = TRUE} to prepare out-of-bag quantile prediction. ##' @param time.interest Time points of interest (survival only). Can be \code{NULL} (default, use all observed time points), a vector of time points or a single number to use as many time points (grid over observed time points). ##' @param oob.error Compute OOB prediction error. Set to \code{FALSE} to save computation time, e.g. for large survival forests. -##' @param num.threads Number of threads. Default is number of CPUs available. +##' @param num.threads Number of threads. Default is 2 if not set by options/environment variables (see below). ##' @param save.memory Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems. ##' @param verbose Show computation status and estimated runtime. ##' @param node.stats Save node statistics. Set to \code{TRUE} to save prediction, number of observations and split statistics for each node. diff --git a/man/predict.ranger.Rd b/man/predict.ranger.Rd index 362befca..1fce859f 100644 --- a/man/predict.ranger.Rd +++ b/man/predict.ranger.Rd @@ -38,7 +38,7 @@ \item{seed}{Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.} -\item{num.threads}{Number of threads. Default is number of CPUs available.} +\item{num.threads}{Number of threads. Default is 2 if not set by options/environment variables (see below).} \item{verbose}{Verbose output on or off.} @@ -70,6 +70,9 @@ If \code{type = 'se'} is selected, the method to estimate the variances can be c For classification and \code{predict.all = TRUE}, a factor levels are returned as numerics. To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object. + +By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. } \examples{ ## Classification forest diff --git a/man/predict.ranger.forest.Rd b/man/predict.ranger.forest.Rd index ba018b0e..0331fc4b 100644 --- a/man/predict.ranger.forest.Rd +++ b/man/predict.ranger.forest.Rd @@ -33,7 +33,7 @@ \item{seed}{Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.} -\item{num.threads}{Number of threads. Default is number of CPUs available.} +\item{num.threads}{Number of threads. Default is 2 if not set by options/environment variables (see below).} \item{verbose}{Verbose output on or off.} @@ -66,6 +66,9 @@ If \code{type = 'se'} is selected, the method to estimate the variances can be c For classification and \code{predict.all = TRUE}, a factor levels are returned as numerics. To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object. + +By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. } \references{ \itemize{ diff --git a/man/ranger.Rd b/man/ranger.Rd index 61c6e5df..9e519c16 100644 --- a/man/ranger.Rd +++ b/man/ranger.Rd @@ -112,7 +112,7 @@ ranger( \item{oob.error}{Compute OOB prediction error. Set to \code{FALSE} to save computation time, e.g. for large survival forests.} -\item{num.threads}{Number of threads. Default is number of CPUs available.} +\item{num.threads}{Number of threads. Default is 2 if not set by options/environment variables (see below).} \item{save.memory}{Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems.} @@ -230,10 +230,10 @@ All SNPs in the \code{GenABEL} object will be used for splitting. To use only the SNPs without sex or other covariates from the phenotype file, use \code{0} on the right hand side of the formula. Note that missing values are treated as an extra category while splitting. -See \url{https://github.com/imbs-hl/ranger} for the development version. +By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. -With recent R versions, multithreading on Windows platforms should just work. -If you compile yourself, the new RTools toolchain is required. +See \url{https://github.com/imbs-hl/ranger} for the development version. } \examples{ ## Classification forest with default settings From 009028e91290e9eccb5ff286081c2d03e7e55306 Mon Sep 17 00:00:00 2001 From: Marvin Wright Date: Wed, 6 Dec 2023 21:32:59 +0100 Subject: [PATCH 6/6] add note on num.threads = 0 --- R/predict.R | 4 ++-- R/ranger.R | 2 +- man/predict.ranger.Rd | 2 +- man/predict.ranger.forest.Rd | 2 +- man/ranger.Rd | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/predict.R b/R/predict.R index 7ed7b0e5..d11c453e 100644 --- a/R/predict.R +++ b/R/predict.R @@ -48,7 +48,7 @@ ##' @param type Type of prediction. One of 'response', 'se', 'terminalNodes', 'quantiles' with default 'response'. See below for details. ##' @param se.method Method to compute standard errors. One of 'jack', 'infjack' with default 'infjack'. Only applicable if type = 'se'. See below for details. ##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode. -##' @param num.threads Number of threads. Default is 2 if not set by options/environment variables (see below). +##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below). ##' @param verbose Verbose output on or off. ##' @param inbag.counts Number of times the observations are in-bag in the trees. ##' @param ... further arguments passed to or from other methods. @@ -450,7 +450,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE, ##' @param quantiles Vector of quantiles for quantile prediction. Set \code{type = 'quantiles'} to use. ##' @param what User specified function for quantile prediction used instead of \code{quantile}. Must return numeric vector, see examples. ##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode. -##' @param num.threads Number of threads. Default is 2 if not set by options/environment variables (see below). +##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below). ##' @param verbose Verbose output on or off. ##' @param ... further arguments passed to or from other methods. ##' @return Object of class \code{ranger.prediction} with elements diff --git a/R/ranger.R b/R/ranger.R index 028616c2..6fa95ddf 100644 --- a/R/ranger.R +++ b/R/ranger.R @@ -133,7 +133,7 @@ ##' @param quantreg Prepare quantile prediction as in quantile regression forests (Meinshausen 2006). Regression only. Set \code{keep.inbag = TRUE} to prepare out-of-bag quantile prediction. ##' @param time.interest Time points of interest (survival only). Can be \code{NULL} (default, use all observed time points), a vector of time points or a single number to use as many time points (grid over observed time points). ##' @param oob.error Compute OOB prediction error. Set to \code{FALSE} to save computation time, e.g. for large survival forests. -##' @param num.threads Number of threads. Default is 2 if not set by options/environment variables (see below). +##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below). ##' @param save.memory Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems. ##' @param verbose Show computation status and estimated runtime. ##' @param node.stats Save node statistics. Set to \code{TRUE} to save prediction, number of observations and split statistics for each node. diff --git a/man/predict.ranger.Rd b/man/predict.ranger.Rd index 1fce859f..2f9c63ac 100644 --- a/man/predict.ranger.Rd +++ b/man/predict.ranger.Rd @@ -38,7 +38,7 @@ \item{seed}{Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.} -\item{num.threads}{Number of threads. Default is 2 if not set by options/environment variables (see below).} +\item{num.threads}{Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).} \item{verbose}{Verbose output on or off.} diff --git a/man/predict.ranger.forest.Rd b/man/predict.ranger.forest.Rd index 0331fc4b..805effda 100644 --- a/man/predict.ranger.forest.Rd +++ b/man/predict.ranger.forest.Rd @@ -33,7 +33,7 @@ \item{seed}{Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.} -\item{num.threads}{Number of threads. Default is 2 if not set by options/environment variables (see below).} +\item{num.threads}{Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).} \item{verbose}{Verbose output on or off.} diff --git a/man/ranger.Rd b/man/ranger.Rd index 9e519c16..4b1f61fe 100644 --- a/man/ranger.Rd +++ b/man/ranger.Rd @@ -112,7 +112,7 @@ ranger( \item{oob.error}{Compute OOB prediction error. Set to \code{FALSE} to save computation time, e.g. for large survival forests.} -\item{num.threads}{Number of threads. Default is 2 if not set by options/environment variables (see below).} +\item{num.threads}{Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).} \item{save.memory}{Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems.}