diff --git a/R/point-estimation.R b/R/point-estimation.R index cffdcab..30e753f 100644 --- a/R/point-estimation.R +++ b/R/point-estimation.R @@ -6,12 +6,25 @@ compute_point_estimate <- function(pf, nparams <- pf$nparams if (!is.null(guess)) { - # uses user default cluster - opt <- optimParallel::optimParallel( - par = guess, - fn = pf$get_value, - control = list(fnscale = -1) - ) + # if user did not set a default cluster we compute in sequential + if (is.null(parallel::getDefaultCluster())) { + cli::cli_alert_warning("Default cluster has not been set. You may consider setting one to run computation in parallel.") + opt <- stats::optim( + par = guess, + fn = pf$get_value, + method = "L-BFGS-B", + control = list(fnscale = -1) + ) + } + else { + # uses user default cluster to compute in parallel + opt <- optimParallel::optimParallel( + par = guess, + fn = pf$get_value, + control = list(fnscale = -1), + parallel = list(cl = NULL, forward = FALSE, loginfo = FALSE) + ) + } x0 <- opt$par fval <- opt$value } else { @@ -21,6 +34,7 @@ compute_point_estimate <- function(pf, if (length(upper_bound) != nparams) abort("The number of provided upper bounds does not match the number of parameters.") + # if user default cluser is not set, will run in sequential, else run in parallel opt <- rgenoud::genoud( fn = pf$get_value, nvars = nparams, @@ -31,7 +45,7 @@ compute_point_estimate <- function(pf, wait.generations = 2 * nparams + 1, BFGSburnin = 2 * nparams + 1, print.level = 0, - cluster = parallel::getDefaultCluster(), + cluster = if (!is.null(parallel::getDefaultCluster())) parallel::getDefaultCluster() else FALSE, balance = nparams > 2 ) opt <- compute_point_estimate( diff --git a/R/sysdata.rda b/R/sysdata.rda index 2270f22..c442eee 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/data-raw/df_parallelization.rds b/data-raw/df_parallelization.rds index 0c46cc2..9d25136 100644 Binary files a/data-raw/df_parallelization.rds and b/data-raw/df_parallelization.rds differ diff --git a/data-raw/parallelization-vignette.R b/data-raw/parallelization-vignette.R index 79c04ce..7e09d2d 100644 --- a/data-raw/parallelization-vignette.R +++ b/data-raw/parallelization-vignette.R @@ -8,8 +8,8 @@ library(progressr) library(tictoc) library(flipr) -ngrid_in <- 50L -nperms <- 5000 +ngrid_in <- 100L +nperms <- 2000 n1 <- 10 set.seed(1234) x <- rnorm(n1, mean = 1, sd = 1) @@ -25,16 +25,22 @@ stat_assignments <- list(delta = 1) # Inference on the mean without parallelization -------------------------------- +plan(sequential) +setDefaultCluster(NULL) +progressr::handlers(global = FALSE) + pf <- PlausibilityFunction$new( null_spec = null_spec, stat_functions = stat_functions, stat_assignments = stat_assignments, x, y ) +pf$set_nperms(nperms) -pf$set_point_estimate(mean(y) - mean(x)) +tic() +pf$set_point_estimate() +time_without_parallel <- toc()$callback_msg -pf$set_nperms(nperms) pf$set_parameter_bounds( point_estimate = pf$point_estimate, conf_level = pf$max_conf_level @@ -47,11 +53,11 @@ pf$set_grid( tic() pf$evaluate_grid(grid = pf$grid) -time_without_parallelization <- toc()$callback_msg +time_without_future <- toc()$callback_msg # Inference on the mean with parallelization ----------------------------------- -ncores <- 4 +ncores <- 3 plan(multisession, workers = ncores) cl <- makeCluster(ncores) setDefaultCluster(cl) @@ -63,10 +69,12 @@ pf <- PlausibilityFunction$new( stat_assignments = stat_assignments, x, y ) +pf$set_nperms(nperms) -pf$set_point_estimate(mean(y) - mean(x)) +tic() +pf$set_point_estimate() +time_with_parallel <- toc()$callback_msg -pf$set_nperms(nperms) pf$set_parameter_bounds( point_estimate = pf$point_estimate, conf_level = pf$max_conf_level @@ -79,12 +87,16 @@ pf$set_grid( tic() pf$evaluate_grid(grid = pf$grid) -time_with_parallelization <- toc()$callback_msg +time_with_future <- toc()$callback_msg + +stopCluster(cl) df_parallelization <- list( delta = pf$grid$delta, - time_par = time_with_parallelization, - time_without_par = time_without_parallelization + time_without_parallel = time_without_parallel, + time_without_future = time_without_future, + time_with_parallel = time_with_parallel, + time_with_future = time_with_future ) saveRDS(df_parallelization, "data-raw/df_parallelization.rds") diff --git a/vignettes/parallelization.Rmd b/vignettes/parallelization.Rmd index 4b52fc3..189d482 100644 --- a/vignettes/parallelization.Rmd +++ b/vignettes/parallelization.Rmd @@ -14,8 +14,10 @@ knitr::opts_chunk$set( ) library(flipr) load("../R/sysdata.rda") -time_without_parallelization <- df_parallelization$time_without_par -time_with_parallelization <- df_parallelization$time_par +time_without_parallel <- df_parallelization$time_without_parallel +time_without_future <- df_parallelization$time_without_future +time_with_parallel <- df_parallelization$time_with_parallel +time_with_future <- df_parallelization$time_with_future ``` The [**flipr**](https://permaverse.github.io/flipr/) package uses functions @@ -25,21 +27,21 @@ user side. We illustrate here how to achieve asynchronous evaluation. We use the [**future**](https://future.futureverse.org/index.html) package to set the plan, the **parallel** package to define a default cluster, and the [**progressr**](https://progressr.futureverse.org/index.html) package to report -progress updates. +progress updates. -By setting the desired number of cores, we define the number of background R -sessions that will be used to evaluate expressions in parallel. This number is -used to set the multisession plan with the function `future::plan()` and to -define a default cluster with `parallel::setDefaultCluster()`. Then, to enable -the visualization of evaluation progress, we can put the code in the -`progressr::with_progress()` function, or more simply set it for all the -following code with the `progressr::handlers()` function. After these settings, -[**flipr**](https://permaverse.github.io/flipr/) functions can be used, as shown -in this example. +More precisely, setting a default cluster with **parallel** is useful to allow +parallel computation of the point estimate if an estimation is needed. On the +other side, setting the plan with +[**future**](https://future.futureverse.org/index.html) allows parallel +computation when evaluating the plausibility function. A comparison of +computation time with sequential and parallel computation for those two cases is +done in the following. + +## Computation without parallel processing To show the benefit of parallel processing, we compare here the processing times -necessary to evaluate a grid with a plausibility function. First, here is the -computation without parallelization. +necessary to compute a point estimation and to evaluate the grid for a +plausibility function. First, here is the computation without parallelization. ```{r, eval=FALSE} set.seed(1234) @@ -58,24 +60,34 @@ pf <- PlausibilityFunction$new( stat_assignments = stat_assignments, x, y ) +pf$set_nperms(2000) + +tic() +pf$set_point_estimate() +time_without_parallel <- toc()$callback_msg +``` + +```{r} +time_without_parallel +``` -pf$set_point_estimate(mean(y) - mean(x), overwrite = TRUE) +```{r, eval=FALSE} pf$set_parameter_bounds( point_estimate = pf$point_estimate, conf_level = pf$max_conf_level ) pf$set_grid( parameters = pf$parameters, - npoints = 50L + npoints = 100L ) tictoc::tic() pf$evaluate_grid(grid = pf$grid) -time_without_parallelization <- tictoc::toc() +time_without_future <- tictoc::toc()$callback_msg ``` ```{r} -time_without_parallelization +time_without_future ``` ## Computation with parallel processing @@ -91,8 +103,8 @@ following code with the `progressr::handlers()` function. After these settings, in this example. ```{r, eval=FALSE} -ncores <- 4 -future::plan(multisession, workers = ncores) +ncores <- 3 +future::plan(future::multisession, workers = ncores) cl <- parallel::makeCluster(ncores) parallel::setDefaultCluster(cl) progressr::handlers(global = TRUE) @@ -113,20 +125,30 @@ pf <- PlausibilityFunction$new( stat_assignments = stat_assignments, x, y ) +pf$set_nperms(2000) -pf$set_point_estimate(mean(y) - mean(x), overwrite = TRUE) +tic() +pf$set_point_estimate() +time_with_parallel <- toc()$callback_msg +``` + +```{r} +time_with_parallel +``` + +```{r, eval=FALSE} pf$set_parameter_bounds( point_estimate = pf$point_estimate, conf_level = pf$max_conf_level ) pf$set_grid( parameters = pf$parameters, - npoints = 50L + npoints = 100L ) tictoc::tic() pf$evaluate_grid(grid = pf$grid) -time_with_parallelization <- tictoc::toc() +time_with_future <- tictoc::toc()$callback_msg parallel::stopCluster(cl) ``` @@ -135,18 +157,19 @@ It is good practice to shut down the workers with the `parallel::stopCluster()` function at the end of the code. ```{r} -time_with_parallelization +time_with_future ``` This experiment proves that we can save a lot of computation time when using -parallel processing, as we gained approximately 33 seconds in this example to -evaluate the plausibility function. +parallel processing. Indeed, by setting 3 cores for each of the parallel +processing tools, we reduced approximately by 3 times the computation time for +both computing a point estimation and evaluating the plausibility function. Finally, to return to a sequential plan with no progress updates, the following -code can be used. +code can be used. It also allows to shut down the workers used in [**future**](https://future.futureverse.org/index.html). ```{r, eval=FALSE} -future::plan(sequential) +future::plan(future::sequential) parallel::setDefaultCluster(NULL) progressr::handlers(global = FALSE) ```