Skip to content

Commit

Permalink
Merge pull request #17 from permaverse/future
Browse files Browse the repository at this point in the history
Using furrr instead of pbapply and letting the user set the parallelization.
  • Loading branch information
astamm authored Jan 6, 2025
2 parents 445bd1d + 3df3189 commit ba23a02
Show file tree
Hide file tree
Showing 26 changed files with 369 additions and 200 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@ Description: A flexible permutation framework for making
data or density-valued data.
License: GPL (>= 3)
Encoding: UTF-8
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
Imports:
cli,
dials,
furrr,
ggplot2,
magrittr,
optimParallel,
pbapply,
progressr,
purrr,
R6,
Rcpp,
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ import(ggplot2)
import(rlang)
importFrom(R6,R6Class)
importFrom(Rcpp,sourceCpp)
importFrom(furrr,future_map)
importFrom(furrr,future_map_dbl)
importFrom(magrittr,"%>%")
importFrom(pbapply,pblapply)
importFrom(pbapply,pbsapply)
importFrom(progressr,progressor)
importFrom(tibble,tibble)
useDynLib(flipr, .registration = TRUE)
3 changes: 2 additions & 1 deletion R/flipr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
#' density-valued data.
#'
#' @useDynLib flipr, .registration = TRUE
#' @importFrom furrr future_map future_map_dbl
#' @import ggplot2
#' @importFrom pbapply pblapply pbsapply
#' @importFrom progressr progressor
#' @importFrom R6 R6Class
#' @importFrom Rcpp sourceCpp
#' @import rlang
Expand Down
33 changes: 12 additions & 21 deletions R/plausibility-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,6 @@ PlausibilityFunction <- R6::R6Class(
#' @param upper_bound A scalar or numeric vector specifying the lower bounds
#' for each parameter under investigation. If it is a scalar, the value is
#' used as lower bound for all parameters. Defaults to `10`.
#' @param ncores An integer specifying the number of cores to use for
#' maximizing the plausibility function to get a point estimate of the
#' parameters. Defaults to `1L`.
#' @param estimate A boolean specifying whether the rough point estimate
#' provided by `val` should serve as initial point for maximizing the
#' plausibility function (`estimate = TRUE`) or as final point estimate
Expand Down Expand Up @@ -401,7 +398,6 @@ PlausibilityFunction <- R6::R6Class(
set_point_estimate = function(point_estimate = NULL,
lower_bound = -10,
upper_bound = 10,
ncores = 1L,
estimate = FALSE,
overwrite = FALSE) {
if (!anyNA(self$point_estimate) && !overwrite) {
Expand All @@ -424,8 +420,7 @@ PlausibilityFunction <- R6::R6Class(
pf = self,
guess = point_estimate,
lower_bound = lower_bound,
upper_bound = upper_bound,
ncores = ncores
upper_bound = upper_bound
)
self$point_estimate <- opt$par
names(self$point_estimate) <- names(self$parameters)
Expand Down Expand Up @@ -611,8 +606,6 @@ PlausibilityFunction <- R6::R6Class(
#'
#' @param grid A \code{\link[tibble]{tibble}} storing a grid that spans the
#' space of parameters under investigation.
#' @param ncores An integer specifying the number of cores to run
#' evaluations in parallel. Defaults to `1L`.
#'
#' @examples
#' x <- rnorm(10)
Expand All @@ -639,7 +632,7 @@ PlausibilityFunction <- R6::R6Class(
#' npoints = 2L
#' )
#' pf$evaluate_grid(grid = pf$grid)
evaluate_grid = function(grid, ncores = 1L) {
evaluate_grid = function(grid) {
if ("pvalue" %in% names(self$grid) && is_equal(grid, self$grid)) {
abort("The current grid has already been evaluated.")
}
Expand All @@ -649,18 +642,16 @@ PlausibilityFunction <- R6::R6Class(
self$grid <- grid
}

cl <- NULL
if (ncores > 1) {
cl <- parallel::makeCluster(ncores)
parallel::clusterEvalQ(cl, {
library(purrr)
})
}
self$grid$pvalue <- self$grid %>%
purrr::array_tree(margin = 1) %>%
pbapply::pbsapply(self$get_value, cl = cl)
if (ncores > 1L)
parallel::stopCluster(cl)
transformed_grid <- self$grid %>%
purrr::array_tree(margin = 1)

cli::cli_alert_info("Evaluating grid.")
p <- progressr::progressor(steps = length(transformed_grid)) #progress bar set up

self$grid$pvalue <- furrr::future_map_dbl(transformed_grid, function(.l) {
p() #progress bar update
self$get_value(.l)
}, .options = furrr::furrr_options(seed = TRUE))
},

#' @field seed A numeric value specifying the seed to be used. Defaults to `1234`.
Expand Down
79 changes: 26 additions & 53 deletions R/point-estimation.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,16 @@ compute_point_estimate <- function(pf,
guess = NULL,
lower_bound = -10,
upper_bound = 10,
ncores = 1L,
verbose = FALSE) {
nparams <- pf$nparams

if (!is.null(guess)) {
if (ncores == 1) {
opt <- stats::optim(
par = guess,
fn = pf$get_value,
method = "L-BFGS-B",
control = list(fnscale = -1)
)
} else {
parallel_opts <- list(
cl = parallel::makeCluster(ncores),
forward = FALSE,
loginfo = FALSE
)
opt <- optimParallel::optimParallel(
par = guess,
fn = pf$get_value,
control = list(fnscale = -1),
parallel = parallel_opts
)
parallel::stopCluster(parallel_opts$cl)
}
# uses user default cluster
opt <- optimParallel::optimParallel(
par = guess,
fn = pf$get_value,
control = list(fnscale = -1)
)
x0 <- opt$par
fval <- opt$value
} else {
Expand All @@ -37,37 +21,26 @@ compute_point_estimate <- function(pf,
if (length(upper_bound) != nparams)
abort("The number of provided upper bounds does not match the number of parameters.")

if (nparams == 1 && ncores == 1) {
opt <- stats::optimise(
f = pf$get_value,
interval = c(lower_bound, upper_bound),
maximum = TRUE
)
x0 <- opt$maximum
fval <- opt$objective
} else {
opt <- rgenoud::genoud(
fn = pf$get_value,
nvars = nparams,
Domains = cbind(lower_bound, upper_bound),
max = TRUE,
pop.size = 20 * nparams,
max.generations = 10 * nparams,
wait.generations = 2 * nparams + 1,
BFGSburnin = 2 * nparams + 1,
print.level = 0,
cluster = if (ncores == 1) FALSE else ncores,
balance = nparams > 2
)
opt <- compute_point_estimate(
pf = pf,
guess = opt$par,
ncores = ncores,
verbose = FALSE
)
x0 <- opt$par
fval <- opt$value
}
opt <- rgenoud::genoud(
fn = pf$get_value,
nvars = nparams,
Domains = cbind(lower_bound, upper_bound),
max = TRUE,
pop.size = 20 * nparams,
max.generations = 10 * nparams,
wait.generations = 2 * nparams + 1,
BFGSburnin = 2 * nparams + 1,
print.level = 0,
cluster = parallel::getDefaultCluster(),
balance = nparams > 2
)
opt <- compute_point_estimate(
pf = pf,
guess = opt$par,
verbose = FALSE
)
x0 <- opt$par
fval <- opt$value
}

if (verbose) {
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
4 changes: 1 addition & 3 deletions R/viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
#' function will be evaluated. Specifically if `K` is the number of parameters
#' under investigation, the grid will be of size `(ngrid + 1)^K`. Defaults to
#' `10L`.
#' @param ncores An integer specifying the number of cores to use for
#' parallelized computations. Defaults to `1L`.
#' @param subtitle A string for specifying a subtitle to the plot. Defaults to
#' `""` leading to no subtitle.
#'
Expand Down Expand Up @@ -42,7 +40,7 @@
#' )
#' pf$evaluate_grid(grid = pf$grid)
#' plot_pf(pf)
plot_pf <- function(pf, alpha = 0.05, ngrid = 10, ncores = 1, subtitle = "") {
plot_pf <- function(pf, alpha = 0.05, ngrid = 10, subtitle = "") {
if (pf$nparams > 2)
abort("Only one- or two-dimensional plausibility functions can currently be plotted.")

Expand Down
Binary file modified data-raw/alpha.rds
Binary file not shown.
2 changes: 2 additions & 0 deletions data-raw/build-sysdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ df_mean <- readRDS("data-raw/df_mean.rds")
df_sd <- readRDS("data-raw/df_sd.rds")
df_fisher <- readRDS("data-raw/df_fisher.rds")
df_tippett <- readRDS("data-raw/df_tippett.rds")
df_parallelization <- readRDS("data-raw/df_parallelization.rds")
usethis::use_data(
alpha_estimates,
pfa, pfb, pfc,
df_mean, df_sd,
df_fisher, df_tippett,
df_parallelization,
overwrite = TRUE,
internal = TRUE
)
Binary file modified data-raw/df_mean.rds
Binary file not shown.
Binary file added data-raw/df_parallelization.rds
Binary file not shown.
Binary file modified data-raw/df_sd.rds
Binary file not shown.
47 changes: 28 additions & 19 deletions data-raw/exactness-vignette.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

library(purrr)
library(parallel)
library(furrr)
library(flipr)

# Parallelization
future::plan("future::multisession", workers = availableCores())

# General setup
nreps <- 1e4
Expand All @@ -16,21 +21,23 @@ sim <- map(sample.int(.Machine$integer.max, nreps, replace = TRUE), ~ {
)
})

# Cluster setup
cl <- makeCluster(detectCores(logical = FALSE))
clusterEvalQ(cl, {
library(purrr)
library(flipr)
null_spec <- function(y, parameters) {
map(y, ~ .x - parameters)
}
stat_functions <- list(stat_t)
stat_assignments <- list(delta = 1)
nperms <- 20
alpha <- 0.05
})
null_spec <- function(y, parameters) {
map(y, ~ .x - parameters)
}
stat_functions <- list(stat_t)
stat_assignments <- list(delta = 1)
nperms <- 20
alpha <- 0.05


progressr::with_progress({
p <- progressr::progressor(steps = length(sim) / 10) # progress bar set up
ii <- 1

alpha_estimates <- furrr::future_map(sim, function(.l) {
if (ii %% 10 == 0) {p()} # progress bar update
ii <<- ii + 1

alpha_estimates <- pbapply::pblapply(sim, function(.l) {
pf <- PlausibilityFunction$new(
null_spec = null_spec,
stat_functions = stat_functions,
Expand All @@ -51,11 +58,13 @@ alpha_estimates <- pbapply::pblapply(sim, function(.l) {
upper_bound = pv_upper_bound <= alpha,
estimate = pv_estimate <= alpha
)
}, cl = cl) %>%
transpose() %>%
simplify_all() %>%
map(mean)
stopCluster(cl)
}, .options = furrr_options(seed = TRUE)) %>%
transpose() %>%
simplify_all() %>%
map(mean)
})


alpha_estimates

saveRDS(alpha_estimates, "data-raw/alpha.rds")
17 changes: 3 additions & 14 deletions data-raw/flipr-vignette.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# Setup -------------------------------------------------------------------

library(purrr)
library(parallel)
library(flipr)

n1 <- 10
Expand All @@ -13,7 +12,6 @@ mu2 <- 4
sd1 <- 1
sd2 <- 1
nperms <- 100000
ncores <- 6

null_spec <- function(y, parameters) {
purrr::map(y, ~ .x - parameters[1])
Expand Down Expand Up @@ -48,10 +46,7 @@ pfa$set_grid(
)

pfa$set_nperms(nperms)
pfa$evaluate_grid(
grid = pfa$grid,
ncores = ncores
)
pfa$evaluate_grid(grid = pfa$grid)

saveRDS(pfa, "data-raw/pfa.rds")

Expand Down Expand Up @@ -82,10 +77,7 @@ pfb$set_grid(
)

pfb$set_nperms(nperms)
pfb$evaluate_grid(
grid = pfb$grid,
ncores = ncores
)
pfb$evaluate_grid(grid = pfb$grid)

saveRDS(pfb, "data-raw/pfb.rds")

Expand Down Expand Up @@ -116,9 +108,6 @@ pfc$set_grid(
)

pfc$set_nperms(nperms)
pfc$evaluate_grid(
grid = pfc$grid,
ncores = ncores
)
pfc$evaluate_grid(grid = pfb$grid)

saveRDS(pfc, "data-raw/pfc.rds")
Loading

0 comments on commit ba23a02

Please sign in to comment.