Skip to content

Commit

Permalink
Added code to run on sampled cohorts
Browse files Browse the repository at this point in the history
  • Loading branch information
azimov committed Nov 14, 2023
1 parent 65cf83c commit a82ecb9
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 9 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ Remotes:
ohdsi/ResultModelManager,
ohdsi/ROhdsiWebApi,
ohdsi/CirceR,
ohdsi/CohortGenerator,
ohdsi/CohortGenerator@random_sample,
ohdsi/OhdsiShinyModules
License: Apache License
VignetteBuilder: knitr
Expand Down
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ Changes:

1. Removed need

2. Added function to make deployment to posit connect servers easier see ``
2. Added function to make deployment to posit connect servers easier

3. Added ability to use CohortGenerator sample functionality to executeDiagnostics which speeds up execution for very
large cohort definitions

Bug fix:

Expand Down
2 changes: 2 additions & 0 deletions R/Private.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ enforceMinCellValue <-
" because value below minimum"
)
}

browser()
if (length(minValues) == 1) {
data[toCensor, columnName] <- -minValues
} else {
Expand Down
38 changes: 37 additions & 1 deletion R/RunDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,23 @@ getDefaultCovariateSettings <- function() {
#' @param incremental Create only cohort diagnostics that haven't been created before?
#' @param incrementalFolder If \code{incremental = TRUE}, specify a folder where records are kept
#' of which cohort diagnostics has been executed.
#' @param runOnSample Logical. If TRUE, the function will operate on a sample of the data.
#' Default is FALSE, meaning the function will operate on the full data set.
#'
#' @param sampleN Integer. The number of records to include in the sample if runOnSample is TRUE.
#' Default is 1000. Ignored if runOnSample is FALSE.
#'
#' @param seed Integer. The seed for the random number generator used to create the sample.
#' This ensures that the same sample can be drawn again in future runs. Default is 64374.
#'
#' @param seedArgs List. Additional arguments to pass to the sampling function.
#' This can be used to control aspects of the sampling process beyond the seed and sample size.
#'
#' @param sampleIdentifierExpression Character. An expression that generates unique identifiers for each sample.
#' This expression can use the variables 'cohortId' and 'seed'.
#' Default is "cohortId * 1000 + seed", which ensures unique identifiers
#' as long as there are fewer than 1000 cohorts.

#' @examples
#' \dontrun{
#' # Load cohorts (assumes that they have already been instantiated)
Expand Down Expand Up @@ -211,7 +227,12 @@ executeDiagnostics <- function(cohortDefinitionSet,
minCharacterizationMean = 0.01,
irWashoutPeriod = 0,
incremental = FALSE,
incrementalFolder = file.path(exportFolder, "incremental")) {
incrementalFolder = file.path(exportFolder, "incremental"),
runOnSample = FALSE,
sampleN = 1000,
seed = 64374,
seedArgs = NULL,
sampleIdentifierExpression = "cohortId * 1000 + seed") {
# collect arguments that were passed to cohort diagnostics at initiation
callingArgs <- formals(executeDiagnostics)
callingArgsJson <-
Expand Down Expand Up @@ -528,6 +549,21 @@ executeDiagnostics <- function(cohortDefinitionSet,
}
}

if (runOnSample & !isTRUE(attr(cohortDefinitionSet, "isSampledCohortDefinition"))) {
cohortDefinitionSet <-
CohortGenerator::sampleCohortDefinitionSet(connection = connection,
cohortDefinitionSet = cohortDefinitionSet,
tempEmulationSchema = tempEmulationSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTableNames = cohortTableNames,
n = sampleN,
seed = seed,
seedArgs = seedArgs,
identifierExpression = sampleIdentifierExpression,
incremental = incremental,
incrementalFolder = incrementalFolder)
}

## CDM source information----
timeExecution(
exportFolder,
Expand Down
24 changes: 23 additions & 1 deletion man/executeDiagnostics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions tests/testthat/test-1-ResultsDataModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ VALUES ('Synthea','Synthea','OHDSI Community','SyntheaTM is a Synthetic Patient
runOrphanConcepts = TRUE,
incremental = TRUE,
incrementalFolder = file.path(folder, "incremental"),
temporalCovariateSettings = temporalCovariateSettings
temporalCovariateSettings = temporalCovariateSettings,
runOnSample = TRUE
)
},
"CDM Source table has more than one record while only one is expected."
Expand All @@ -147,7 +148,8 @@ VALUES ('Synthea','Synthea','OHDSI Community','SyntheaTM is a Synthetic Patient
runOrphanConcepts = TRUE,
incremental = TRUE,
incrementalFolder = file.path(folder, "incremental"),
temporalCovariateSettings = temporalCovariateSettings
temporalCovariateSettings = temporalCovariateSettings,
runOnSample = TRUE
)
}

Expand Down
9 changes: 6 additions & 3 deletions tests/testthat/test-2-againstCdm.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ test_that("Cohort diagnostics in incremental mode", {
minCellCount = minCellCountValue,
incremental = TRUE,
incrementalFolder = file.path(folder, "incremental"),
temporalCovariateSettings = temporalCovariateSettings
temporalCovariateSettings = temporalCovariateSettings,
runOnSample = TRUE
)
)

Expand Down Expand Up @@ -74,7 +75,8 @@ test_that("Cohort diagnostics in incremental mode", {
minCellCount = minCellCountValue,
incremental = TRUE,
incrementalFolder = file.path(folder, "incremental"),
temporalCovariateSettings = temporalCovariateSettings
temporalCovariateSettings = temporalCovariateSettings,
runOnSample = TRUE
)
)
# generate sqlite file
Expand Down Expand Up @@ -120,7 +122,8 @@ test_that("Cohort diagnostics in incremental mode", {
minCellCount = minCellCountValue,
incremental = FALSE,
incrementalFolder = file.path(folder, "incremental"),
temporalCovariateSettings = temporalCovariateSettings
temporalCovariateSettings = temporalCovariateSettings,
runOnSample = TRUE
)
})

Expand Down
32 changes: 32 additions & 0 deletions tests/testthat/test-other.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,35 @@ test_that("timeExecutions function", {
checkmate::expect_data_frame(result, nrows = 5, ncols = 5)
expect_false(all(is.na(result$startTime)))
})

test_that("enforceMinCellValue replaces values below minimum with negative of minimum", {
data <- data.frame(a = c(1, 2, 3, 4, 5))
minValues <- 3
result <- enforceMinCellValue(data, "a", minValues, silent = TRUE)

expect_equal(result$a, c(-3, -3, 3, 4, 5))
})

test_that("enforceMinCellValue does not replace NA values", {
data <- data.frame(a = c(1, 2, NA, 4, 5))
minValues <- 3
result <- enforceMinCellValue(data, "a", minValues, silent = TRUE)

expect_equal(result$a, c(-3, -3, NA, 4, 5))
})

test_that("enforceMinCellValue does not replace zero values", {
data <- data.frame(a = c(0, 2, 3, 4, 5))
minValues <- 3
result <- enforceMinCellValue(data, "a", minValues, silent = TRUE)

expect_equal(result$a, c(0, -3, 3, 4, 5))
})

test_that("enforceMinCellValue works with vector of minimum values", {
data <- data.frame(a = c(1, 2, 3, 4, 5))
minValues <- c(1, 2, 3, 4, 5)
result <- enforceMinCellValue(data, "a", minValues, silent = TRUE)

expect_equal(result$a, c(1, 2, 3, 4, 5))
})

0 comments on commit a82ecb9

Please sign in to comment.