Skip to content

Commit

Permalink
Merge branch 'breaking-improvments' into config-v3
Browse files Browse the repository at this point in the history
  • Loading branch information
shauntruelove authored Oct 9, 2023
2 parents fb4dcb3 + 102d5f1 commit 1522c75
Show file tree
Hide file tree
Showing 12 changed files with 819 additions and 51 deletions.
2 changes: 2 additions & 0 deletions datasetup/build_US_setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ census_data <- census_data %>%
terr_census_data <- arrow::read_parquet(file.path(opt$p,"datasetup", "usdata","united-states-commutes","census_tracts_island_areas_2010.gz.parquet"))

census_data <- terr_census_data %>%
dplyr::rename(subpop = geoid) %>%
dplyr::filter(length(filterUSPS) == 0 | ((USPS %in% filterUSPS) & !(USPS %in% census_data)))%>%
rbind(census_data)

Expand Down Expand Up @@ -219,3 +220,4 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$subpop_setup$


## @endcond

2 changes: 1 addition & 1 deletion flepimop/R_packages/config.writer/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export(print_interventions)
export(print_outcomes)
export(print_seeding)
export(print_seir)
export(print_spatial_setup)
export(print_subpop_setup)
export(print_value)
export(print_value1)
export(process_npi_ca)
Expand Down
2 changes: 1 addition & 1 deletion flepimop/R_packages/config.writer/R/yaml_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ collapse_intervention<- function(dat){
dplyr::group_by(dplyr::across(-period)) %>%
dplyr::summarize(period = paste0(period, collapse="\n "))

if (!all(is.na(mtr$subpop_groups)) & !all(is.null(mtr$subpop_groups))) {
if (exists("mtr$spatial_groups") && (!all(is.na(mtr$spatial_groups)) & !all(is.null(mtr$spatial_groups)))) {

mtr <- mtr %>%
dplyr::group_by(dplyr::across(-subpop)) %>%
Expand Down
2 changes: 1 addition & 1 deletion flepimop/gempyor_pkg/docs/Rinterface.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ gempyor_simulator <- gempyor$GempyorSimulator(
seir_modifiers_scenario="inference", # NPIs scenario to use
outcome_modifiers_scenario="med", # Outcome scenario to use
stoch_traj_flag=FALSE,
spatial_path_prefix = '../tests/npi/' # prefix where to find the folder indicated in spatial_setup
spatial_path_prefix = '../tests/npi/' # prefix where to find the folder indicated in subpop_setup
)
```
Here we specified that the data folder specified in the config lies in the `test/npi/` folder, not in the current directory. The only mandatory arguments is the `config_path`. The default values of the other arguments are
Expand Down
2 changes: 1 addition & 1 deletion flepimop/gempyor_pkg/docs/Rinterface.html
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ <h3>Building a simulator</h3>
seir_modifiers_scenario=&quot;inference&quot;, # NPIs scenario to use
outcome_modifiers_scenario=&quot;med&quot;, # Outcome scenario to use
stoch_traj_flag=FALSE,
spatial_path_prefix = &#39;../tests/npi/&#39; # prefix where to find the folder indicated in spatial_setup
spatial_path_prefix = &#39;../tests/npi/&#39; # prefix where to find the folder indicated in subpop_setup
)</code></pre>
<p>Here we specify that the data folder specified in the config lies in the <code>test/npi/</code> folder, not in the current directory. The only mandatory arguments is the <code>config_path</code>. The default values of the other arguments are</p>
<pre><code> run_id=&quot;test_run_id&quot;, # an ommited argument will be left at its default value
Expand Down
2 changes: 1 addition & 1 deletion flepimop/gempyor_pkg/docs/integration_benchmark.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
"source": [
"config.set_file(config_path)\n",
"\n",
"spatial_config = config[\"spatial_setup\"]\n",
"spatial_config = config[\"subpop_setup\"]\n",
"spatial_base_path = pathlib.Path(\"../../COVID19_USA/\" + config[\"data_path\"].get())\n",
"seir_modifiers_scenario = seir_modifiers_scenario\n",
"outcome_modifiers_scenario = outcome_modifiers_scenario\n",
Expand Down
2 changes: 1 addition & 1 deletion flepimop/gempyor_pkg/docs/integration_doc.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
" seir_modifiers_scenario=\"inference\", # NPIs scenario to use\n",
" outcome_modifiers_scenario=\"med\", # Outcome scenario to use\n",
" stoch_traj_flag=False,\n",
" spatial_path_prefix=\"../tests/npi/\", # prefix where to find the folder indicated in spatial_setup$\n",
" spatial_path_prefix=\"../tests/npi/\", # prefix where to find the folder indicated in subpop_setup$\n",
")\n",
"config.clear()\n",
"config.read(user=False)\n",
Expand Down
2 changes: 1 addition & 1 deletion flepimop/gempyor_pkg/docs/interface.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
" seir_modifiers_scenario=\"inference\", # NPIs scenario to use\n",
" outcome_modifiers_scenario=\"med\", # Outcome scenario to use\n",
" stoch_traj_flag=False,\n",
" spatial_path_prefix=\"../tests/npi/\", # prefix where to find the folder indicated in spatial_setup$\n",
" spatial_path_prefix=\"../tests/npi/\", # prefix where to find the folder indicated in subpop_setup$\n",
")"
]
},
Expand Down
2 changes: 1 addition & 1 deletion flepimop/gempyor_pkg/tests/seir/interface.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
" seir_modifiers_scenario=\"inference\", # NPIs scenario to use\n",
" outcome_modifiers_scenario=\"med\", # Outcome scenario to use\n",
" stoch_traj_flag=False,\n",
" spatial_path_prefix=\"../tests/npi/\", # prefix where to find the folder indicated in spatial_setup$\n",
" spatial_path_prefix=\"../tests/npi/\", # prefix where to find the folder indicated in subpop_setup$\n",
")"
]
},
Expand Down
101 changes: 69 additions & 32 deletions flepimop/main_scripts/create_seeding.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ library(purrr)

option_list <- list(
optparse::make_option(c("-c", "--config"), action = "store", default = Sys.getenv("CONFIG_PATH"), type = "character", help = "path to the config file"),
optparse::make_option(c("-s", "--seed_variants"), action="store", default = Sys.getenv("SEED_VARIANTS"), type='logical',help="Whether to add variants/subtypes to outcomes in seeding."),
optparse::make_option(c("-k", "--keep_all_seeding"), action="store",default=TRUE,type='logical',help="Whether to filter away seeding prior to the start date of the simulation.")
)

Expand All @@ -67,6 +68,9 @@ if (is.null(config$subpop_setup$us_model)) {
is_US_run <- config$subpop_setup$us_model
seed_variants <- "variant_filename" %in% names(config$seeding)

if (!is.na(opt$seed_variants)){
seed_variants <- opt$seed_variants
}


## backwards compatibility with configs that don't have inference$gt_source
Expand Down Expand Up @@ -107,7 +111,7 @@ print(paste("Successfully loaded data from ", data_path, "for seeding."))

if (is_US_run) {
cases_deaths <- cases_deaths %>%
mutate(FIPS = stringr::str_pad(FIPS, width = 5, side = "right", pad = "0"))
mutate(subpop = stringr::str_pad(subpop, width = 5, side = "right", pad = "0"))
}

print(paste("Successfully pulled", gt_source, "data for seeding."))
Expand All @@ -126,39 +130,70 @@ if (seed_variants) {
colnames(variant_data)[colnames(variant_data) == "Update"] ="date"
colnames(cases_deaths)[colnames(cases_deaths) == "Update"] ="date"

if (!is.null(config$seeding$seeding_outcome)){
if (config$seeding$seeding_outcome=="incidH"){
if (any(grepl(paste(unique(variant_data$variant), collapse = "|"), colnames(cases_deaths)))){

if (!is.null(config$seeding$seeding_outcome)){
if (config$seeding$seeding_outcome=="incidH"){
cases_deaths <- cases_deaths %>%
dplyr::select(date, subpop, paste0("incidH_", names(config$seeding$seeding_compartments)))
colnames(cases_deaths) <- gsub("incidH_", "", colnames(cases_deaths))
cases_deaths <- cases_deaths %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(names(config$seeding$seeding_compartments))), ~ tidyr::replace_na(.x, 0)))
} else {
stop(paste(
"Currently only incidH is implemented for config$seeding$seeding_outcome."
))
}
} else {
cases_deaths <- cases_deaths %>%
dplyr::select(date, subpop, paste0("incidC_", names(config$seeding$seeding_compartments)))
colnames(cases_deaths) <- gsub("incidC_", "", colnames(cases_deaths))
cases_deaths <- cases_deaths %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(names(config$seeding$seeding_compartments))), ~ tidyr::replace_na(.x, 0)))
}

} else {

if (!is.null(config$seeding$seeding_outcome)){
if (config$seeding$seeding_outcome=="incidH"){
cases_deaths <- cases_deaths %>%
dplyr::select(date, subpop, incidH) %>%
dplyr::left_join(variant_data) %>%
dplyr::mutate(incidI = incidH * prop) %>%
dplyr::select(-prop, -incidH) %>%
tidyr::pivot_wider(names_from = variant, values_from = incidI) %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(variant_data$variant)), ~ tidyr::replace_na(.x, 0)))
} else {
stop(paste(
"Currently only incidH is implemented for config$seeding$seeding_outcome."
))
}
} else {
cases_deaths <- cases_deaths %>%
dplyr::select(date, FIPS, source, incidH) %>%
dplyr::select(date, subpop, incidC) %>%
dplyr::left_join(variant_data) %>%
dplyr::mutate(incidI = incidH * prop) %>%
dplyr::select(-prop, -incidH) %>%
dplyr::mutate(incidI = incidC * prop) %>%
dplyr::select(-prop, -incidC) %>%
tidyr::pivot_wider(names_from = variant, values_from = incidI) %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(variant_data$variant)), ~ tidyr::replace_na(.x, 0)))
} else {
stop(paste(
"Currently only incidH is implemented for config$seeding$seeding_outcome."
))
}
} else {
cases_deaths <- cases_deaths %>%
dplyr::select(date, FIPS, source, incidC) %>%
dplyr::left_join(variant_data) %>%
dplyr::mutate(incidI = incidC * prop) %>%
dplyr::select(-prop, -incidC) %>%
tidyr::pivot_wider(names_from = variant, values_from = incidI) %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(variant_data$variant)), ~ tidyr::replace_na(.x, 0)))
}
} else {

# rename date columns in data for joining
colnames(cases_deaths)[colnames(cases_deaths) == "Update"] ="date"
colnames(cases_deaths) <- gsub("incidH_", "", colnames(cases_deaths))

}

## Check some data attributes:
## This is a hack:
if ("subpop" %in% names(cases_deaths)) {
cases_deaths$FIPS <- cases_deaths$subpop
if ("FIPS" %in% names(cases_deaths)) {
cases_deaths$subpop <- cases_deaths$FIPS
warning("Changing FIPS name in seeding. This is a hack")
}
if ("date" %in% names(cases_deaths)) {
cases_deaths$Update <- cases_deaths$date
if ("Update" %in% names(cases_deaths)) {
cases_deaths$date <- cases_deaths$Update
warning("Changing Update name in seeding. This is a hack")
}
obs_subpop <- config$subpop_setup$subpop
Expand All @@ -177,7 +212,7 @@ check_required_names <- function(df, cols, msg) {
if ("compartments" %in% names(config)) {

if (all(names(config$seeding$seeding_compartments) %in% names(cases_deaths))) {
required_column_names <- c("FIPS", "Update", names(config$seeding$seeding_compartments))
required_column_names <- c("subpop", "date", names(config$seeding$seeding_compartments))
check_required_names(
cases_deaths,
required_column_names,
Expand All @@ -188,6 +223,7 @@ if ("compartments" %in% names(config)) {
)
incident_cases <- cases_deaths[, required_column_names] %>%
tidyr::pivot_longer(!!names(config$seeding$seeding_compartments), names_to = "seeding_group") %>%
filter(!is.na(value)) %>%
dplyr::mutate(
source_column = sapply(
config$seeding$seeding_compartments[seeding_group],
Expand All @@ -204,7 +240,7 @@ if ("compartments" %in% names(config)) {
) %>%
tidyr::separate(source_column, paste("source", names(config$compartments), sep = "_")) %>%
tidyr::separate(destination_column, paste("destination", names(config$compartments), sep = "_"))
required_column_names <- c("FIPS", "Update", "value", paste("source", names(config$compartments), sep = "_"), paste("destination", names(config$compartments), sep = "_"))
required_column_names <- c("subpop", "date", "value", paste("source", names(config$compartments), sep = "_"), paste("destination", names(config$compartments), sep = "_"))
incident_cases <- incident_cases[, required_column_names]

# if (!is.null(config$smh_round)) {
Expand Down Expand Up @@ -233,7 +269,7 @@ if ("compartments" %in% names(config)) {
stop("Please add a seeding_compartments section to the config")
}
} else {
required_column_names <- c("FIPS", "Update", "incidI")
required_column_names <- c("subpop", "date", "incidI")
check_required_names(
cases_deaths,
required_column_names,
Expand All @@ -246,7 +282,7 @@ if ("compartments" %in% names(config)) {
tidyr::pivot_longer(cols = "incidI", names_to = "source_infection_stage", values_to = "value")
incident_cases$destination_infection_stage <- "E"
incident_cases$source_infection_stage <- "S"
required_column_names <- c("FIPS", "Update", "value", "source_infection_stage", "destination_infection_stage")
required_column_names <- c("subpop", "date", "value", "source_infection_stage", "destination_infection_stage")

if ("parallel_structure" %in% names(config[["seir"]][["parameters"]])) {
parallel_compartments <- config[["seir"]][["parameters"]][["parallel_structure"]][["compartments"]]
Expand All @@ -272,16 +308,16 @@ geodata <- flepicommon::load_geodata_file(
TRUE
)

all_subpop <- geodata[[config$subpop_setup$subpop]]
all_subpop <- geodata[["subpop"]]



incident_cases <- incident_cases %>%
dplyr::filter(FIPS %in% all_subpop) %>%
dplyr::filter(subpop %in% all_subpop) %>%
dplyr::select(!!!required_column_names)
incident_cases <- incident_cases %>% filter(value>0)

incident_cases[["Update"]] <- as.Date(incident_cases$Update)
incident_cases[["date"]] <- as.Date(incident_cases$date)

if (is.null(config[["seeding"]][["seeding_inflation_ratio"]])) {
config[["seeding"]][["seeding_inflation_ratio"]] <- 10
Expand All @@ -290,16 +326,16 @@ if (is.null(config[["seeding"]][["seeding_delay"]])) {
config[["seeding"]][["seeding_delay"]] <- 5
}

grouping_columns <- required_column_names[!required_column_names %in% c("Update", "value")]
grouping_columns <- required_column_names[!required_column_names %in% c("date", "value")]
incident_cases <- incident_cases %>%
dplyr::group_by(!!!rlang::syms(grouping_columns)) %>%
dplyr::group_modify(function(.x, .y) {
.x %>%
dplyr::arrange(Update) %>%
dplyr::arrange(date) %>%
dplyr::filter(value > 0) %>%
.[seq_len(min(nrow(.x), 5)), ] %>%
dplyr::mutate(
Update = Update - lubridate::days(config[["seeding"]][["seeding_delay"]]),
date = date - lubridate::days(config[["seeding"]][["seeding_delay"]]),
value = config[["seeding"]][["seeding_inflation_ratio"]] * value + .05
) %>%
return
Expand Down Expand Up @@ -384,3 +420,4 @@ print(paste("Saved seeding to", config$seeding$lambda_file))
head(incident_cases)

## @endcond

21 changes: 10 additions & 11 deletions flepimop/main_scripts/inference_slot.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ if (is.null(config$inference$gt_source)){
}

gt_scale <- ifelse(state_level, "US state", "US county")
fips_codes_ <- geodata[[obs_subpop]]
subpops_ <- geodata[[obs_subpop]]

gt_start_date <- lubridate::ymd(config$start_date)
if (opt$ground_truth_start != "") {
Expand Down Expand Up @@ -218,21 +218,20 @@ if (config$inference$do_inference){

obs <- suppressMessages(
readr::read_csv(config$inference$gt_data_path,
col_types = readr::cols(FIPS = readr::col_character(),
date = readr::col_date(),
col_types = readr::cols(date = readr::col_date(),
source = readr::col_character(),
subpop = readr::col_character(),
.default = readr::col_double()), )) %>%
dplyr::filter(FIPS %in% fips_codes_, date >= gt_start_date, date <= gt_end_date) %>%
dplyr::right_join(tidyr::expand_grid(FIPS = unique(.$FIPS), date = unique(.$date))) %>%
dplyr::mutate_if(is.numeric, dplyr::coalesce, 0) %>%
dplyr::rename(!!obs_subpop := FIPS)
dplyr::filter(subpop %in% subpops_, date >= gt_start_date, date <= gt_end_date) %>%
dplyr::right_join(tidyr::expand_grid(subpop = unique(.$subpop), date = unique(.$date))) %>%
dplyr::mutate_if(is.numeric, dplyr::coalesce, 0)

geonames <- unique(obs[[obs_subpop]])
subpopnames <- unique(obs[[obs_subpop]])


## Compute statistics
data_stats <- lapply(
geonames,
subpopnames,
function(x) {
df <- obs[obs[[obs_subpop]] == x, ]
inference::getStats(
Expand All @@ -244,7 +243,7 @@ if (config$inference$do_inference){
end_date = gt_end_date
)
}) %>%
set_names(geonames)
set_names(subpopnames)


likelihood_calculation_fun <- function(sim_hosp){
Expand Down Expand Up @@ -277,7 +276,7 @@ if (config$inference$do_inference){

} else {

geonames <- obs_subpop
subpopnames <- obs_subpop

likelihood_calculation_fun <- function(sim_hosp){

Expand Down
Loading

0 comments on commit 1522c75

Please sign in to comment.