Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refresh data 2023 2024 #86

Merged
merged 21 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions EDA/eda_parent_uprn_and_pat_threshold.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Create a lazy table from the item level base table in DALP
data <- con %>%
tbl(from = in_schema("ADNSH", "INT646_BASE_20210401_20220331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331")) %>%
filter(FY == "2020/21")

# Get chapter info
chapters = con %>%
Expand Down Expand Up @@ -39,7 +40,7 @@ df_std = data %>%
NURSING_HOME_FLAG = max(NURSING_HOME_FLAG),
RESIDENTIAL_HOME_FLAG = max(RESIDENTIAL_HOME_FLAG),
MAX_MONTHLY_PATIENTS = max(MAX_MONTHLY_PATIENTS),
NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
#NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
MONTHS = n_distinct(YEAR_MONTH),
PATS = n_distinct(NHS_NO),
ITEMS = sum(ITEM_COUNT),
Expand All @@ -65,7 +66,7 @@ df_merge = data %>%
NURSING_HOME_FLAG = max(NURSING_HOME_FLAG),
RESIDENTIAL_HOME_FLAG = max(RESIDENTIAL_HOME_FLAG),
MAX_MONTHLY_PATIENTS = max(MAX_MONTHLY_PATIENTS),
NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
#NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
MONTHS = n_distinct(YEAR_MONTH),
PATS = n_distinct(NHS_NO),
ITEMS = sum(ITEM_COUNT),
Expand Down
11 changes: 6 additions & 5 deletions R/mod_06_geo_ch_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ mod_06_geo_ch_flag_ui <- function(id) {
choices = c(
"2020/21",
"2021/22",
"2022/23"
"2022/23",
"2023/24"
),
selected = carehomes2::metrics_by_geo_and_ch_flag_df$FY %>%
levels() %>%
max,
max(),
full_width = TRUE
)
),
Expand Down Expand Up @@ -247,7 +248,7 @@ mod_06_geo_ch_flag_server <- function(id) {
span(
class = "nhsuk-body-s",
style = "font-size: 12px;",
gsub(" NCH", " ", gsub(" CH", "", col))
gsub("^20", "", gsub(" NCH", " ", gsub(" CH", "", col)))
) %>%
as.character()
}
Expand Down Expand Up @@ -307,11 +308,11 @@ mod_06_geo_ch_flag_server <- function(id) {
),
rowCallback = DT::JS(rowCallback)
),
height = "400px",
height = "500px",
filter = "none",
selection = "single"
) %>%
DT::formatStyle(columns = 1:7, `font-size` = "12px")
DT::formatStyle(columns = 1:9, `font-size` = "12px")
}

# Create download data (all data)
Expand Down
25 changes: 16 additions & 9 deletions data-raw/app/00_run_all.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
Sys.time()
# Load library and generate base geo data
library(tictoc)
source("data-raw/app/data_raw_helpers.R")
source("data-raw/app/geo_data.R")

keep_vars = c(ls(), 'keep_vars', 'get_metrics')
# Define vars to retain in workflow
keep_vars = c(ls(), 'keep_vars')

# Run all scripts that generate an Rda file
tic(); source("data-raw/app/01_headline_figures_df.R"); toc() # 10 mins
tic(); source("data-raw/app/02_patients_age_gender_df.R"); toc()
tic(); source("data-raw/app/03_patients_by_imd_df.R"); toc() # 10 mins
tic(); source("data-raw/app/04_metrics_by_ch_type_85_split_df.R"); toc() # 3 hours
tic(); source("data-raw/app/05_metrics_age_gender_df.R"); toc() # 30 mins
tic(); source("data-raw/app/06_metrics_by_geo_and_ch_flag_df.R"); toc() # 90 mins
tic(); source("data-raw/app/07_ch_flag_drug_df.R"); toc() # 30 mins
tic(); source("data-raw/app/08_geo_ch_flag_drug_df.R"); toc() # 90 mins
Sys.time()

source("data-raw/app/01_headline_figures_df.R")
source("data-raw/app/02_patients_age_gender_df.R")
source("data-raw/app/03_patients_by_imd_df.R")
source("data-raw/app/04_metrics_by_ch_type_df.R")
source("data-raw/app/05_metrics_age_gender_df.R")
source("data-raw/app/06_metrics_by_geo_and_ch_flag_df.R")
source("data-raw/app/07_ch_flag_drug_df.R")
source("data-raw/app/08_geo_ch_flag_drug_df.R")
10 changes: 8 additions & 2 deletions data-raw/app/01_headline_figures_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Create a lazy table from the item level base table in DALP
data_db <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))

# Key findings used within analysis summary text
data_db %>%
Expand Down Expand Up @@ -104,4 +104,10 @@ mod_headline_figures_df = rbind(annual_df, monthly_df)
usethis::use_data(mod_headline_figures_df, overwrite = TRUE)

# Disconnect from database
DBI::dbDisconnect(con); rm(list = ls()); gc()
DBI::dbDisconnect(con)

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
14 changes: 10 additions & 4 deletions data-raw/app/02_patients_age_gender_df.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Running time ~10 min

# Running time ~10 min
library(dplyr)
library(dbplyr)
devtools::load_all()
Expand All @@ -9,14 +9,13 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Item-level base table
base_db <- con |>
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))

# Add a dummy overall column
base_db <- base_db |>
mutate(OVERALL = "Overall")

# Loop over each geography and aggregate using purrr's map function approach

patients_by_fy_geo_age_gender_fun <- function(geography_name) {

# Identify geography cols
Expand Down Expand Up @@ -59,6 +58,7 @@ patients_by_fy_geo_age_gender_fun <- function(geography_name) {

}

# Map function
patients_by_fy_geo_age_gender_df <- purrr::map(
names(geographies),
patients_by_fy_geo_age_gender_fun
Expand All @@ -74,6 +74,7 @@ patients_by_fy_geo_age_gender_df <-
#PCT_PATIENTS = janitor::round_half_up(PCT_PATIENTS, 1)
)

# Calculate patient proportions
patients_by_fy_geo_age_gender_df <- patients_by_fy_geo_age_gender_df |>
group_by(CH_FLAG, FY, GEOGRAPHY, SUB_GEOGRAPHY_CODE, SUB_GEOGRAPHY_NAME) |>
mutate(
Expand Down Expand Up @@ -121,4 +122,9 @@ usethis::use_data(

# Disconnect from database
DBI::dbDisconnect(con)
rm(list = ls()); gc()

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
10 changes: 8 additions & 2 deletions data-raw/app/03_patients_by_imd_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Create a lazy table from the item level base table
fact_db <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))

# Count care home patients in each decile
mod_patients_by_imd_df <- fact_db %>%
Expand Down Expand Up @@ -37,4 +37,10 @@ mod_patients_by_imd_df <- fact_db %>%
usethis::use_data(mod_patients_by_imd_df, overwrite = TRUE)

# Disconnect
DBI::dbDisconnect(con); rm(list = ls()); gc()
DBI::dbDisconnect(con)

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
22 changes: 15 additions & 7 deletions data-raw/app/04_metrics_by_ch_type_85_split_df.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# Initial setup -----------------------------------------------------------

# Expected run time ~35 minutes @parallel 24

# Expected run time ~35 minutes @parallel 36
library(dplyr)
library(dbplyr)
library(tidyr)

devtools::load_all()

# Set up connection to DALP
Expand All @@ -17,7 +15,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Item-level base table
base_db <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))

# Initial manipulation to create CH_TYPE column, later to be grouped by
init_db <- base_db %>%
Expand Down Expand Up @@ -48,6 +46,7 @@ init_db <- base_db %>%
)
)

# Union both init_db variants
init_db <- init_db %>%
union(
init_db %>%
Expand All @@ -57,6 +56,7 @@ init_db <- init_db %>%

## Process ----------------------------------------------------------------

# Get metrics
metrics_by_ch_type_85_split_df <- get_metrics(
init_db,
first_grouping = c(
Expand All @@ -73,6 +73,7 @@ metrics_by_ch_type_85_split_df <- get_metrics(
)
)

# Generate age band categories
metrics_by_ch_type_85_split_df <- metrics_by_ch_type_85_split_df %>%
mutate(
AGE_BAND = dplyr::case_match(
Expand All @@ -84,9 +85,16 @@ metrics_by_ch_type_85_split_df <- metrics_by_ch_type_85_split_df %>%
) %>%
dplyr::relocate(AGE_BAND, .after = CH_TYPE)

## Save -------------------------------------------------------------------
## Save ------------------------------------------------------------------------
usethis::use_data(metrics_by_ch_type_85_split_df, overwrite = TRUE)

# Cleanup -----------------------------------------------------------------
# Cleanup ----------------------------------------------------------------------

# Disconnect
DBI::dbDisconnect(con)
rm(list = ls())

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
12 changes: 9 additions & 3 deletions data-raw/app/05_metrics_age_gender_df.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Running time ~35 min

# Libraries and functions
library(dplyr)
library(dbplyr)
devtools::load_all()
Expand All @@ -10,10 +11,10 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Item-level base table
base_db <- con |>
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331")) |>
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331")) %>%
filter(GENDER %in% c("Male", "Female"))


# Get metrics
metrics_by_age_gender_and_ch_flag_df <- get_metrics(
base_db,
first_grouping = c(
Expand Down Expand Up @@ -46,4 +47,9 @@ usethis::use_data(

# Disconnect from database
DBI::dbDisconnect(con)
rm(list = ls()); gc()

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
Loading
Loading