Skip to content

Commit

Permalink
Revert "Refresh data 2023 2024 (#86)"
Browse files Browse the repository at this point in the history
This reverts commit 441c430.
  • Loading branch information
MarkMc1089 authored Sep 17, 2024
1 parent 441c430 commit 3de30c9
Show file tree
Hide file tree
Showing 23 changed files with 210 additions and 750 deletions.
7 changes: 3 additions & 4 deletions EDA/eda_parent_uprn_and_pat_threshold.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Create a lazy table from the item-level base table in DALP
data <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331")) %>%
filter(FY == "2020/21")
tbl(from = in_schema("ADNSH", "INT646_BASE_20210401_20220331"))

# Get chapter info
chapters = con %>%
Expand Down Expand Up @@ -40,7 +39,7 @@ df_std = data %>%
NURSING_HOME_FLAG = max(NURSING_HOME_FLAG),
RESIDENTIAL_HOME_FLAG = max(RESIDENTIAL_HOME_FLAG),
MAX_MONTHLY_PATIENTS = max(MAX_MONTHLY_PATIENTS),
#NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
MONTHS = n_distinct(YEAR_MONTH),
PATS = n_distinct(NHS_NO),
ITEMS = sum(ITEM_COUNT),
Expand All @@ -66,7 +65,7 @@ df_merge = data %>%
NURSING_HOME_FLAG = max(NURSING_HOME_FLAG),
RESIDENTIAL_HOME_FLAG = max(RESIDENTIAL_HOME_FLAG),
MAX_MONTHLY_PATIENTS = max(MAX_MONTHLY_PATIENTS),
#NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
MONTHS = n_distinct(YEAR_MONTH),
PATS = n_distinct(NHS_NO),
ITEMS = sum(ITEM_COUNT),
Expand Down
11 changes: 5 additions & 6 deletions R/mod_06_geo_ch_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ mod_06_geo_ch_flag_ui <- function(id) {
choices = c(
"2020/21",
"2021/22",
"2022/23",
"2023/24"
"2022/23"
),
selected = carehomes2::metrics_by_geo_and_ch_flag_df$FY %>%
levels() %>%
max(),
max,
full_width = TRUE
)
),
Expand Down Expand Up @@ -248,7 +247,7 @@ mod_06_geo_ch_flag_server <- function(id) {
span(
class = "nhsuk-body-s",
style = "font-size: 12px;",
gsub("^20", "", gsub(" NCH", " ", gsub(" CH", "", col)))
gsub(" NCH", " ", gsub(" CH", "", col))
) %>%
as.character()
}
Expand Down Expand Up @@ -308,11 +307,11 @@ mod_06_geo_ch_flag_server <- function(id) {
),
rowCallback = DT::JS(rowCallback)
),
height = "500px",
height = "400px",
filter = "none",
selection = "single"
) %>%
DT::formatStyle(columns = 1:9, `font-size` = "12px")
DT::formatStyle(columns = 1:7, `font-size` = "12px")
}

# Create download data (all data)
Expand Down
25 changes: 9 additions & 16 deletions data-raw/app/00_run_all.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,13 @@
Sys.time()
# Load library and generate base geo data
library(tictoc)
source("data-raw/app/data_raw_helpers.R")
source("data-raw/app/geo_data.R")

# Define vars to retain in workflow
keep_vars = c(ls(), 'keep_vars')

# Run all scripts that generate an Rda file
tic(); source("data-raw/app/01_headline_figures_df.R"); toc() # 10 mins
tic(); source("data-raw/app/02_patients_age_gender_df.R"); toc()
tic(); source("data-raw/app/03_patients_by_imd_df.R"); toc() # 10 mins
tic(); source("data-raw/app/04_metrics_by_ch_type_85_split_df.R"); toc() # 3 hours
tic(); source("data-raw/app/05_metrics_age_gender_df.R"); toc() # 30 mins
tic(); source("data-raw/app/06_metrics_by_geo_and_ch_flag_df.R"); toc() # 90 mins
tic(); source("data-raw/app/07_ch_flag_drug_df.R"); toc() # 30 mins
tic(); source("data-raw/app/08_geo_ch_flag_drug_df.R"); toc() # 90 mins
Sys.time()
keep_vars = c(ls(), 'keep_vars', 'get_metrics')

source("data-raw/app/01_headline_figures_df.R")
source("data-raw/app/02_patients_age_gender_df.R")
source("data-raw/app/03_patients_by_imd_df.R")
source("data-raw/app/04_metrics_by_ch_type_df.R")
source("data-raw/app/05_metrics_age_gender_df.R")
source("data-raw/app/06_metrics_by_geo_and_ch_flag_df.R")
source("data-raw/app/07_ch_flag_drug_df.R")
source("data-raw/app/08_geo_ch_flag_drug_df.R")
10 changes: 2 additions & 8 deletions data-raw/app/01_headline_figures_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Create a lazy table from the item-level base table in DALP
data_db <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))

# Key findings used within analysis summary text
data_db %>%
Expand Down Expand Up @@ -104,10 +104,4 @@ mod_headline_figures_df = rbind(annual_df, monthly_df)
usethis::use_data(mod_headline_figures_df, overwrite = TRUE)

# Disconnect from database
DBI::dbDisconnect(con)

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
DBI::dbDisconnect(con); rm(list = ls()); gc()
14 changes: 4 additions & 10 deletions data-raw/app/02_patients_age_gender_df.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Running time ~10 min

library(dplyr)
library(dbplyr)
devtools::load_all()
Expand All @@ -9,13 +9,14 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Item-level base table
base_db <- con |>
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))

# Add a dummy overall column
base_db <- base_db |>
mutate(OVERALL = "Overall")

# Loop over each geography and aggregate using purrr's map function approach

patients_by_fy_geo_age_gender_fun <- function(geography_name) {

# Identify geography cols
Expand Down Expand Up @@ -58,7 +59,6 @@ patients_by_fy_geo_age_gender_fun <- function(geography_name) {

}

# Map function
patients_by_fy_geo_age_gender_df <- purrr::map(
names(geographies),
patients_by_fy_geo_age_gender_fun
Expand All @@ -74,7 +74,6 @@ patients_by_fy_geo_age_gender_df <-
#PCT_PATIENTS = janitor::round_half_up(PCT_PATIENTS, 1)
)

# Calculate patient proportions
patients_by_fy_geo_age_gender_df <- patients_by_fy_geo_age_gender_df |>
group_by(CH_FLAG, FY, GEOGRAPHY, SUB_GEOGRAPHY_CODE, SUB_GEOGRAPHY_NAME) |>
mutate(
Expand Down Expand Up @@ -122,9 +121,4 @@ usethis::use_data(

# Disconnect from database
DBI::dbDisconnect(con)

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
rm(list = ls()); gc()
10 changes: 2 additions & 8 deletions data-raw/app/03_patients_by_imd_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Create a lazy table from the item level base table
fact_db <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))

# Count care home patients in each decile
mod_patients_by_imd_df <- fact_db %>%
Expand Down Expand Up @@ -37,10 +37,4 @@ mod_patients_by_imd_df <- fact_db %>%
usethis::use_data(mod_patients_by_imd_df, overwrite = TRUE)

# Disconnect
DBI::dbDisconnect(con)

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
DBI::dbDisconnect(con); rm(list = ls()); gc()
22 changes: 7 additions & 15 deletions data-raw/app/04_metrics_by_ch_type_85_split_df.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Initial setup -----------------------------------------------------------

# Expected run time ~35 minutes @parallel 36
# Expected run time ~35 minutes @parallel 24

library(dplyr)
library(dbplyr)
library(tidyr)

devtools::load_all()

# Set up connection to DALP
Expand All @@ -15,7 +17,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Item-level base table
base_db <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))

# Initial manipulation to create CH_TYPE column, later to be grouped by
init_db <- base_db %>%
Expand Down Expand Up @@ -46,7 +48,6 @@ init_db <- base_db %>%
)
)

# Union both init_db variants
init_db <- init_db %>%
union(
init_db %>%
Expand All @@ -56,7 +57,6 @@ init_db <- init_db %>%

## Process ----------------------------------------------------------------

# Get metrics
metrics_by_ch_type_85_split_df <- get_metrics(
init_db,
first_grouping = c(
Expand All @@ -73,7 +73,6 @@ metrics_by_ch_type_85_split_df <- get_metrics(
)
)

# Generate age band categories
metrics_by_ch_type_85_split_df <- metrics_by_ch_type_85_split_df %>%
mutate(
AGE_BAND = dplyr::case_match(
Expand All @@ -85,16 +84,9 @@ metrics_by_ch_type_85_split_df <- metrics_by_ch_type_85_split_df %>%
) %>%
dplyr::relocate(AGE_BAND, .after = CH_TYPE)

## Save ------------------------------------------------------------------------
## Save -------------------------------------------------------------------
usethis::use_data(metrics_by_ch_type_85_split_df, overwrite = TRUE)

# Cleanup ----------------------------------------------------------------------

# Disconnect
# Cleanup -----------------------------------------------------------------
DBI::dbDisconnect(con)

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
rm(list = ls())
12 changes: 3 additions & 9 deletions data-raw/app/05_metrics_age_gender_df.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Running time ~35 min

# Libraries and functions
library(dplyr)
library(dbplyr)
devtools::load_all()
Expand All @@ -11,10 +10,10 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Item-level base table
base_db <- con |>
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331")) %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331")) |>
filter(GENDER %in% c("Male", "Female"))

# Get metrics

metrics_by_age_gender_and_ch_flag_df <- get_metrics(
base_db,
first_grouping = c(
Expand Down Expand Up @@ -47,9 +46,4 @@ usethis::use_data(

# Disconnect from database
DBI::dbDisconnect(con)

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
rm(list = ls()); gc()
Loading

0 comments on commit 3de30c9

Please sign in to comment.