Skip to content

Commit

Permalink
Refresh data 2023 2024 (#86)
Browse files Browse the repository at this point in the history
* cqc funs

* updated cqc api

* updated cqc script finished

* abp from dall_ref

* edit

* workflow production update

* sql file additions

* run_all script amendments

* eda edit

* cqc funs

* updated cqc api

* updated cqc script finished

* abp from dall_ref

* edit

* workflow production update

* sql file additions

* run_all script amendments

* eda edit

* Update mod6 FY select and table

* Remove commented old code

---------

Co-authored-by: Adnan Shroufi <[email protected]>
  • Loading branch information
MarkMc1089 and AdnanShroufi authored Sep 11, 2024
1 parent 358ef07 commit 441c430
Show file tree
Hide file tree
Showing 23 changed files with 750 additions and 210 deletions.
7 changes: 4 additions & 3 deletions EDA/eda_parent_uprn_and_pat_threshold.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Create a lazy table from the item-level base table in DALP
data <- con %>%
tbl(from = in_schema("ADNSH", "INT646_BASE_20210401_20220331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331")) %>%
filter(FY == "2020/21")

# Get chapter info
chapters = con %>%
Expand Down Expand Up @@ -39,7 +40,7 @@ df_std = data %>%
NURSING_HOME_FLAG = max(NURSING_HOME_FLAG),
RESIDENTIAL_HOME_FLAG = max(RESIDENTIAL_HOME_FLAG),
MAX_MONTHLY_PATIENTS = max(MAX_MONTHLY_PATIENTS),
NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
#NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
MONTHS = n_distinct(YEAR_MONTH),
PATS = n_distinct(NHS_NO),
ITEMS = sum(ITEM_COUNT),
Expand All @@ -65,7 +66,7 @@ df_merge = data %>%
NURSING_HOME_FLAG = max(NURSING_HOME_FLAG),
RESIDENTIAL_HOME_FLAG = max(RESIDENTIAL_HOME_FLAG),
MAX_MONTHLY_PATIENTS = max(MAX_MONTHLY_PATIENTS),
NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
#NUMBER_OF_BEDS = max(NUMBER_OF_BEDS),
MONTHS = n_distinct(YEAR_MONTH),
PATS = n_distinct(NHS_NO),
ITEMS = sum(ITEM_COUNT),
Expand Down
11 changes: 6 additions & 5 deletions R/mod_06_geo_ch_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ mod_06_geo_ch_flag_ui <- function(id) {
choices = c(
"2020/21",
"2021/22",
"2022/23"
"2022/23",
"2023/24"
),
selected = carehomes2::metrics_by_geo_and_ch_flag_df$FY %>%
levels() %>%
max,
max(),
full_width = TRUE
)
),
Expand Down Expand Up @@ -247,7 +248,7 @@ mod_06_geo_ch_flag_server <- function(id) {
span(
class = "nhsuk-body-s",
style = "font-size: 12px;",
gsub(" NCH", " ", gsub(" CH", "", col))
gsub("^20", "", gsub(" NCH", " ", gsub(" CH", "", col)))
) %>%
as.character()
}
Expand Down Expand Up @@ -307,11 +308,11 @@ mod_06_geo_ch_flag_server <- function(id) {
),
rowCallback = DT::JS(rowCallback)
),
height = "400px",
height = "500px",
filter = "none",
selection = "single"
) %>%
DT::formatStyle(columns = 1:7, `font-size` = "12px")
DT::formatStyle(columns = 1:9, `font-size` = "12px")
}

# Create download data (all data)
Expand Down
25 changes: 16 additions & 9 deletions data-raw/app/00_run_all.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
Sys.time()
# Load library and generate base geo data
library(tictoc)
source("data-raw/app/data_raw_helpers.R")
source("data-raw/app/geo_data.R")

keep_vars = c(ls(), 'keep_vars', 'get_metrics')
# Define vars to retain in workflow
keep_vars = c(ls(), 'keep_vars')

# Run all scripts that generate an Rda file
tic(); source("data-raw/app/01_headline_figures_df.R"); toc() # 10 mins
tic(); source("data-raw/app/02_patients_age_gender_df.R"); toc()
tic(); source("data-raw/app/03_patients_by_imd_df.R"); toc() # 10 mins
tic(); source("data-raw/app/04_metrics_by_ch_type_85_split_df.R"); toc() # 3 hours
tic(); source("data-raw/app/05_metrics_age_gender_df.R"); toc() # 30 mins
tic(); source("data-raw/app/06_metrics_by_geo_and_ch_flag_df.R"); toc() # 90 mins
tic(); source("data-raw/app/07_ch_flag_drug_df.R"); toc() # 30 mins
tic(); source("data-raw/app/08_geo_ch_flag_drug_df.R"); toc() # 90 mins
Sys.time()

source("data-raw/app/01_headline_figures_df.R")
source("data-raw/app/02_patients_age_gender_df.R")
source("data-raw/app/03_patients_by_imd_df.R")
source("data-raw/app/04_metrics_by_ch_type_df.R")
source("data-raw/app/05_metrics_age_gender_df.R")
source("data-raw/app/06_metrics_by_geo_and_ch_flag_df.R")
source("data-raw/app/07_ch_flag_drug_df.R")
source("data-raw/app/08_geo_ch_flag_drug_df.R")
10 changes: 8 additions & 2 deletions data-raw/app/01_headline_figures_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Create a lazy table from the item-level base table in DALP
data_db <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))

# Key findings used within analysis summary text
data_db %>%
Expand Down Expand Up @@ -104,4 +104,10 @@ mod_headline_figures_df = rbind(annual_df, monthly_df)
usethis::use_data(mod_headline_figures_df, overwrite = TRUE)

# Disconnect from database
DBI::dbDisconnect(con); rm(list = ls()); gc()
DBI::dbDisconnect(con)

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
14 changes: 10 additions & 4 deletions data-raw/app/02_patients_age_gender_df.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Running time ~10 min

# Running time ~10 min
library(dplyr)
library(dbplyr)
devtools::load_all()
Expand All @@ -9,14 +9,13 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Item-level base table
base_db <- con |>
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))

# Add a dummy overall column
base_db <- base_db |>
mutate(OVERALL = "Overall")

# Loop over each geography and aggregate using purrr's map function approach

patients_by_fy_geo_age_gender_fun <- function(geography_name) {

# Identify geography cols
Expand Down Expand Up @@ -59,6 +58,7 @@ patients_by_fy_geo_age_gender_fun <- function(geography_name) {

}

# Map function
patients_by_fy_geo_age_gender_df <- purrr::map(
names(geographies),
patients_by_fy_geo_age_gender_fun
Expand All @@ -74,6 +74,7 @@ patients_by_fy_geo_age_gender_df <-
#PCT_PATIENTS = janitor::round_half_up(PCT_PATIENTS, 1)
)

# Calculate patient proportions
patients_by_fy_geo_age_gender_df <- patients_by_fy_geo_age_gender_df |>
group_by(CH_FLAG, FY, GEOGRAPHY, SUB_GEOGRAPHY_CODE, SUB_GEOGRAPHY_NAME) |>
mutate(
Expand Down Expand Up @@ -121,4 +122,9 @@ usethis::use_data(

# Disconnect from database
DBI::dbDisconnect(con)
rm(list = ls()); gc()

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
10 changes: 8 additions & 2 deletions data-raw/app/03_patients_by_imd_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Create a lazy table from the item level base table
fact_db <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))

# Count care home patients in each decile
mod_patients_by_imd_df <- fact_db %>%
Expand Down Expand Up @@ -37,4 +37,10 @@ mod_patients_by_imd_df <- fact_db %>%
usethis::use_data(mod_patients_by_imd_df, overwrite = TRUE)

# Disconnect
DBI::dbDisconnect(con); rm(list = ls()); gc()
DBI::dbDisconnect(con)

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
22 changes: 15 additions & 7 deletions data-raw/app/04_metrics_by_ch_type_85_split_df.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# Initial setup -----------------------------------------------------------

# Expected run time ~35 minutes @parallel 24

# Expected run time ~35 minutes @parallel 36
library(dplyr)
library(dbplyr)
library(tidyr)

devtools::load_all()

# Set up connection to DALP
Expand All @@ -17,7 +15,7 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Item-level base table
base_db <- con %>%
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331"))
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331"))

# Initial manipulation to create CH_TYPE column, later to be grouped by
init_db <- base_db %>%
Expand Down Expand Up @@ -48,6 +46,7 @@ init_db <- base_db %>%
)
)

# Union both init_db variants
init_db <- init_db %>%
union(
init_db %>%
Expand All @@ -57,6 +56,7 @@ init_db <- init_db %>%

## Process ----------------------------------------------------------------

# Get metrics
metrics_by_ch_type_85_split_df <- get_metrics(
init_db,
first_grouping = c(
Expand All @@ -73,6 +73,7 @@ metrics_by_ch_type_85_split_df <- get_metrics(
)
)

# Generate age band categories
metrics_by_ch_type_85_split_df <- metrics_by_ch_type_85_split_df %>%
mutate(
AGE_BAND = dplyr::case_match(
Expand All @@ -84,9 +85,16 @@ metrics_by_ch_type_85_split_df <- metrics_by_ch_type_85_split_df %>%
) %>%
dplyr::relocate(AGE_BAND, .after = CH_TYPE)

## Save -------------------------------------------------------------------
## Save ------------------------------------------------------------------------
usethis::use_data(metrics_by_ch_type_85_split_df, overwrite = TRUE)

# Cleanup -----------------------------------------------------------------
# Cleanup ----------------------------------------------------------------------

# Disconnect
DBI::dbDisconnect(con)
rm(list = ls())

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
12 changes: 9 additions & 3 deletions data-raw/app/05_metrics_age_gender_df.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Running time ~35 min

# Libraries and functions
library(dplyr)
library(dbplyr)
devtools::load_all()
Expand All @@ -10,10 +11,10 @@ con <- nhsbsaR::con_nhsbsa(database = "DALP")

# Item-level base table
base_db <- con |>
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20230331")) |>
tbl(from = in_schema("DALL_REF", "INT646_BASE_20200401_20240331")) %>%
filter(GENDER %in% c("Male", "Female"))


# Get metrics
metrics_by_age_gender_and_ch_flag_df <- get_metrics(
base_db,
first_grouping = c(
Expand Down Expand Up @@ -46,4 +47,9 @@ usethis::use_data(

# Disconnect from database
DBI::dbDisconnect(con)
rm(list = ls()); gc()

# Remove vars specific to script
remove_vars <- setdiff(ls(), keep_vars)

# Remove objects and clean environment
rm(list = remove_vars, remove_vars); gc()
Loading

0 comments on commit 441c430

Please sign in to comment.