From 145f54b1d9626a648b2c4ab3778b1833a5f0e157 Mon Sep 17 00:00:00 2001 From: kaygo Date: Mon, 23 Oct 2023 10:05:29 +0100 Subject: [PATCH 01/19] add .gitignore temporary data folder --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index dc34566..2755bb2 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ rsconnect ################################################ *.[xX][lL][sS][xXmMtT]? data/* +data_temp/* From a25e3a2a76ea51234b88ab50ce4ddaeb13802c22 Mon Sep 17 00:00:00 2001 From: kaygo Date: Mon, 23 Oct 2023 13:12:27 +0100 Subject: [PATCH 02/19] data-raw process and create three basic data for the report --- DESCRIPTION | 3 +- data-raw/01_headcount_by_gender_afc.R | 53 --------------- data-raw/01_read_hourly_gpg.R | 96 +++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 54 deletions(-) delete mode 100644 data-raw/01_headcount_by_gender_afc.R create mode 100644 data-raw/01_read_hourly_gpg.R diff --git a/DESCRIPTION b/DESCRIPTION index 636a29b..85688bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,8 @@ Imports: futile.logger (>= 1.4.3), stringr, formatR, - tidyr + tidyr, + purrr Suggests: pkgload, testthat (>= 3.0.0), diff --git a/data-raw/01_headcount_by_gender_afc.R b/data-raw/01_headcount_by_gender_afc.R deleted file mode 100644 index 3c0c2e5..0000000 --- a/data-raw/01_headcount_by_gender_afc.R +++ /dev/null @@ -1,53 +0,0 @@ -# This is dummy tidy dataset but it will include -# mean and median pay per AFC band to create one file - -# This dummy data includes maternity leave, sick leave etc -# Therefore, headcounts are slightly higher then reported -# figure - -# Library - -library(dplyr) -library(dbplyr) - -# Set up connection to DALP -con <- nhsbsaR::con_nhsbsa(database = "DALP") - -# Create a lazy table from cleaned employee table in DALP -data_db <- con |> - tbl(from = in_schema("DALL_REF", "EMPLOYEE_DASHBOARD_COMBINED_EMPLOYMENT_DATA")) - -# Summary headcount table of Financial Year, Gender, 
AFC band - -headcount <- data_db |> - filter(substr(ESR_MONTH, 1, 6) == '01-MAR', - as.numeric(substr(ESR_MONTH, 8, 9)) %in% c(18, 19, 20, 21, 22, 23)) |> - mutate( - FINANCIAL_YEAR = case_when( - as.numeric(substr(ESR_MONTH, 8, 9)) == 18 ~ '2017/18', - as.numeric(substr(ESR_MONTH, 8, 9)) == 19 ~ '2018/19', - as.numeric(substr(ESR_MONTH, 8, 9)) == 20 ~ '2019/20', - as.numeric(substr(ESR_MONTH, 8, 9)) == 21 ~ '2020/21', - as.numeric(substr(ESR_MONTH, 8, 9)) == 22 ~ '2021/22', - as.numeric(substr(ESR_MONTH, 8, 9)) == 23 ~ '2022/23', - TRUE ~ 'unknown' - ) - ) |> - group_by(FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME , FTE_GROUP) |> - summarise(HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE)) |> - ungroup() |> - arrange(FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME, FTE_GROUP) |> - collect() |> - # In case we want to report by year (keep it as factor) - mutate(FINANCIAL_YEAR = factor(FINANCIAL_YEAR, - levels = unique(FINANCIAL_YEAR)), - PAY_GRADE_NAME = factor(PAY_GRADE_NAME)) - - -# Add to data -usethis::use_data(headcount, overwrite = TRUE) - -DBI::dbDisconnect(con) -rm(list = ls()) -gc() - diff --git a/data-raw/01_read_hourly_gpg.R b/data-raw/01_read_hourly_gpg.R new file mode 100644 index 0000000..30b00de --- /dev/null +++ b/data-raw/01_read_hourly_gpg.R @@ -0,0 +1,96 @@ +# This data is an extract from ESR dashboard +# Three parts will be pulled for the report +# Gender pay gap (%) based on male hourly pay +# Quantiles by gender +# Join with staff list to get AFC band information, FTE + +# Load required libraries +library(readxl) +library(dplyr) +library(purrr) +library(stringr) + +# List all excel and csv files in the directory +files <- list.files(path = "./data_temp", pattern = "\\.xlsx$|\\.csv$", full.names = TRUE) + +# Function to process each file +process_file <- function(filepath) { + # Determine the financial year from the filename + fy_pattern <- "FY(\\d{2})(\\d{2})" + fy_matches <- regmatches(filepath, regexec(fy_pattern, filepath))[[1]] + # reporting period + 
financial_year <- paste0("20", fy_matches[2], "/", fy_matches[3]) + + # Create three data frames and add the financial year (reporting period) + if (stringr::str_detect(filepath, "\\.xlsx$")) { + list( + paygap = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |> + select(1:3) |> + janitor::clean_names() |> + mutate(period = financial_year), + quartile = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |> + select(5:9) |> + janitor::clean_names() |> + mutate(period = financial_year), + afc = read_excel(filepath, skip = 8, col_names = TRUE) |> + select(2:7) |> + janitor::clean_names() |> + mutate(period = financial_year) + ) + # staff list information as csv + } else if (stringr::str_detect(filepath, "\\.csv$")) { + list( + staff = read.csv(filepath, header = TRUE) |> + janitor::clean_names() |> + filter(primary == "Y") |> + select( + employee_number, + org_l3, + pay_scale, + fte + ) |> + mutate( + period = financial_year, + employee_number = as.character(employee_number) + ) + ) + } +} + +# Apply the function to each file +dfs <- map(files, process_file) + +# Row bind all df1s, df2s, and df3s +paygap <- map(dfs, "paygap") |> + bind_rows() |> + select(period, everything()) +quartile <- map(dfs, "quartile") |> + bind_rows() |> + select(period, everything()) +afc <- map(dfs, "afc") |> + bind_rows() |> + select(period, everything()) +staff <- map(dfs, "staff") |> + bind_rows() |> + select(period, everything()) + +# AFC and staff information join based on employee number +# After that, add lookup +lookup <- read.csv("./data-raw/afc_band_lookup.csv", header = TRUE) + +afc_staff <- afc |> + left_join(staff, + by = c("period", "employee_number") + ) |> + left_join(lookup, + by = "pay_scale" + ) + +# Keep three main data frame and it will be used to create S3 class +usethis::use_data(paygap, overwrite = TRUE) +usethis::use_data(quartile, overwrite = TRUE) +usethis::use_data(afc_staff, overwrite = TRUE) + + + +rm(c(dfs, afc, staff)) From 
3963bfcf4494a00a20800853bd5842b197798997 Mon Sep 17 00:00:00 2001 From: kaygo Date: Mon, 23 Oct 2023 16:12:03 +0100 Subject: [PATCH 03/19] add gpg_class S3 object --- DESCRIPTION | 2 +- NAMESPACE | 1 - R/gpg_data_class.R | 220 ++++++++++++++++++++++++++++++++++ R/headcount_data_class.R | 209 -------------------------------- data-raw/01_read_hourly_gpg.R | 13 +- man/gender_profile.Rd | 33 ++++- man/headcount_data.Rd | 27 +++-- 7 files changed, 275 insertions(+), 230 deletions(-) create mode 100644 R/gpg_data_class.R delete mode 100644 R/headcount_data_class.R diff --git a/DESCRIPTION b/DESCRIPTION index 85688bf..d90693d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,8 +16,8 @@ Imports: highcharter, htmltools, magrittr, - nhsbsaR, rlang, + nhsbsaR, scrollytell, shiny, shinyjs, diff --git a/NAMESPACE b/NAMESPACE index d01036d..05ec5ca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,7 +8,6 @@ export(h3_tabstop) export(h4_tabstop) export(h5_tabstop) export(h6_tabstop) -export(headcount_data) export(nhs_card) export(nhs_card_tabstop) export(nhs_footer) diff --git a/R/gpg_data_class.R b/R/gpg_data_class.R new file mode 100644 index 0000000..f17a663 --- /dev/null +++ b/R/gpg_data_class.R @@ -0,0 +1,220 @@ +#' @title S3 gpg class to create headcount, hourly rate by AFC, +#' directorate level hourly rate.' +#' +#' @description \code{gpg_data} is the class used for the creation of +#' headcount, hourly rate by AFC with without directorate in the GPG report. +#' +#' @details The \code{gpg_data} class expects a \code{data.frame} with at +#' least seven columns: period, gender, hourly_rate, quartile, fte, afc_band, +#' directorate. 
+#' +#' Once initiated, the class has six slots: +#' \code{df}: raw data frame \n +#' \code{df_hdcnt}: data frame contains headcount by period \n +#' \code{df_hdcnt_afc}: data frame contains headcount by afc band \n +#' \code{df_hdcnt_dir}: data frame contains headcount by directorate \n +#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade \n +#' \code{ending_fy}: a character vector containing ending reporting period +#' (e.g. 31 March 2023). This uses for introduction paragraph +#' +#' +#' +#' @param x Input data frame. +#' @param log_level keep it WARN +#' @param eda If TRUE base R plot shows in the Viewer +#' +#' @return If the class is not instantiated correctly, nothing is returned. +#' +#' @examples +#' +#' library(nhsbsaGPG) +#' +#' df <- gpg_data(afc_staff) +#' +#' @export + + +gpg_data <- function(x, + log_level = futile.logger::WARN, + eda = FALSE) { + # Set logger severity threshold, defaults to WARN + futile.logger::flog.threshold(log_level) + + + # Checks + futile.logger::flog.info("Initiating gpg_data class. 
+ \n\nIt expects a data.frame with at + least eight columns: period, gender, + headcount, hourly_rate, quartile, + fte, afc_band, directorate.") + + + + futile.logger::flog.debug("Checking x is a data.frame...") + if (!is.data.frame(x)) { + futile.logger::flog.error("x must be a data.frame", + x, + capture = TRUE + ) + } + + futile.logger::flog.debug("Checking x has correct columns...") + + if (length(colnames(x)) < 7) { + futile.logger::flog.error("x must have at least eight columns: + period, gender, headcount, hourly_rate, + quartile, fte, afc_band, directorate.") + } + + futile.logger::flog.debug("Checking x contains a period column...") + if (!"period" %in% colnames(x)) { + stop("x must contain period column") + } + + futile.logger::flog.debug("Checking x contains a gender column...") + if (!"gender" %in% colnames(x)) stop("x must contain gender column") + + futile.logger::flog.debug("Checking x contains a headcount column...") + if (!"headcount" %in% colnames(x)) stop("x must contain headcount column") + + futile.logger::flog.debug("Checking x contains a hourly_rate column...") + if (!"hourly_rate" %in% colnames(x)) { + stop("x must contain hourly_rate column") + } + + futile.logger::flog.debug("Checking x contains a fte column...") + if (!"fte" %in% colnames(x)) { + stop("x must contain fte column") + } + + futile.logger::flog.debug("Checking x contains a afc_band column...") + if (!"afc_band" %in% colnames(x)) { + stop("x must contain afc_band column") + } + + futile.logger::flog.debug("Checking x contains a directorate column...") + if (!"directorate" %in% colnames(x)) { + stop("x must contain directorate column") + } + + futile.logger::flog.debug("Checking x does not contain missing values...") + if (anyNA(x)) stop("x cannot contain any missing values") + + futile.logger::flog.debug("Checking for the correct number of rows...") + if (nrow(x) < 16000) { + futile.logger::flog.warn("x does not appear to be well formed. 
nrow(x) should be + greater than 16000.") + } + + # Check sensible range for reporting period + futile.logger::flog.debug("Checking beginning reporting period in a sensible + range e.g.(2018:2023)...") + + + if (any(as.numeric(stringr::str_sub(x$period, 1, 4)) < 2018)) { + futile.logger::flog.warn("The dates should start from + 2018/19 financial year. Please check data-raw script.") + } + + + futile.logger::flog.info("...check done..") + + # Message required to pass a test + message("Checks completed: 'gpg_data' S3 class created. Good to use for charts") + + # EDA + # number of HEADCOUNT per financial year - expect to increase? + agg_data <- aggregate(headcount ~ period, x, sum) + + if (eda == TRUE) { + barplot(agg_data$headcount, + names.arg = agg_data$period, + las = 2, + ylab = "Reporting period", + xlab = "Headcount" + ) + } + + + # Calculate the latest reporting year + # This values are required to add to the introduction text + # (eg. as of 31 March 2023) + start_latest_year <- max(as.numeric(stringr::str_sub(x$period, 1, 4))) + latest_fy <- paste0( + start_latest_year, "/", + as.numeric(stringr::str_sub(start_latest_year, 3, 4)) + 1 + ) + + + # data frame: aggregate headcount by period + df_hdcnt <- x |> + dplyr::group_by(period) |> + dplyr::summarise(headcount = sum(headcount, na.rm = TRUE)) |> + dplyr::arrange(period) + + # Extract the values + reporting_headcount <- + agg_data$headcount[agg_data$period == latest_fy] + + ending_fy <- as.character(start_latest_year + 1) + + # data frame: aggregate headcount by period and AFC band + df_hdcnt_afc <- x |> + dplyr::group_by(period, gender, afc_band) |> + dplyr::summarise( + headcount = sum(headcount, na.rm = TRUE), + .groups = "drop" + ) |> + dplyr::group_by(period, afc_band) |> + dplyr::mutate( + perc = headcount / sum(headcount) * 100 + ) + + # data frame: aggregate headcount by period and directorate + df_hdcnt_dir <- x |> + dplyr::group_by(period, gender, directorate) |> + dplyr::summarise( + headcount = 
sum(headcount, na.rm = TRUE), + .groups = "drop" + ) |> + dplyr::group_by(period, directorate) |> + dplyr::mutate( + perc = headcount / sum(headcount) * 100 + ) + + # data frame: hourly rate by gender for overall, each AFC band + df_hrrate <- dplyr::bind_rows( + x |> + dplyr::group_by(period, gender, afc_band) |> + dplyr::summarise( + mean_rate = mean(hourly_rate, na.rm = TRUE), + median_rate = median(hourly_rate, na.rm = TRUE), + .groups = "drop" + ), + x |> + dplyr::group_by(period, gender) |> + dplyr::summarise( + mean_rate = mean(hourly_rate, na.rm = TRUE), + median_rate = median(hourly_rate, na.rm = TRUE), + .groups = "drop" + ) |> + dplyr::mutate(afc_band = "Overall") + ) + + + # Define the class here ---- + # This will mainly use for highchart graphs + + structure( + list( + df = x, + df_hdcnt = df_hdcnt, + df_hdcnt_afc = df_hdcnt_afc, + df_hdcnt_dir = df_hdcnt_dir, + df_hrrate = df_hrrate, + reporting_headcoun = reporting_headcount, + ending_fy = ending_fy + ), + class = "gpg_data" + ) +} diff --git a/R/headcount_data_class.R b/R/headcount_data_class.R deleted file mode 100644 index 58ad540..0000000 --- a/R/headcount_data_class.R +++ /dev/null @@ -1,209 +0,0 @@ -#' @title S3 headcount class to create number of headcount by gender and also -#' gender and AFC pay band.#' -#' -#' @description \code{headcount_data} is the class used for the creation of -#' first two headcount related figures in the GPG report. -#' -#' @details The \code{headcount_data} class expects a \code{data.frame} with at -#' least five columns: FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME, FTE_GROUP, HEADCOUNT. Each -#' row represents aggregated headcount by four columns. 
-#' -#' Once initiated, the class has seven slots: -#' \code{df}: data frame \n -#' \code{overview_gender}: data frame \n -#' \code{overview_afc}: data frame \n -#' \code{overview_fte}: data frame \n -#' \code{reporting_headcount}: a numeric vector containing reporting financial -#' year's headcount \n -#' \code{diffs}: a numeric vector containing differences from previous \n -#' financial year headcount to current reporting financial year headcount \n -#' \code{ending_fy}: a character vector containing ending reporting period -#' (e.g. 31 March 2022). This uses for introduction paragraph -#' -#' -#' @param x Input data frame. -#' @param log_level keep it WARN -#' @param eda If TRUE base R plot shows in the Viewer -#' -#' @return If the class is not instantiated correctly, nothing is returned. -#' -#' @examples -#' -#' library(nhsbsaGPG) -#' -#' df <- headcount_data(headcount) -#' -#' @export - - -headcount_data <- function(x, - log_level = futile.logger::WARN, - eda = FALSE) { - - # Set logger severity threshold, defaults to WARN - futile.logger::flog.threshold(log_level) - - - # Checks - futile.logger::flog.info("Initiating headcount_data class. - \n\nIt expects a data.frame with at - least five columns: FINANCIAL_YEAR, gender, - PAY_GRADE_NAME, FTE_GROUP and HEADCOUNT. 
- Each row represents an aggregated headcount - based on four columns.") - - - - futile.logger::flog.debug("Checking x is a data.frame...") - if (!is.data.frame(x)) { - futile.logger::flog.error("x must be a data.frame", - x, - capture = TRUE - ) - } - - futile.logger::flog.debug("Checking x has correct columns...") - if (length(colnames(x)) < 5) { - futile.logger::flog.error("x must have at least five columns: - FINANCIAL_YEAR, - GENDER, PAY_GRADE_NAME, - FTE_GROUP, HEADCOUNT") - } - - futile.logger::flog.debug("Checking x contains a FINANCIAL_YEAR column...") - if (!"FINANCIAL_YEAR" %in% colnames(x)) { - stop("x must contain FINANCIAL_YEAR column") - } - - futile.logger::flog.debug("Checking x contains a GENDER column...") - if (!"GENDER" %in% colnames(x)) stop("x must contain GENDER column") - - futile.logger::flog.debug("Checking x contains a PAY_GRADE_NAME column...") - if (!"PAY_GRADE_NAME" %in% colnames(x)) { - stop("x must contain PAY_GRADE_NAME column") - } - - futile.logger::flog.debug("Checking x contains a FTE_GROUP column...") - if (!"FTE_GROUP" %in% colnames(x)) { - stop("x must contain FTE_GROUP column") - } - - futile.logger::flog.debug("Checking x contains a HEADCOUNT column...") - if (!"HEADCOUNT" %in% colnames(x)) { - stop("x must contain HEADCOUNT column") - } - - futile.logger::flog.debug("Checking x does not contain missing values...") - if (anyNA(x)) stop("x cannot contain any missing values") - - futile.logger::flog.debug("Checking for the correct number of rows...") - if (nrow(x) < 260) { - futile.logger::flog.warn("x does not appear to be well formed. 
nrow(x) should be - greater than 180 (5 year * gender * fte * afc) - as of 2021/22 report.") - } - - - - # Check sensible range for year - futile.logger::flog.debug("Checking beginning financial years in a sensible - range e.g.(2017:2022)...") - - - if (any(as.numeric(stringr::str_sub(x$FINANCIAL_YEAR, 1, 4)) < 2017)) { - futile.logger::flog.warn("The dates should start from - 2017/18 financial year. Please check data-raw script.") - } - - - futile.logger::flog.info("...check done..") - - # Message required to pass a test - message("Checks completed: 'headcount_data' S3 class created.") - - # EDA - # number of HEADCOUNT per financial year - expect to increase? - if (eda == TRUE) { - agg_data <- aggregate(HEADCOUNT ~ FINANCIAL_YEAR, x, sum) - barplot(agg_data$HEADCOUNT, - names.arg = agg_data$FINANCIAL_YEAR, - las = 2, - ylab = "Financial Year", - xlab = "Headcount" - ) - } - - - # Calculate the latest and previous years - # This values are required to add to the interactive document - start_latest_year <- max(as.numeric(stringr::str_sub(x$FINANCIAL_YEAR, 1, 4))) - start_prev_year <- start_latest_year - 1 - # Financial year of interest for the report - latest_fy <- paste0( - start_latest_year, "/", - as.numeric(stringr::str_sub(start_latest_year, 3, 4)) + 1 - ) - previous_fy <- paste0( - start_prev_year, "/", - stringr::str_sub(start_latest_year, 3, 4) - ) - - # First aggregate by financial year - agg_data <- x |> - dplyr::filter(FINANCIAL_YEAR %in% c(latest_fy, previous_fy)) |> - dplyr::group_by(FINANCIAL_YEAR) |> - dplyr::summarise(TOTAL_HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE)) |> - dplyr::arrange(FINANCIAL_YEAR) - - # Extract the values - reporting_headcount <- - agg_data$TOTAL_HEADCOUNT[agg_data$FINANCIAL_YEAR == latest_fy] - previous_reporting_headcount <- - agg_data$TOTAL_HEADCOUNT[agg_data$FINANCIAL_YEAR == previous_fy] - - diffs <- reporting_headcount - previous_reporting_headcount - - ending_fy <- as.character(start_latest_year + 1) - - # Attach data 
frame: headcount by GENDER - overview_gender <- x |> - dplyr::group_by(FINANCIAL_YEAR, GENDER) |> - dplyr::summarise(HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE), - .groups = "drop") |> - tidyr::pivot_wider(names_from = GENDER, - values_from = HEADCOUNT) - - # Attach data frame: headcount by GENDER & PAY_GRADE_NAME - overview_afc <- x |> - dplyr::group_by(FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME) |> - dplyr::summarise(HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE), - .groups = "drop") - - # Attach data frame: headcount by GENDER & FTE - overview_fte <- x |> - dplyr::group_by(FINANCIAL_YEAR, GENDER, FTE_GROUP) |> - dplyr::summarise(HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE), - .groups = "drop") |> - tidyr::pivot_wider(names_from = c(GENDER, FTE_GROUP), - values_from = HEADCOUNT) - - - # Define the class here ---- - # It will use to create highchart line graph - - structure( - list( - df = x, - overview_gender = overview_gender, - overview_afc = overview_afc, - overview_fte = overview_fte, - reporting_headcount = reporting_headcount, - diffs = diffs, - ending_fy = ending_fy - ), - class = "headcount_data" - ) -} - - - diff --git a/data-raw/01_read_hourly_gpg.R b/data-raw/01_read_hourly_gpg.R index 30b00de..139528d 100644 --- a/data-raw/01_read_hourly_gpg.R +++ b/data-raw/01_read_hourly_gpg.R @@ -84,7 +84,16 @@ afc_staff <- afc |> ) |> left_join(lookup, by = "pay_scale" - ) + ) |> + select(-employee_number) |> +# Data quality error July 2013 Archive employee org is wrong, manually edited + mutate(org_l3 = ifelse(org_l3 == 'July 2013 Archive', "914 BSA Finance, Commercial and Estates L3", org_l3), + directorate = stringr::str_replace_all( + org_l3, c("^914 BSA " = "", " L3" = "")), + directorate = stringr::str_trim(directorate), + headcount = 1) |> + select(period, gender, headcount,hourly_rate, quartile, fte, afc_band, directorate) + # Keep three main data frame and it will be used to create S3 class usethis::use_data(paygap, overwrite = TRUE) @@ -93,4 +102,4 @@ 
usethis::use_data(afc_staff, overwrite = TRUE) -rm(c(dfs, afc, staff)) +rm(dfs, afc, staff) diff --git a/man/gender_profile.Rd b/man/gender_profile.Rd index 9b503b2..0ff5e32 100644 --- a/man/gender_profile.Rd +++ b/man/gender_profile.Rd @@ -2,18 +2,43 @@ % Please edit documentation in R/utils_charts.R \name{gender_profile} \alias{gender_profile} -\title{line chart to show the number of headcount by financial year by gender.} +\title{Highcharter line chart to show the number of headcount by financial +year by gender.} \usage{ -gender_profile(x) +gender_profile( + x, + xvar = "FINANCIAL_YEAR", + yvars, + series_names, + yaxis_title, + yaxis_label +) } \arguments{ -\item{x}{Input headcount_data S3 class object.} +\item{x}{Input data frame from \code{headcount_data} S3 class object.} + +\item{xvar}{"Financial Year", default} + +\item{yvars}{data frame converts to list and each list element to create line} + +\item{series_names}{If user wants to give different series name for +highchart legend} + +\item{yaxis_title}{Title of y axis} + +\item{yaxis_label}{Indication of percentage or number} + +\item{show_legend}{TRUE default} + +\item{line_style}{Control line style either Solid or DashDot} + +\item{series_alpha}{Control opacity} } \value{ Returns a highchart or htmlwidget object. 
} \description{ -\code{headcount_data} is the S3 class used for gender related +\code{headcount_data} is the S3 class used for gender related summary of workforce } \examples{ diff --git a/man/headcount_data.Rd b/man/headcount_data.Rd index bc972e7..7b64762 100644 --- a/man/headcount_data.Rd +++ b/man/headcount_data.Rd @@ -2,18 +2,17 @@ % Please edit documentation in R/headcount_data_class.R \name{headcount_data} \alias{headcount_data} -\title{tidy data set for first headcount related two graphs.} +\title{S3 headcount class to create number of headcount by gender and also +gender and AFC pay band.#'} \usage{ headcount_data(x, log_level = futile.logger::WARN, eda = FALSE) } \arguments{ \item{x}{Input data frame.} -\item{log_level}{The severity level at which log messages are written from -least to most serious: TRACE, DEBUG, INFO, WARN, ERROR, FATAL. Default is -level is INFO. See \code{?flog.threshold()} for additional details.} +\item{log_level}{keep it WARN} -\item{eda}{If TRUE an graphical data analysis is conducted for a human to check.} +\item{eda}{If TRUE base R plot shows in the Viewer} } \value{ If the class is not instantiated correctly, nothing is returned. @@ -27,14 +26,16 @@ The \code{headcount_data} class expects a \code{data.frame} with at least five columns: FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME, FTE_GROUP, HEADCOUNT. Each row represents aggregated headcount by four columns. 
-Once initiated, the class has five slots: -\code{df}: dataframe -\code{colnames}: a character vector containing the column names from the {df} -\code{reporting_headcount}: a numeric vector containing reporting financial -year's headcount -\code{diffs}: a numeric vector containing differences from previous -financial year headcount to current reporting financial year headcount -\code{ending_fy}: a character vector containing ending reporting period +Once initiated, the class has seven slots: +\code{df}: data frame \n +\code{overview_gender}: data frame \n +\code{overview_afc}: data frame \n +\code{overview_fte}: data frame \n +\code{reporting_headcount}: a numeric vector containing reporting financial +year's headcount \n +\code{diffs}: a numeric vector containing differences from previous \n +financial year headcount to current reporting financial year headcount \n +\code{ending_fy}: a character vector containing ending reporting period (e.g. 31 March 2022). This uses for introduction paragraph } \examples{ From 477165c8ad8ffcf9de3c07ce8ce8759fbdd63873 Mon Sep 17 00:00:00 2001 From: kaygo Date: Mon, 23 Oct 2023 22:16:02 +0100 Subject: [PATCH 04/19] fix to pass build test --- .Rbuildignore | 1 + DESCRIPTION | 11 +-- NAMESPACE | 1 + R/data.R | 81 +++++++++++++++---- R/gpg_data_class.R | 25 ++++-- R/utils_charts.R | 33 ++++---- ...1_read_hourly_gpg.R => gpg_data_process.R} | 6 +- man/afc_staff.Rd | 27 +++++++ man/gender_profile.Rd | 26 +++--- man/gpg_data.Rd | 44 ++++++++++ man/headcount.Rd | 18 ----- man/headcount_data.Rd | 47 ----------- man/paygap.Rd | 25 ++++++ man/quartile.Rd | 34 ++++++++ tests/testthat/test-utils_charts.R | 20 +++-- 15 files changed, 263 insertions(+), 136 deletions(-) rename data-raw/{01_read_hourly_gpg.R => gpg_data_process.R} (94%) create mode 100644 man/afc_staff.Rd create mode 100644 man/gpg_data.Rd delete mode 100644 man/headcount.Rd delete mode 100644 man/headcount_data.Rd create mode 100644 man/paygap.Rd create mode 100644 
man/quartile.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 73641e8..f6a7b85 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,3 +11,4 @@ ^gitleaks.toml$ ^\.github$ ^\.lintr$ +^data_temp$ diff --git a/DESCRIPTION b/DESCRIPTION index d90693d..19ef858 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,23 +16,18 @@ Imports: highcharter, htmltools, magrittr, - rlang, nhsbsaR, scrollytell, shiny, shinyjs, dplyr (>= 1.1.3), - dbplyr (>= 2.3.3), - forcats (>= 1.0.0), futile.logger (>= 1.4.3), - stringr, - formatR, - tidyr, - purrr + stringr Suggests: pkgload, testthat (>= 3.0.0), - usethis + usethis, + tidyr Remotes: nhsbsa-data-analytics/nhsbsaR, statistiekcbs/scrollytell diff --git a/NAMESPACE b/NAMESPACE index 05ec5ca..64342d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export("%>%") export(gender_profile) +export(gpg_data) export(h1_tabstop) export(h2_tabstop) export(h3_tabstop) diff --git a/R/data.R b/R/data.R index 72da395..8b824cd 100644 --- a/R/data.R +++ b/R/data.R @@ -1,16 +1,69 @@ -#' NHSBSA employee headcount +#' NHSBSA employee staff_afc #' -#' A dataset containing NHSBSA employee headcount -#' since financial year 2017/18, split by gender, AFC band -#' and FTE (full time or part time) +#' A dataset containing NHSBSA employee hourly pay +#' by reporting period split by gender, AFC band, directorate. +#' \itemize{ +#' \item period. 2018/19, 2019/20 etc character +#' \item gender. Male or Female, character +#' \item headcount. employee headcount used for aggregation +#' \item hourly_rate. hourly rate as shown pay slip +#' \item quartile. split hourly_rate by quartile by gender +#' \item fte. employee full time or part time info +#' \item afc_band. AFC band +#' \item directorate. 
NHSBSA directorate +#' } +#' +#' @docType data +#' @keywords datasets +#' @name afc_staff +#' @usage afc_staff +#' @format A data frame with gender pay gap information +#' +#' +"afc_staff" + + +#' NHSBSA employee paygap +#' +#' A dataset containing NHSBSA employee paygap +#' Directly pulled from ESR dashboard (NHS National Returns) +#' gender, average hourly rate, median hourly rate and pay gap% +#' +#' \itemize{ +#' \item period. 2018/19, 2019/20 etc character +#' \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees +#' \item median_hr_gpg. Median gender pay gap % based on male full-pay relevant employees +#' } +#' +#' @docType data +#' @keywords datasets +#' @name paygap +#' @usage paygap +#' @format A data frame with paygap information + +"paygap" + + +#' NHSBSA employee quartile #' -#' @format A data frame -#' \describe{ -#' \item{FINANCIAL_YEAR}{01-Mar-year, factor} -#' \item{GENDER}{Male or Female, character} -#' \item{PAY_GRADE_NAME}{AFC band} -#' \item{FTE_GROUP}{employee full time or part time info} -#' \item{HEADCOUNT}To get number of employees} -#' ... -#' } -"headcount" \ No newline at end of file +#' A dataset containing NHSBSA employee hourly pay +#' by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc) +#' split by gender, AFC band, directorate +#' +#' +#' \itemize{ +#' \item period. 2018/19, 2019/20 etc character +#' \item quartile. split hourly pay into quartiles +#' \item female. number of female employees in each quartile +#' \item male. number of male employees in each quartile +#' \item quartile. split hourly_rate by quartile by gender +#' \item female_percent. female employee % in quartile +#' \item male_percent. 
male employee % in quartile +#' } +#' @docType data +#' @keywords datasets +#' @name quartile +#' @usage quartile +#' @format data frame with employee gender pay gap by quartiles + +"quartile" \ No newline at end of file diff --git a/R/gpg_data_class.R b/R/gpg_data_class.R index f17a663..5b2a47a 100644 --- a/R/gpg_data_class.R +++ b/R/gpg_data_class.R @@ -9,11 +9,12 @@ #' directorate. #' #' Once initiated, the class has six slots: -#' \code{df}: raw data frame \n -#' \code{df_hdcnt}: data frame contains headcount by period \n -#' \code{df_hdcnt_afc}: data frame contains headcount by afc band \n -#' \code{df_hdcnt_dir}: data frame contains headcount by directorate \n -#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade \n +#' \code{df}: raw data frame +#' \code{df_hdcnt}: data frame contains headcount by period +#' \code{df_hdcnt_gender}: data frame contains headcount by gender by period +#' \code{df_hdcnt_afc}: data frame contains headcount by afc band +#' \code{df_hdcnt_dir}: data frame contains headcount by directorate +#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade #' \code{ending_fy}: a character vector containing ending reporting period #' (e.g. 31 March 2023). This uses for introduction paragraph #' @@ -25,10 +26,9 @@ #' #' @return If the class is not instantiated correctly, nothing is returned. 
#' -#' @examples -#' +#' @examples +#' #' library(nhsbsaGPG) -#' #' df <- gpg_data(afc_staff) #' #' @export @@ -157,6 +157,14 @@ gpg_data <- function(x, agg_data$headcount[agg_data$period == latest_fy] ending_fy <- as.character(start_latest_year + 1) + + # data frame: aggregate headcount by gender by period + df_hdcnt_gender <- x |> + dplyr::group_by(period,gender) |> + dplyr::summarise(headcount = sum(headcount, na.rm = TRUE)) |> + dplyr::arrange(period) + + # data frame: aggregate headcount by period and AFC band df_hdcnt_afc <- x |> @@ -209,6 +217,7 @@ gpg_data <- function(x, list( df = x, df_hdcnt = df_hdcnt, + df_hdcnt_gender = df_hdcnt_gender, df_hdcnt_afc = df_hdcnt_afc, df_hdcnt_dir = df_hdcnt_dir, df_hrrate = df_hrrate, diff --git a/R/utils_charts.R b/R/utils_charts.R index 8ac956e..c5285b3 100644 --- a/R/utils_charts.R +++ b/R/utils_charts.R @@ -1,37 +1,36 @@ #' @title Highcharter line chart to show the number of headcount by financial #' year by gender. #' -#' @description \code{headcount_data} is the S3 class used for gender related -#' summary of workforce +#' @description \code{gpg_data} is the S3 class used for trend #' #' #' @return Returns a highchart or htmlwidget object. #' -#' @examples -#' -#' workforce <- nhsbsaGPG::headcount_data(nhsbsaGPG::headcount) -#' nhsbsaGPG::gender_profile(workforce) +#' @examples \dontrun{ +#' workforce <- nhsbsaGPG::gpg_data(nhsbsaGPG::afc_staff) +#' nhsbsaGPG::gender_profile(workforce$df_hdcnt_gender) +#' } #' #' @export -#' @param x Input data frame from \code{headcount_data} S3 class object. -#' @param xvar "Financial Year", default +#' @param x Input data frame from \code{gpg_data} S3 class object. 
+#' @param xvar "period", default #' @param yvars data frame converts to list and each list element to create line -#' @param series_names If user wants to give different series name for -#' highchart legend +#' @param series_names If user wants to give different series name for highchart legend #' @param yaxis_title Title of y axis #' @param yaxis_label Indication of percentage or number #' @param show_legend TRUE default -#' @param line_style Control line style either Solid or DashDot -#' @param series_alpha Control opacity -#' -#' @import nhsbsa-data-analytics/nhsbsaR +#' +#' @importFrom aggregate +#' @importFrom barplot +#' #' gender_profile <- function(x, - xvar = "FINANCIAL_YEAR", + xvar = "period", yvars, series_names, yaxis_title, - yaxis_label + yaxis_label, + show_legend = TRUE ){ out <- tryCatch( expr = { @@ -57,7 +56,7 @@ gender_profile <- function(x, data = data, type = "line", highcharter::hcaes( - x = .data[[xvar]], # default financial year + x = .data[[xvar]], # default period y = .data[[yvar]] # Female for example ), name = series_name # these labels will show in legend diff --git a/data-raw/01_read_hourly_gpg.R b/data-raw/gpg_data_process.R similarity index 94% rename from data-raw/01_read_hourly_gpg.R rename to data-raw/gpg_data_process.R index 139528d..a0b5534 100644 --- a/data-raw/01_read_hourly_gpg.R +++ b/data-raw/gpg_data_process.R @@ -27,7 +27,11 @@ process_file <- function(filepath) { paygap = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |> select(1:3) |> janitor::clean_names() |> - mutate(period = financial_year), + mutate(period = financial_year) |> + filter(gender == "Pay Gap %") |> + select(period, + avg_hr_gpg = avg_hourly_rate, + median_hr_gpg = median_hourly_rate), quartile = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |> select(5:9) |> janitor::clean_names() |> diff --git a/man/afc_staff.Rd b/man/afc_staff.Rd new file mode 100644 index 0000000..afb475f --- /dev/null +++ b/man/afc_staff.Rd @@
-0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{afc_staff} +\alias{afc_staff} +\title{NHSBSA employee staff_afc} +\format{ +A data frame with gender pay gap information +} +\usage{ +afc_staff +} +\description{ +A dataset containing NHSBSA employee hourly pay +by reporting period split by gender, AFC band, directorate. +\itemize{ + \item period. 2018/19, 2019/20 etc character + \item gender. Male or Female, character + \item headcount. employee headcount used for aggregation + \item hourly_rate. hourly rate as shown pay slip + \item quartile. split hourly_rate by quartile by gender + \item fte. employee full time or part time info + \item afc_band. AFC band + \item directorate. NHSBSA directorate +} +} +\keyword{datasets} diff --git a/man/gender_profile.Rd b/man/gender_profile.Rd index 0ff5e32..e31235c 100644 --- a/man/gender_profile.Rd +++ b/man/gender_profile.Rd @@ -7,43 +7,39 @@ year by gender.} \usage{ gender_profile( x, - xvar = "FINANCIAL_YEAR", + xvar = "period", yvars, series_names, yaxis_title, - yaxis_label + yaxis_label, + show_legend = TRUE ) } \arguments{ -\item{x}{Input data frame from \code{headcount_data} S3 class object.} +\item{x}{Input data frame from \code{gpg_data} S3 class object.} -\item{xvar}{"Financial Year", default} +\item{xvar}{"period", default} \item{yvars}{data frame converts to list and each list element to create line} -\item{series_names}{If user wants to give different series name for -highchart legend} +\item{series_names}{If user wants to give different series name for highchart legend} \item{yaxis_title}{Title of y axis} \item{yaxis_label}{Indication of percentage or number} \item{show_legend}{TRUE default} - -\item{line_style}{Control line style either Solid or DashDot} - -\item{series_alpha}{Control opacity} } \value{ Returns a highchart or htmlwidget object. 
} \description{ -\code{headcount_data} is the S3 class used for gender related -summary of workforce +\code{gpg_data} is the S3 class used for trend } \examples{ - -workforce <- nhsbsaGPG::headcount_data(nhsbsaGPG::headcount) -nhsbsaGPG::gender_profile(workforce) +\dontrun{ +workforce <- nhsbsaGPG::gpg_data(nhsbsaGPG::afc_staff) +nhsbsaGPG::gender_profile(workforce$df_hdcnt_gender) +} } diff --git a/man/gpg_data.Rd b/man/gpg_data.Rd new file mode 100644 index 0000000..3e79d37 --- /dev/null +++ b/man/gpg_data.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gpg_data_class.R +\name{gpg_data} +\alias{gpg_data} +\title{S3 gpg class to create headcount, hourly rate by AFC, +directorate level hourly rate.'} +\usage{ +gpg_data(x, log_level = futile.logger::WARN, eda = FALSE) +} +\arguments{ +\item{x}{Input data frame.} + +\item{log_level}{keep it WARN} + +\item{eda}{If TRUE base R plot shows in the Viewer} +} +\value{ +If the class is not instantiated correctly, nothing is returned. +} +\description{ +\code{gpg_data} is the class used for the creation of +headcount, hourly rate by AFC with without directorate in the GPG report. +} +\details{ +The \code{gpg_data} class expects a \code{data.frame} with at +least seven columns: period, gender, hourly_rate, quartile, fte, afc_band, +directorate. + +Once initiated, the class has six slots: +\code{df}: raw data frame +\code{df_hdcnt}: data frame contains headcount by period +\code{df_hdcnt_gender}: data frame contains headcount by gender by period +\code{df_hdcnt_afc}: data frame contains headcount by afc band +\code{df_hdcnt_dir}: data frame contains headcount by directorate +\code{df_hrrate}: data frame contains hourly rate by gender for each grade +\code{ending_fy}: a character vector containing ending reporting period +(e.g. 31 March 2023). 
This is used for the introduction paragraph +} +\examples{ + +library(nhsbsaGPG) +df <- gpg_data(afc_staff) + +} diff --git a/man/headcount.Rd b/man/headcount.Rd deleted file mode 100644 index c02164e..0000000 --- a/man/headcount.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{headcount} -\alias{headcount} -\title{NHSBSA employee headcount} -\format{ - -} -\usage{ -headcount -} -\description{ -A dataset containing NHSBSA employee headcount -since financial year 2017/18, split by gender, AFC band -and FTE (full time or part time) -} -\keyword{datasets} diff --git a/man/headcount_data.Rd b/man/headcount_data.Rd deleted file mode 100644 index 7b64762..0000000 --- a/man/headcount_data.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/headcount_data_class.R -\name{headcount_data} -\alias{headcount_data} -\title{S3 headcount class to create number of headcount by gender and also -gender and AFC pay band.#'} -\usage{ -headcount_data(x, log_level = futile.logger::WARN, eda = FALSE) -} -\arguments{ -\item{x}{Input data frame.} - -\item{log_level}{keep it WARN} - -\item{eda}{If TRUE base R plot shows in the Viewer} -} -\value{ -If the class is not instantiated correctly, nothing is returned. -} -\description{ -\code{headcount_data} is the class used for the creation of -first two headcount related figures in the GPG report. -} -\details{ -The \code{headcount_data} class expects a \code{data.frame} with at -least five columns: FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME, FTE_GROUP, HEADCOUNT. Each -row represents aggregated headcount by four columns.
- -Once initiated, the class has seven slots: -\code{df}: data frame \n -\code{overview_gender}: data frame \n -\code{overview_afc}: data frame \n -\code{overview_fte}: data frame \n -\code{reporting_headcount}: a numeric vector containing reporting financial -year's headcount \n -\code{diffs}: a numeric vector containing differences from previous \n -financial year headcount to current reporting financial year headcount \n -\code{ending_fy}: a character vector containing ending reporting period -(e.g. 31 March 2022). This uses for introduction paragraph -} -\examples{ - -library(nhsbsaGPG) - -df <- headcount_data(headcount) - -} diff --git a/man/paygap.Rd b/man/paygap.Rd new file mode 100644 index 0000000..2e19a52 --- /dev/null +++ b/man/paygap.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{paygap} +\alias{paygap} +\title{NHSBSA employee paygap} +\format{ +A data frame with paygap information +} +\usage{ +paygap +} +\description{ +A dataset containing NHSBSA employee paygap +Directly pulled from ESR dashboard (NHS National Returns) +gender, average hourly rate, median hourly rate and pay gap% +} +\details{ +\itemize{ + \item period. 2018/19, 2019/20 etc character + \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees + \item median_hr_gpg. Median gender pay gap % based on male full-pay relevant employees + } +} +\keyword{datasets} diff --git a/man/quartile.Rd b/man/quartile.Rd new file mode 100644 index 0000000..9cc63aa --- /dev/null +++ b/man/quartile.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{quartile} +\alias{quartile} +\title{NHSBSA employee quartile} +\format{ +An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 20 rows and 6 columns. 
+} +\usage{ +quartile +} +\description{ +A dataset containing NHSBSA employee hourly pay +by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc) +split by gender, AFC band, directorate +} +\details{ +\itemize{ + \item period. 2018/19, 2019/20 etc character + \item quartile. split hourly pay into quartiles + \item female. number of female employees in each quartile + \item male. number of male employees in each quartile + \item quartile. split hourly_rate by quartile by gender + \item female_percent. female employee % in quartile + \item male_percent. male employee % in quartile + } + @docType data + @keywords datasets + @name quartile + @usage quartile + @format data frame with employee gender pay gap by quartiles +} +\keyword{datasets} diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R index bb6551f..f418d8f 100644 --- a/tests/testthat/test-utils_charts.R +++ b/tests/testthat/test-utils_charts.R @@ -1,9 +1,15 @@ -df <- headcount_data(headcount) -x <- df$overview_gender + +library(tidyr) + +df <- gpg_data(afc_staff) +x <- df$df_hdcnt_gender |> + tidyr::pivot_wider(names_from = gender, + values_from = headcount) + testthat::test_that("gender_profile runs without errors", { expect_silent(gender_profile(x, - xvar = "FINANCIAL_YEAR", + xvar = "period", yvars = c("Male", "Female"), series_names = c("Male", "Female"), yaxis_title = "Male and Female employee headcount", @@ -15,14 +21,12 @@ testthat::test_that("gender_profile runs without errors", { testthat::test_that("gender_profile outputs a highchart, htmlwidget class", { expect_equal(class( gender_profile(x, - xvar = "FINANCIAL_YEAR", + xvar = "period", yvars = c("Male", "Female"), series_names = c("Male", "Female"), yaxis_title = "Male and Female employee headcount", - yaxis_label = "" - ), - yaxis_label = "number" - ), c("highchart", "htmlwidget")) + yaxis_label = "number" + )), c("highchart", "htmlwidget")) }) testthat::test_that("gender_profile takes list 
as an input", { From 94e61bda7a697ff0e0d72045ba256bb473817fe5 Mon Sep 17 00:00:00 2001 From: kaygo Date: Tue, 24 Oct 2023 08:57:21 +0100 Subject: [PATCH 05/19] remove example old faithful data --- R/faithful.R | 23 ----------------------- data/faithful.rda | Bin 1327 -> 0 bytes 2 files changed, 23 deletions(-) delete mode 100644 R/faithful.R delete mode 100644 data/faithful.rda diff --git a/R/faithful.R b/R/faithful.R deleted file mode 100644 index 23fd845..0000000 --- a/R/faithful.R +++ /dev/null @@ -1,23 +0,0 @@ -#' Old Faithful Geyser Data -#' -#' A closer look at \code{faithful$eruptions} reveals that these are -#' heavily rounded times originally in seconds, where multiples of 5 are -#' more frequent than expected under non-human measurement. For a -#' better version of the eruption times, see the example below. -#' There are many versions of this dataset around: Azzalini and Bowman -#' (1990) use a more complete version. -#' -#' \itemize{ -#' \item eruptions. Eruption time in mins -#' \item waiting. Waiting time to next eruption (in mins) -#' } -#' -#' @source {W. Hrdle.} -#' -#' @docType data -#' @keywords datasets -#' @name faithful -#' @usage faithful -#' @format A data frame with 272 observations on 2 variables. 
-#' -"faithful" diff --git a/data/faithful.rda b/data/faithful.rda deleted file mode 100644 index 704313a32aa3923c57fcce405bc528d366db6850..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1327 zcmV+~1O0K7-yKA;Na_yU2fn>B&qEw>S)FyGJ8`~)BrR914pE200)$98bBHV27q8~ zQ`9sKG5|COp%m~$ViF#p&_;ov0iXjD6Vw1SWEubf004S`000001e#QeO+8Jip{I!h zO$LmHfCEROWYbI#VlrsZG5`QH(8vuM1_%Z~G$3Tc8X9EK!Wv*qA(4V4zs?v0i6hvz(EG3fk_GyHIhLRE`vKP z`#QBkoj9c^DKpJdKnc*P@<#{;xhe}tJOe))3|Hu=NmYQyk(g{%HE@J>fWcQ?d}Mc0 z-YJ?81szFf?NViQ$#oum?~%?&fhud}g+(YRKT8+g{&%)0=4g&jv`3SrUQUY%^nNXC zq6?Qqs-hAQG-S{KhRAR*Fi=2{2udLgAXLIdAx4lfNf;ysKs2O;{A%L3tW~Q;1|XIx zWbl#vOOLv}CY&sk5J=u*P3TiISm@*;SPOi~46LRQWIr`A)#+Vw%LwRwA;qNx)x@xC ziZSL*F@QFD%LEG2IYSsU&Vh48q?U#~O2)w}>uDVHy)U?f4C%*fLsg*bZXFg*Wt^6> zqB<7#^0Ks2&NG&Itr5Zv%FM#@_oH+<}PzTlhA0cb3fL}%P?YsvJ6yJ zil7RLf~*u45LhUpu@Q=*z+$SZBC*M`qNu2>1rY^|V#P)Zs4P@f7AT__jP}W>F!~5- zJ1)C7u)`t6b-YOe6?nkHk4a!du#k_S2o=>pE3jL56a|E0LS$rTn@siy+ioIBpfpE; z=_V>Pzq_Xlh}kb8>P~Ll{#)tcRk;R7%x>EY5kQDyKlsdVY(`g(oif zZif@ivZqV(BP6nx9&1SAPO^<(Vb+i)>a?7(81wqTja6US(^mXZb0P8uYAYN?G_clc z*fxC;Ev};UjKuKvb*d!t1d@}AIa`L=AoD(HCkYV31k zw19-cf&djGw)(^wyPXLT-CiMp3SF6*kcq@Mgu_u%OfACyQXh~RvVPOvko*9F+~sKX z4qs7$a539lI(%u7tHA*AM)7TjP?rK-BTcWx^Q()7iLFa8A-P8EY=r)aa%RX(H3%!I l{xG*oGG7!d04iOu%p?FAFbl?%p%AD1UC9*TLP8NpAW*xzIeGv9 From c4ce6e1de86bd1094ddb4aebf00695e6ca0b8035 Mon Sep 17 00:00:00 2001 From: kaygo Date: Tue, 24 Oct 2023 08:59:59 +0100 Subject: [PATCH 06/19] rda added gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 2755bb2..fb16f07 100644 --- a/.gitignore +++ b/.gitignore @@ -15,5 +15,7 @@ rsconnect # Excel files (including macros and templates) # ################################################ *.[xX][lL][sS][xXmMtT]? 
+# Any RDA files +*.[rR][dD][aA] data/* data_temp/* From 6939313374d3244ec841f2a6d6fabbe29e9228c9 Mon Sep 17 00:00:00 2001 From: kaygo Date: Tue, 24 Oct 2023 13:34:09 +0100 Subject: [PATCH 07/19] Data stays in azure people, temporary download to process gpg_data_process.r then delete all files in data_temp --- R/data.R | 4 +- data-raw/1.read_azure_blob.R | 42 +++++++++++++++++++ ...pg_data_process.R => 2.gpg_data_process.R} | 16 +++++++ data-raw/execution.json | 7 ++++ data-raw/faithful.R | 4 -- 5 files changed, 67 insertions(+), 6 deletions(-) create mode 100644 data-raw/1.read_azure_blob.R rename data-raw/{gpg_data_process.R => 2.gpg_data_process.R} (88%) create mode 100644 data-raw/execution.json delete mode 100644 data-raw/faithful.R diff --git a/R/data.R b/R/data.R index 8b824cd..e02135a 100644 --- a/R/data.R +++ b/R/data.R @@ -1,7 +1,7 @@ -#' NHSBSA employee staff_afc +#' NHSBSA employee afc_staff #' #' A dataset containing NHSBSA employee hourly pay -#' by reporting period split by gender, AFC band, directorate. +#' by reporting period split by gender, FTE, AFC band, directorate. #' \itemize{ #' \item period. 2018/19, 2019/20 etc character #' \item gender. 
Male or Female, character diff --git a/data-raw/1.read_azure_blob.R b/data-raw/1.read_azure_blob.R new file mode 100644 index 0000000..54ad5f4 --- /dev/null +++ b/data-raw/1.read_azure_blob.R @@ -0,0 +1,42 @@ +library(AzureStor) +library(jsonlite) +library(janitor) +library(nhsbsaR) +library(dplyr) + +config <- jsonlite::fromJSON("./data-raw/execution.json") + +account_name <- Sys.getenv('AZURE_BLOB_STORAGE_PDS_ACCOUNT_NAME') +account_key <- Sys.getenv('AZURE_BLOB_STORAGE_PDS_ACCOUNT_KEY') +endpoint_suffix <- "core.windows.net" + +container_name <- config$import$container +folder_path <- config$import$folder + + +# Create a blob service client +blob_endpoint <- sprintf('https://%s.blob.%s', account_name, endpoint_suffix) +blob_client <- storage_endpoint(blob_endpoint, key=account_key) + +# Get the blob client instance for the given container +blob_container <- storage_container(blob_client, container_name) + +# List all blobs in the specified folder +blobs_in_folder <- list_blobs(blob_container, prefix=folder_path) + +# Extract the 'name' values from the result +blob_names <- blobs_in_folder[["name"]] + +# Loop through each blob in the folder and download +for (blob_name in blob_names) { + + local_file_path <- paste0(config$import$local_path, '/', basename(blob_name)) + + # Check if the file exists locally, and if so, delete it + if (file.exists(local_file_path)) { + file.remove(local_file_path) + } + + # Download the blob data to the local file + storage_download(blob_container, blob_name, local_file_path, overwrite = TRUE) +} diff --git a/data-raw/gpg_data_process.R b/data-raw/2.gpg_data_process.R similarity index 88% rename from data-raw/gpg_data_process.R rename to data-raw/2.gpg_data_process.R index a0b5534..1e2a23c 100644 --- a/data-raw/gpg_data_process.R +++ b/data-raw/2.gpg_data_process.R @@ -104,6 +104,22 @@ usethis::use_data(paygap, overwrite = TRUE) usethis::use_data(quartile, overwrite = TRUE) usethis::use_data(afc_staff, overwrite = TRUE) +# delete all 
the files in data_temp as they only stay in azure storage +# Specify the folder path +folder_path <- "./data_temp" + +# List all files in the directory +files_to_delete <- list.files(path = folder_path, full.names = TRUE) + +# Remove all files +result <- file.remove(files_to_delete) + +# Check if all files were deleted successfully +if (all(result)) { + cat("All files deleted successfully.\n") +} else { + cat("Some files could not be deleted.\n") +} rm(dfs, afc, staff) diff --git a/data-raw/execution.json b/data-raw/execution.json new file mode 100644 index 0000000..f60bf46 --- /dev/null +++ b/data-raw/execution.json @@ -0,0 +1,7 @@ +{ + "import": { + "container": "peopledata-prod", + "local_path": "./data_temp", + "folder": "Gender Pay Gap Data" + } +} \ No newline at end of file diff --git a/data-raw/faithful.R b/data-raw/faithful.R deleted file mode 100644 index 41faec3..0000000 --- a/data-raw/faithful.R +++ /dev/null @@ -1,4 +0,0 @@ -# Add any preparation before saving your data using usethis::use_data - -# Add to data/ -usethis::use_data(faithful, overwrite = TRUE) From 191d87661aa255c110eec2d48ce823ed94d0faff Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 15:44:03 +0100 Subject: [PATCH 08/19] chart functions for GPG report --- DESCRIPTION | 7 +- NAMESPACE | 6 +- R/app_server.R | 4 +- R/app_ui.R | 12 +- R/data.R | 38 ++--- R/gpg_data_class.R | 36 ++-- R/mod_chart_example.R | 60 ------- R/mod_introduction.R | 72 ++++++++ R/mod_markdown_example.R | 22 --- R/mod_scrollytell_example.R | 189 --------------------- R/utils_charts.R | 211 ++++++++++++++++++++---- data-raw/1.read_azure_blob.R | 18 +- data-raw/2.gpg_data_process.R | 54 ++++-- man/afc_staff.Rd | 6 +- man/faithful.Rd | 30 ---- man/gpg_pyramid.Rd | 24 +++ man/gpg_stack.Rd | 26 +++ man/{gender_profile.Rd => gpg_trend.Rd} | 19 +-- man/quartile.Rd | 9 +- tests/testthat/test-utils_charts.R | 70 ++++++-- 20 files changed, 464 insertions(+), 449 deletions(-) delete mode 100644 R/mod_chart_example.R 
create mode 100644 R/mod_introduction.R delete mode 100644 R/mod_markdown_example.R delete mode 100644 R/mod_scrollytell_example.R delete mode 100644 man/faithful.Rd create mode 100644 man/gpg_pyramid.Rd create mode 100644 man/gpg_stack.Rd rename man/{gender_profile.Rd => gpg_trend.Rd} (71%) diff --git a/DESCRIPTION b/DESCRIPTION index 19ef858..29de8f7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,20 +17,19 @@ Imports: htmltools, magrittr, nhsbsaR, - scrollytell, shiny, shinyjs, dplyr (>= 1.1.3), futile.logger (>= 1.4.3), - stringr + stringr, + htmlwidgets Suggests: pkgload, testthat (>= 3.0.0), usethis, tidyr Remotes: - nhsbsa-data-analytics/nhsbsaR, - statistiekcbs/scrollytell + nhsbsa-data-analytics/nhsbsaR Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 diff --git a/NAMESPACE b/NAMESPACE index 64342d2..eb263e2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,10 @@ # Generated by roxygen2: do not edit by hand export("%>%") -export(gender_profile) export(gpg_data) +export(gpg_pyramid) +export(gpg_stack) +export(gpg_trend) export(h1_tabstop) export(h2_tabstop) export(h3_tabstop) @@ -27,3 +29,5 @@ importFrom(golem,bundle_resources) importFrom(golem,favicon) importFrom(golem,with_golem_options) importFrom(magrittr,"%>%") +importFrom(shiny,NS) +importFrom(shiny,tagList) diff --git a/R/app_server.R b/R/app_server.R index 1c429f7..61c8c3a 100644 --- a/R/app_server.R +++ b/R/app_server.R @@ -6,7 +6,5 @@ #' @noRd app_server <- function(input, output, session) { # Your application server logic - mod_markdown_example_server("markdown_example_ui_1") - mod_chart_example_server("chart_example_ui_1") - mod_scrollytell_example_server("scrollytell_example_1") + mod_introduction_server("introduction_1") } diff --git a/R/app_ui.R b/R/app_ui.R index 9416988..7299b3f 100644 --- a/R/app_ui.R +++ b/R/app_ui.R @@ -26,16 +26,8 @@ app_ui <- function(request) { well = FALSE, widths = c(3, 9), tabPanel( - title = "Introduction", - mod_markdown_example_ui("markdown_example_ui_1") - 
), - tabPanel( - title = "Charts", - mod_chart_example_ui("chart_example_ui_1") - ), - tabPanel( - title = "Scrolly example", - mod_scrollytell_example_ui("scrollytell_example_1") + title = "Report", + mod_introduction_ui("introduction_1") ) ) ) diff --git a/R/data.R b/R/data.R index e02135a..0803780 100644 --- a/R/data.R +++ b/R/data.R @@ -8,33 +8,34 @@ #' \item headcount. employee headcount used for aggregation #' \item hourly_rate. hourly rate as shown pay slip #' \item quartile. split hourly_rate by quartile by gender -#' \item fte. employee full time or part time info +#' \item fte. employee full time or part time information #' \item afc_band. AFC band #' \item directorate. NHSBSA directorate -#' } -#' +#' } +#' #' @docType data #' @keywords datasets #' @name afc_staff #' @usage afc_staff -#' @format A data frame with gender pay gap information -#' -#' +#' @format A data frame with gender pay gap information +#' +#' "afc_staff" #' NHSBSA employee paygap -#' +#' #' A dataset containing NHSBSA employee paygap -#' Directly pulled from ESR dashboard (NHS National Returns) +#' Directly pulled from ESR dashboard (NHS National Returns) #' gender, average hourly rate, median hourly rate and pay gap% -#' +#' #' \itemize{ #' \item period. 2018/19, 2019/20 etc character #' \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees -#' \item median_hr_gpg. Median gender pay gap % based on male full-pay relevant employees -#' } -#' +#' \item median_hr_gpg. Median gender pay gap % based +#' on male full-pay relevant employees +#' } +#' #' @docType data #' @keywords datasets #' @name paygap @@ -48,22 +49,21 @@ #' #' A dataset containing NHSBSA employee hourly pay #' by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc) -#' split by gender, AFC band, directorate -#' -#' +#' by quartiles +#' +#' #' \itemize{ #' \item period. 2018/19, 2019/20 etc character #' \item quartile. split hourly pay into quartiles #' \item female. 
number of female employees in each quartile #' \item male. number of male employees in each quartile #' \item quartile. split hourly_rate by quartile by gender -#' \item female_percent. female employee % in quartile -#' \item male_percent. male employee % in quartile #' } +#' #' @docType data #' @keywords datasets #' @name quartile #' @usage quartile -#' @format data frame with employee gender pay gap by quartiles +#' @format data frame with employee gender hourly pay by quartiles -"quartile" \ No newline at end of file +"quartile" diff --git a/R/gpg_data_class.R b/R/gpg_data_class.R index 5b2a47a..04df3a7 100644 --- a/R/gpg_data_class.R +++ b/R/gpg_data_class.R @@ -9,12 +9,12 @@ #' directorate. #' #' Once initiated, the class has six slots: -#' \code{df}: raw data frame -#' \code{df_hdcnt}: data frame contains headcount by period -#' \code{df_hdcnt_gender}: data frame contains headcount by gender by period -#' \code{df_hdcnt_afc}: data frame contains headcount by afc band -#' \code{df_hdcnt_dir}: data frame contains headcount by directorate -#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade +#' \code{df}: raw data frame +#' \code{df_hdcnt}: data frame contains headcount by period +#' \code{df_hdcnt_gender}: data frame contains headcount by gender by period +#' \code{df_hdcnt_afc}: data frame contains headcount by afc band +#' \code{df_hdcnt_dir}: data frame contains headcount by directorate +#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade #' \code{ending_fy}: a character vector containing ending reporting period #' (e.g. 31 March 2023). This uses for introduction paragraph #' @@ -26,8 +26,8 @@ #' #' @return If the class is not instantiated correctly, nothing is returned. 
#' -#' @examples -#' +#' @examples +#' #' library(nhsbsaGPG) #' df <- gpg_data(afc_staff) #' @@ -149,7 +149,8 @@ gpg_data <- function(x, # data frame: aggregate headcount by period df_hdcnt <- x |> dplyr::group_by(period) |> - dplyr::summarise(headcount = sum(headcount, na.rm = TRUE)) |> + dplyr::summarise(headcount = sum(headcount, na.rm = TRUE), + .groups = "drop") |> dplyr::arrange(period) # Extract the values @@ -157,14 +158,15 @@ gpg_data <- function(x, agg_data$headcount[agg_data$period == latest_fy] ending_fy <- as.character(start_latest_year + 1) - + # data frame: aggregate headcount by gender by period df_hdcnt_gender <- x |> - dplyr::group_by(period,gender) |> - dplyr::summarise(headcount = sum(headcount, na.rm = TRUE)) |> + dplyr::group_by(period, gender) |> + dplyr::summarise(headcount = sum(headcount, na.rm = TRUE), + .groups = "drop") |> dplyr::arrange(period) - - + + # data frame: aggregate headcount by period and AFC band df_hdcnt_afc <- x |> @@ -176,7 +178,8 @@ gpg_data <- function(x, dplyr::group_by(period, afc_band) |> dplyr::mutate( perc = headcount / sum(headcount) * 100 - ) + ) |> + dplyr::ungroup() # data frame: aggregate headcount by period and directorate df_hdcnt_dir <- x |> @@ -188,7 +191,8 @@ gpg_data <- function(x, dplyr::group_by(period, directorate) |> dplyr::mutate( perc = headcount / sum(headcount) * 100 - ) + ) |> + dplyr::ungroup() # data frame: hourly rate by gender for overall, each AFC band df_hrrate <- dplyr::bind_rows( diff --git a/R/mod_chart_example.R b/R/mod_chart_example.R deleted file mode 100644 index b1a46e6..0000000 --- a/R/mod_chart_example.R +++ /dev/null @@ -1,60 +0,0 @@ -#' chart_example UI Function -#' -#' @description A shiny Module. -#' -#' @param id,input,output,session Internal parameters for {shiny}. 
-#' -#' @noRd -mod_chart_example_ui <- function(id) { - ns <- NS(id) - tagList( - h1_tabstop("First level"), - h2_tabstop("Second level"), - nhs_card_tabstop( - heading = "example chart title", - nhs_selectInput( - inputId = ns("bins"), - label = "Number of bins:", - choices = c(5, 10, 15, 20), - selected = 20, - full_width = TRUE - ), - highcharter::highchartOutput( - outputId = ns("chart"), - height = "400px" - ), - mod_nhs_download_ui( - id = ns("download_test") - ) - ) - ) -} - -#' chart_example Server Functions -#' -#' @noRd -mod_chart_example_server <- function(id) { - moduleServer(id, function(input, output, session) { - ns <- session$ns - - output$chart <- highcharter::renderHighchart({ - # Generate bins based on input$bins from ui.R - x <- nhsbsaGPG::faithful[, 2] - bins <- seq(min(x), max(x), length.out = as.numeric(input$bins) + 1) - - # Draw the histogram with the specified number of bins - chart <- graphics::hist(x, breaks = bins, plot = FALSE) - - # Output interactive chart - chart %>% - highcharter::hchart() %>% - nhsbsaR::theme_nhsbsa_highchart() - }) - - mod_nhs_download_server( - id = "download_test", - filename = "test.csv", - export_data = nhsbsaGPG::faithful - ) - }) -} diff --git a/R/mod_introduction.R b/R/mod_introduction.R new file mode 100644 index 0000000..b1e5db1 --- /dev/null +++ b/R/mod_introduction.R @@ -0,0 +1,72 @@ +#' introduction UI Function +#' +#' @description A shiny Module. +#' +#' @param id,input,output,session Internal parameters for {shiny}. +#' +#' @noRd +#' +#' @importFrom shiny NS tagList +mod_introduction_ui <- function(id) { + + ns <- NS(id) + tagList( + + h2_tabstop("Introduction"), + p( + "Under the Equality Act 2010 (Specific Duties and Public Authorities) + Regulations 2017, the NHS Business Services Authority (NHSBSA), + along with all public bodies with more than 250 employees, + is required to publish gender pay gap information by 30th March each year. 
+ This includes information on the mean and median gender gaps in hourly pay, + the mean and median gender gaps in bonus pay, + the proportion of men and women who received bonuses, + and the proportions of male and female employees in each pay quartile." + ), + p( + "The gender pay gap shows the difference in the average pay between + all men and women in an organisation. It is different to equal pay, + which examines the pay differences between men and women who carry out + the same or similar jobs, or work of equal value. It is unlawful to pay + people unequally because they are a man or a woman." + ), + p( + "At the NHSBSA, our people are at the centre of our business strategy, + and we aspire to be an employer of choice who provides a great place + to work and can recruit and retain the right talent with the wide + range of knowledge, skills and capabilities we need. We are committed + to a diverse and inclusive culture which supports the fair treatment + and reward of all colleagues, irrespective of gender, and our pay + framework is based on the principles of fairness, transparency, + and consistency" + ), + p( + span("This report fulfils our reporting requirements and sets out what we are + doing to address the gender pay gap in our organisation. The data is based on a + snapshot of all employees as of 31 March ", + textOutput(ns("reporting_year"), inline = TRUE), style = "font-size:15pt"), + span(", as this is the date which all public authorities must use each year. 
+ The calculations used are those set out in the relevant legislation + to ensure the data is calculated consistently across organisations.", + style = "font-size:15pt") + ) + ) +} + +#' introduction Server Functions +#' +#' @noRd +mod_introduction_server <- function(id) { + + moduleServer(id, function(input, output, session) { + + ns <- session$ns + + + output$reporting_year <- renderText({ + nhsbsaGPG::gpg_data(afc_staff)$ending_fy + }) + + + }) +} diff --git a/R/mod_markdown_example.R b/R/mod_markdown_example.R deleted file mode 100644 index adb84ad..0000000 --- a/R/mod_markdown_example.R +++ /dev/null @@ -1,22 +0,0 @@ -#' markdown_example UI Function -#' -#' @description A shiny Module. -#' -#' @param id,input,output,session Internal parameters for {shiny}. -#' -#' @noRd -mod_markdown_example_ui <- function(id) { - ns <- NS(id) - tagList( - includeMarkdown("inst/app/www/assets/markdown/mod_markdown_example.md") - ) -} - -#' markdown_example Server Functions -#' -#' @noRd -mod_markdown_example_server <- function(id) { - moduleServer(id, function(input, output, session) { - ns <- session$ns - }) -} diff --git a/R/mod_scrollytell_example.R b/R/mod_scrollytell_example.R deleted file mode 100644 index c7f6945..0000000 --- a/R/mod_scrollytell_example.R +++ /dev/null @@ -1,189 +0,0 @@ -#' scrollytell_example UI Function -#' -#' @description A shiny Module. -#' -#' @param id,input,output,session Internal parameters for {shiny}. -#' -#' @noRd -mod_scrollytell_example_ui <- function(id) { - ns <- NS(id) - tagList( - h1_tabstop("Demo of scrollytelling using iris dataset"), - p( - "This section shows an example of a scrolly chart in action using the 'iris' - dataset. The scatter chart will remain in place and react to changes as the - user scrolls." 
- ), - # start with the overall container object that will hold the different - # sections to scroll through - scrollytell::scrolly_container( - # the outputID will hold the reference for the input showing the current - # scroll section - outputId = ns("scroll_level"), - h2_tabstop("The iris dataset"), - # define the container for the static part of the scrolly - scrollytell::scrolly_graph( - # place the sticky part in the center of the page - # for aesthetics stops the chart hitting top of page - div( - style = "margin-top: 10vh" # change based on size of sticky graph - ), - # use a nhs_card element to hold the chart - nhs_card_tabstop( - # this could be made dynamic if required by using a textOutput() object - heading = "Iris Dataset: Sepal Length v Width", - highcharter::highchartOutput(outputId = ns("example_scroll_chart")) - ) - ), - - # create the container for the scrolling sections of the scrolly - scrollytell::scrolly_sections( - scrollytell::scrolly_section( - # each section needs a unique ID to reference, use meaningful names - id = "section_1_all", - # bump the start of each section to avoid top of screen - div( - style = "height: 20vh" - ), - # text output, including header if required - h3_tabstop("Length v Width"), - p( - "Looking purely at the Sepal length and width does not suggest a - strong relationship." - ), - ), - scrollytell::scrolly_section( - id = "section_2_group", # each section needs a unique ID to reference - # bump the start of each section to avoid top of screen - div( - style = "height: 20vh" - ), - # text output, including header if required - h3_tabstop("Split by species"), - p( - "When highlighting by species type we start to see that there is - correlation within each species." 
- ) - ), - scrollytell::scrolly_section( - id = "section_3_setosa", # each section needs a unique ID to reference - # bump the start of each section to avoid top of screen - div( - style = "height: 20vh" - ), - # text output, including header if required - h3_tabstop("Setosa"), - p( - "This species has the largest sepal width but some of the smallest - sepal lengths." - ) - ), - scrollytell::scrolly_section( - # each section needs a unique ID to reference - id = "section_4_versicolor", - # bump the start of each section to avoid top of screen - div( - style = "height: 20vh" - ), - # text output, including header if required - h3_tabstop("Versicolor"), - p("This species has the some of the smallest sepal widths.") - ), - scrollytell::scrolly_section( - # each section needs a unique ID to reference - id = "section_5_virginica", - # bump the start of each section to avoid top of screen - div( - style = "height: 20vh" - ), - # text output, including header if required - h3_tabstop("Setosa"), - p("This species has the largest sepal lengths."), - # Bump the height of the last section so that the top of it aligns - # with the top of the sticky image when you scroll - div( - style = "height: 30vh" # change based on size of section - ) - ) - ) - ) - ) -} - -#' scrollytell_example Server Functions -#' -#' @noRd -mod_scrollytell_example_server <- function(id) { - moduleServer(id, function(input, output, session) { - ns <- session$ns - - group_lvl <- NULL - point_col <- NULL - - # create the chart object - output$example_scroll_chart <- highcharter::renderHighchart({ - # require the scroll input to prevent errors on initialisation - req(input$scroll_level) - - # create a custom chart dataset based on the scrolly section inputs the - # input$scroll_level will allow you to define the chart input this input - # is based on section of the report that is currently active during the - # scroll - chart_data <- datasets::iris |> - dplyr::filter( - .data$Species %in% 
switch(input$scroll_level, - "section_3_setosa" = c("setosa"), - "section_4_versicolor" = c("versicolor"), - "section_5_virginica" = c("virginica"), - c("setosa", "versicolor", "virginica") - ) - ) - - if (input$scroll_level == "section_1_all") { - chart_data <- chart_data |> - dplyr::mutate(group_lvl = "Species") |> - dplyr::mutate(point_col = "#0000FF") - } else { - chart_data <- chart_data |> - dplyr::mutate(group_lvl = .data$Species) |> - dplyr::mutate( - point_col = dplyr::case_when( - .data$Species == "setosa" ~ "#fdb863", - .data$Species == "versicolor" ~ "#b2abd2", - .data$Species == "virginica" ~ "#5e3c99", - TRUE ~ "#000000" - ) - ) - } - - # produce the chart object - chart_data |> - highcharter::hchart( - type = "scatter", - highcharter::hcaes( - x = .data$Sepal.Length, - y = .data$Sepal.Width, - group = group_lvl, - color = point_col - ) - ) |> - highcharter::hc_xAxis( - min = 4, - max = 8 - ) |> - highcharter::hc_yAxis( - min = 1, - max = 5 - ) |> - # remove plot animations - highcharter::hc_plotOptions(series = list(animation = FALSE)) |> - # disable the legend - highcharter::hc_legend(enabled = FALSE) - }) - - # output the scrolly object - MUST BE INCLUDED FOR SCROLLY OBJECT TO BE RENDERED - output$scroll_level <- scrollytell::renderScrollytell({ - scrollytell::scrollytell() - }) - }) -} diff --git a/R/utils_charts.R b/R/utils_charts.R index c5285b3..6803b47 100644 --- a/R/utils_charts.R +++ b/R/utils_charts.R @@ -6,32 +6,23 @@ #' #' @return Returns a highchart or htmlwidget object. #' -#' @examples \dontrun{ -#' workforce <- nhsbsaGPG::gpg_data(nhsbsaGPG::afc_staff) -#' nhsbsaGPG::gender_profile(workforce$df_hdcnt_gender) -#' } -#' #' @export #' @param x Input data frame from \code{gpg_data} S3 class object. 
#' @param xvar "period", default #' @param yvars data frame converts to list and each list element to create line #' @param series_names If user wants to give different series name for highchart legend -#' @param yaxis_title Title of y axis +#' @param yaxis_title Y axis title #' @param yaxis_label Indication of percentage or number -#' @param show_legend TRUE default -#' -#' @importFrom aggregate -#' @importFrom barplot -#' -#' -gender_profile <- function(x, - xvar = "period", - yvars, - series_names, - yaxis_title, - yaxis_label, - show_legend = TRUE - ){ +#' @param colpalette custom palette +#' +#' +gpg_trend <- function(x, + xvar = "period", + yvars, + series_names, + yaxis_title, + yaxis_label, + colpalette) { out <- tryCatch( expr = { # Input data frame convert to list @@ -40,9 +31,11 @@ gender_profile <- function(x, # create plot object (empty one..) plt <- highcharter::highchart() |> highcharter::hc_chart(type = "line") |> - nhsbsaR::theme_nhsbsa_highchart(stack = NA, - palette = c("Blue", "AquaGreen")) - + nhsbsaR::theme_nhsbsa_highchart( + stack = NA, + palette = colpalette + ) + # It requires minimum two series (male, female) but it could split further for (i in seq_along(series_names)) { @@ -56,48 +49,194 @@ gender_profile <- function(x, data = data, type = "line", highcharter::hcaes( - x = .data[[xvar]], # default period3 + x = .data[[xvar]], # default period y = .data[[yvar]] # Female for example ), name = series_name # these labels will show in legend ) } + plt <- if (yaxis_label == "percentage") { plt |> highcharter::hc_yAxis( title = list(text = yaxis_title), - labels <- list(format = "{value}"), + labels = list(format = "{value}%"), min = 0, - max = 100 + max = 20 ) } else { plt |> highcharter::hc_yAxis( title = list(text = yaxis_title), - labels <- list(format = "{value:,f}"), + labels = list(format = "{value:,f}"), min = 0 ) + } - plt <- plt |> - highcharter::hc_xAxis(type = "category") |> - highcharter::hc_legend( - itemWidth = 600, - 
itemMarginTop = 5, - y = 0 + plt <- plt |> + highcharter::hc_xAxis(type = "category") |> + highcharter::hc_legend( + itemWidth = 600, + itemMarginTop = 5, + y = 0 + ) + + return(plt) + }, + warning = function() { + w <- warnings() + warning("Warning produced running gpg_trend():", w) + }, + error = function(e) { + stop("Error produced running gpg_trend():", e) + }, + finally = {} + ) +} + + + + +#' @title Highcharter bar chart to create pyramid chart. This chart +#' shows gender pay gap information by AFC band in NHSBSA +#' +#' @description \code{gpg_data} is the S3 class used for trend +#' +#' +#' @return Returns a highchart or htmlwidget object. +#' +#' +#' @export +#' @param x Input data frame from \code{gpg_data} S3 class object. +#' @param xvar "afc_band" default value +#' @param yvar headcount/mean hourly/median hourly pay +#' @param yaxis_title Y axis title + +gpg_pyramid <- function(x, xvar = "afc_band", yvar, yaxis_title) { + out <- tryCatch( + exp = { + data <- x + # Create chart object + plt <- data |> + highcharter::hchart( + type = "bar", + highcharter::hcaes( + x = .data[[xvar]], + y = .data[[yvar]], + group = "gender" ) - } + ) |> + nhsbsaR::theme_nhsbsa_highchart(palette = "gender") |> + highcharter::hc_yAxis( + title = list(text = yaxis_title), + labels = list( + formatter = highcharter::JS( + " + function() { + outHTML = this.axis.defaultLabelFormatter.call(this) + return outHTML.replace('-', '') + } + " + ) + ) + ) |> + highcharter::hc_xAxis( + title = list(text = "AFC band"), + reversed = FALSE + ) |> + highcharter::hc_plotOptions( + series = list( + states = list( + # Disable series highlighting + inactive = list(enabled = FALSE) + ), + events = list( + # Disables turning the series off + legendItemClick = htmlwidgets::JS("function () { return false; }") + ) + ) + ) + + return(plt) }, warning = function() { w <- warnings() - warning("Warning produced running gender_profile():", w) + warning("Warning produced running gpg_pyramid():", w) }, error 
= function(e) { - stop("Error produced running gender_profile():", e) + stop("Error produced running gpg_pyramid():", e) }, finally = {} ) } + + + +#' @title Highcharter column chart to create stacked column chart. This chart +#' shows proportion of males and females in each quartile pay band. +#' +#' @description {quartile} data frame is used for stacked column chart. +#' +#' +#' @return Returns a highchart or htmlwidget object. +#' +#' +#' @export +#' @param x Input quartile data frame. +#' @param xvar "afc_band" default value +#' @param yvar headcount/mean hourly/median hourly pay +#' @param groupvar group by variable +#' @param yaxis_title Y axis title + +gpg_stack <- function(x, xvar, yvar, groupvar, yaxis_title) { + out <- tryCatch( + exp = { + data <- x + # Create chart object + plt <- data |> + highcharter::hchart( + type = "column", + highcharter::hcaes( + x = .data[[xvar]], + y = .data[[yvar]], + group = .data[[groupvar]] + ) + ) |> + nhsbsaR::theme_nhsbsa_highchart(palette = "gender") |> + highcharter::hc_yAxis( + title = list(text = yaxis_title), + max = 100 + ) |> + highcharter::hc_xAxis( + title = list(text = "Quartile") + ) |> + highcharter::hc_plotOptions( + series = list( + states = list( + # Disable series highlighting + inactive = list(enabled = FALSE) + ), + events = list( + # Disables turning the series off + legendItemClick = htmlwidgets::JS("function () { return false; }") + ) + ) + ) + + + return(plt) + }, + warning = function() { + w <- warnings() + warning("Warning produced running gpg_pyramid():", w) + }, + error = function(e) { + stop("Error produced running gpg_pyramid():", e) + }, + finally = {} + ) +} diff --git a/data-raw/1.read_azure_blob.R b/data-raw/1.read_azure_blob.R index 54ad5f4..bf9024d 100644 --- a/data-raw/1.read_azure_blob.R +++ b/data-raw/1.read_azure_blob.R @@ -6,8 +6,8 @@ library(dplyr) config <- jsonlite::fromJSON("./data-raw/execution.json") -account_name <- Sys.getenv('AZURE_BLOB_STORAGE_PDS_ACCOUNT_NAME') 
-account_key <- Sys.getenv('AZURE_BLOB_STORAGE_PDS_ACCOUNT_KEY') +account_name <- Sys.getenv("AZURE_BLOB_STORAGE_PDS_ACCOUNT_NAME") +account_key <- Sys.getenv("AZURE_BLOB_STORAGE_PDS_ACCOUNT_KEY") endpoint_suffix <- "core.windows.net" container_name <- config$import$container @@ -15,28 +15,28 @@ folder_path <- config$import$folder # Create a blob service client -blob_endpoint <- sprintf('https://%s.blob.%s', account_name, endpoint_suffix) -blob_client <- storage_endpoint(blob_endpoint, key=account_key) +blob_endpoint <- sprintf("https://%s.blob.%s", account_name, endpoint_suffix) +blob_client <- storage_endpoint(blob_endpoint, key = account_key) # Get the blob client instance for the given container blob_container <- storage_container(blob_client, container_name) # List all blobs in the specified folder -blobs_in_folder <- list_blobs(blob_container, prefix=folder_path) +blobs_in_folder <- list_blobs(blob_container, prefix = folder_path) # Extract the 'name' values from the result blob_names <- blobs_in_folder[["name"]] # Loop through each blob in the folder and download for (blob_name in blob_names) { - - local_file_path <- paste0(config$import$local_path, '/', basename(blob_name)) - + + local_file_path <- paste0(config$import$local_path, "/", basename(blob_name)) + # Check if the file exists locally, and if so, delete it if (file.exists(local_file_path)) { file.remove(local_file_path) } - + # Download the blob data to the local file storage_download(blob_container, blob_name, local_file_path, overwrite = TRUE) } diff --git a/data-raw/2.gpg_data_process.R b/data-raw/2.gpg_data_process.R index 1e2a23c..bd6e0c3 100644 --- a/data-raw/2.gpg_data_process.R +++ b/data-raw/2.gpg_data_process.R @@ -27,15 +27,16 @@ process_file <- function(filepath) { paygap = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |> select(1:3) |> janitor::clean_names() |> - mutate(period = financial_year) |> - filter(gender == "Pay Gap %") |> + mutate(period = financial_year) |> + 
filter(gender == "Pay Gap %") |> select(period, - avg_hr_gpg = avg_hourly_rate, + mean_hr_gpg = avg_hourly_rate, median_hr_gpg = median_hourly_rate), quartile = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |> select(5:9) |> janitor::clean_names() |> - mutate(period = financial_year), + mutate(period = financial_year) |> + select(period, quartile, female, male) , afc = read_excel(filepath, skip = 8, col_names = TRUE) |> select(2:7) |> janitor::clean_names() |> @@ -70,7 +71,8 @@ paygap <- map(dfs, "paygap") |> select(period, everything()) quartile <- map(dfs, "quartile") |> bind_rows() |> - select(period, everything()) + select(period, everything()) |> + mutate(quartile = as.character(quartile)) afc <- map(dfs, "afc") |> bind_rows() |> select(period, everything()) @@ -88,16 +90,38 @@ afc_staff <- afc |> ) |> left_join(lookup, by = "pay_scale" - ) |> - select(-employee_number) |> -# Data quality error July 2013 Archive employee org is wrong, manually edited - mutate(org_l3 = ifelse(org_l3 == 'July 2013 Archive', "914 BSA Finance, Commercial and Estates L3", org_l3), - directorate = stringr::str_replace_all( - org_l3, c("^914 BSA " = "", " L3" = "")), + ) |> + select(-employee_number) |> + + # Data quality error July 2013 Archive employee org + # is wrong, manually edited + mutate(org_l3 = ifelse(org_l3 == "July 2013 Archive", + "914 BSA Finance, Commercial and Estates L3", org_l3), + directorate = stringr::str_replace_all(org_l3, c("^914 BSA " = "", " L3" = "")), directorate = stringr::str_trim(directorate), - headcount = 1) |> - select(period, gender, headcount,hourly_rate, quartile, fte, afc_band, directorate) - + headcount = 1) |> + select(period, gender, headcount, hourly_rate, quartile, fte, afc_band, directorate) + +# quartile requires data transformation +quartile_overall <- quartile |> + group_by(period) |> + summarise(female = sum(female), + male = sum(male), + .groups = "drop") |> + mutate(quartile = "Overall") + +quartile <- quartile |> + 
bind_rows(quartile_overall) + +quartile <- quartile |> + tidyr::pivot_longer(cols = c(female, male), + names_to = "gender", + values_to = "count") |> + group_by(period, quartile) |> + mutate(percent = count / sum(count) * 100) |> + ungroup() + + # Keep three main data frame and it will be used to create S3 class usethis::use_data(paygap, overwrite = TRUE) @@ -107,7 +131,7 @@ usethis::use_data(afc_staff, overwrite = TRUE) # delete all the files in data_temp as they only stay in azure storage # Specify the folder path -folder_path <- "./data_temp" +folder_path <- "./data_temp" # List all files in the directory files_to_delete <- list.files(path = folder_path, full.names = TRUE) diff --git a/man/afc_staff.Rd b/man/afc_staff.Rd index afb475f..a72f4c9 100644 --- a/man/afc_staff.Rd +++ b/man/afc_staff.Rd @@ -3,7 +3,7 @@ \docType{data} \name{afc_staff} \alias{afc_staff} -\title{NHSBSA employee staff_afc} +\title{NHSBSA employee afc_staff} \format{ A data frame with gender pay gap information } @@ -12,14 +12,14 @@ afc_staff } \description{ A dataset containing NHSBSA employee hourly pay -by reporting period split by gender, AFC band, directorate. +by reporting period split by gender, FTE, AFC band, directorate. \itemize{ \item period. 2018/19, 2019/20 etc character \item gender. Male or Female, character \item headcount. employee headcount used for aggregation \item hourly_rate. hourly rate as shown pay slip \item quartile. split hourly_rate by quartile by gender - \item fte. employee full time or part time info + \item fte. employee full time or part time information \item afc_band. AFC band \item directorate. 
NHSBSA directorate } diff --git a/man/faithful.Rd b/man/faithful.Rd deleted file mode 100644 index f4be27e..0000000 --- a/man/faithful.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/faithful.R -\docType{data} -\name{faithful} -\alias{faithful} -\title{Old Faithful Geyser Data} -\format{ -A data frame with 272 observations on 2 variables. -} -\source{ -{W. Hrdle.} -} -\usage{ -faithful -} -\description{ -A closer look at \code{faithful$eruptions} reveals that these are -heavily rounded times originally in seconds, where multiples of 5 are -more frequent than expected under non-human measurement. For a -better version of the eruption times, see the example below. -There are many versions of this dataset around: Azzalini and Bowman -(1990) use a more complete version. -} -\details{ -\itemize{ - \item eruptions. Eruption time in mins - \item waiting. Waiting time to next eruption (in mins) -} -} -\keyword{datasets} diff --git a/man/gpg_pyramid.Rd b/man/gpg_pyramid.Rd new file mode 100644 index 0000000..7bb12a1 --- /dev/null +++ b/man/gpg_pyramid.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_charts.R +\name{gpg_pyramid} +\alias{gpg_pyramid} +\title{Highcharter bar chart to create pyramid chart. This chart +shows gender pay gap information by AFC band in NHSBSA} +\usage{ +gpg_pyramid(x, xvar = "afc_band", yvar, yaxis_title) +} +\arguments{ +\item{x}{Input data frame from \code{gpg_data} S3 class object.} + +\item{xvar}{"afc_band" default value} + +\item{yvar}{headcount/mean hourly/median hourly pay} + +\item{yaxis_title}{Y axis title} +} +\value{ +Returns a highchart or htmlwidget object. 
+} +\description{ +\code{gpg_data} is the S3 class used for trend +} diff --git a/man/gpg_stack.Rd b/man/gpg_stack.Rd new file mode 100644 index 0000000..1de7000 --- /dev/null +++ b/man/gpg_stack.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_charts.R +\name{gpg_stack} +\alias{gpg_stack} +\title{Highcharter column chart to create stacked column chart. This chart +shows proportion of males and females in each quartile pay band.} +\usage{ +gpg_stack(x, xvar, yvar, groupvar, yaxis_title) +} +\arguments{ +\item{x}{Input quartile data frame.} + +\item{xvar}{"afc_band" default value} + +\item{yvar}{headcount/mean hourly/median hourly pay} + +\item{groupvar}{group by variable} + +\item{yaxis_title}{Y axis title} +} +\value{ +Returns a highchart or htmlwidget object. +} +\description{ +{quartile} data frame is used for stacked column chart. +} diff --git a/man/gender_profile.Rd b/man/gpg_trend.Rd similarity index 71% rename from man/gender_profile.Rd rename to man/gpg_trend.Rd index e31235c..8948717 100644 --- a/man/gender_profile.Rd +++ b/man/gpg_trend.Rd @@ -1,18 +1,18 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_charts.R -\name{gender_profile} -\alias{gender_profile} +\name{gpg_trend} +\alias{gpg_trend} \title{Highcharter line chart to show the number of headcount by financial year by gender.} \usage{ -gender_profile( +gpg_trend( x, xvar = "period", yvars, series_names, yaxis_title, yaxis_label, - show_legend = TRUE + colpalette ) } \arguments{ @@ -24,11 +24,11 @@ gender_profile( \item{series_names}{If user wants to give different series name for highchart legend} -\item{yaxis_title}{Title of y axis} +\item{yaxis_title}{Y axis title} \item{yaxis_label}{Indication of percentage or number} -\item{show_legend}{TRUE default} +\item{colpalette}{custom palette} } \value{ Returns a highchart or htmlwidget object. @@ -36,10 +36,3 @@ Returns a highchart or htmlwidget object. 
\description{ \code{gpg_data} is the S3 class used for trend } -\examples{ -\dontrun{ -workforce <- nhsbsaGPG::gpg_data(nhsbsaGPG::afc_staff) -nhsbsaGPG::gender_profile(workforce$df_hdcnt_gender) -} - -} diff --git a/man/quartile.Rd b/man/quartile.Rd index 9cc63aa..ab21d7b 100644 --- a/man/quartile.Rd +++ b/man/quartile.Rd @@ -5,7 +5,7 @@ \alias{quartile} \title{NHSBSA employee quartile} \format{ -An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 20 rows and 6 columns. +An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 50 rows and 5 columns. } \usage{ quartile @@ -13,7 +13,7 @@ quartile \description{ A dataset containing NHSBSA employee hourly pay by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc) -split by gender, AFC band, directorate +by quartiles } \details{ \itemize{ @@ -22,13 +22,12 @@ split by gender, AFC band, directorate \item female. number of female employees in each quartile \item male. number of male employees in each quartile \item quartile. split hourly_rate by quartile by gender - \item female_percent. female employee % in quartile - \item male_percent. 
male employee % in quartile } + @docType data @keywords datasets @name quartile @usage quartile - @format data frame with employee gender pay gap by quartiles + @format data frame with employee gender hourly pay by quartiles } \keyword{datasets} diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R index f418d8f..b031074 100644 --- a/tests/testthat/test-utils_charts.R +++ b/tests/testthat/test-utils_charts.R @@ -1,40 +1,82 @@ - library(tidyr) +library(dplyr) df <- gpg_data(afc_staff) -x <- df$df_hdcnt_gender |> - tidyr::pivot_wider(names_from = gender, - values_from = headcount) +x <- df$df_hdcnt_gender |> + tidyr::pivot_wider( + names_from = gender, + values_from = headcount + ) |> + dplyr::ungroup() + +y <- nhsbsaGPG::paygap +z <- df$df_hdcnt_afc |> + filter(period == "2021/22") |> + mutate(headcount = headcount * ifelse(gender == "Male", 1, -1)) -testthat::test_that("gender_profile runs without errors", { - expect_silent(gender_profile(x, +testthat::test_that("gpg_trend function runs without errors", { + expect_silent(gpg_trend(x, xvar = "period", yvars = c("Male", "Female"), series_names = c("Male", "Female"), yaxis_title = "Male and Female employee headcount", - yaxis_label = "number" + yaxis_label = "number", + colpalette = "gender" )) }) -testthat::test_that("gender_profile outputs a highchart, htmlwidget class", { +testthat::test_that("gpg_trend outputs a highchart, htmlwidget class", { expect_equal(class( - gender_profile(x, + gpg_trend(x, xvar = "period", yvars = c("Male", "Female"), series_names = c("Male", "Female"), yaxis_title = "Male and Female employee headcount", - yaxis_label = "number" - )), c("highchart", "htmlwidget")) + yaxis_label = "number", + colpalette = c("DarkBlue", "Green") + ) + ), c("highchart", "htmlwidget")) }) -testthat::test_that("gender_profile takes list as an input", { +testthat::test_that("gpg_trend takes list as an input", { expect_equal(class(list(x)), "list") }) 
-testthat::test_that("gender_profile input data frame must contain Female, +testthat::test_that("gpg_trend input data frame must contain Female, Male column", { - expect_equal(length(grep("Female|Male", names(x))), 2) + expect_equal(length(grep("Female|Male", names(x))), 2) + }) + + +testthat::test_that("gpg_trend input data frame must contain period column", { + expect_equal(length(grep("period", names(x))), 1) +}) + + +testthat::test_that("gpg_trend function runs with paygap dataframe", { + expect_silent(gpg_trend(y, + xvar = "period", + yvars = c("mean_hr_gpg", "median_hr_gpg"), + series_names = c("Mean gender pay gap", "Median gender pay gap"), + yaxis_title = "Gender pay gap in hourly pay", + yaxis_label = "percentage", + colpalette = c("Purple", "WarmYellow") + )) +}) + + +testthat::test_that("gpg_pyramid function runs without error", { + expect_silent(gpg_pyramid(z, xvar = "afc_band", yvar = "headcount", + yaxis_title = "Headcount" + )) +}) + +testthat::test_that("gpg_stack function runs without error", { + expect_silent(gpg_stack(quartile |> filter(period == "2021/22"), + xvar = "quartile", yvar = "percent", groupvar = "ender", + yaxis_title = "Males and females in pay quartile" + )) }) From a385e3529f1393485486ee4844431f4c28997d2e Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 15:56:29 +0100 Subject: [PATCH 09/19] R cmd check error fix --- R/gpg_data_class.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/gpg_data_class.R b/R/gpg_data_class.R index 04df3a7..2149318 100644 --- a/R/gpg_data_class.R +++ b/R/gpg_data_class.R @@ -26,10 +26,11 @@ #' #' @return If the class is not instantiated correctly, nothing is returned. 
#' -#' @examples +#' @examples \dontrun{ #' #' library(nhsbsaGPG) #' df <- gpg_data(afc_staff) +#' } #' #' @export From 7ac33f9c953c606b0e4dbc34050f64c752e5c161 Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 16:07:35 +0100 Subject: [PATCH 10/19] tryCatch spelling error --- R/utils_charts.R | 4 ++-- man/gpg_data.Rd | 16 +++++++++------- man/paygap.Rd | 7 ++++--- man/quartile.Rd | 2 +- tests/testthat/test-utils_charts.R | 28 ++++++++++++++++++---------- 5 files changed, 34 insertions(+), 23 deletions(-) diff --git a/R/utils_charts.R b/R/utils_charts.R index 6803b47..05aa976 100644 --- a/R/utils_charts.R +++ b/R/utils_charts.R @@ -115,7 +115,7 @@ gpg_trend <- function(x, gpg_pyramid <- function(x, xvar = "afc_band", yvar, yaxis_title) { out <- tryCatch( - exp = { + expr = { data <- x # Create chart object plt <- data |> @@ -194,7 +194,7 @@ gpg_pyramid <- function(x, xvar = "afc_band", yvar, yaxis_title) { gpg_stack <- function(x, xvar, yvar, groupvar, yaxis_title) { out <- tryCatch( - exp = { + expr = { data <- x # Create chart object plt <- data |> diff --git a/man/gpg_data.Rd b/man/gpg_data.Rd index 3e79d37..4ad3d20 100644 --- a/man/gpg_data.Rd +++ b/man/gpg_data.Rd @@ -27,18 +27,20 @@ least seven columns: period, gender, hourly_rate, quartile, fte, afc_band, directorate. 
Once initiated, the class has six slots: -\code{df}: raw data frame -\code{df_hdcnt}: data frame contains headcount by period -\code{df_hdcnt_gender}: data frame contains headcount by gender by period -\code{df_hdcnt_afc}: data frame contains headcount by afc band -\code{df_hdcnt_dir}: data frame contains headcount by directorate -\code{df_hrrate}: data frame contains hourly rate by gender for each grade +\code{df}: raw data frame +\code{df_hdcnt}: data frame contains headcount by period +\code{df_hdcnt_gender}: data frame contains headcount by gender by period +\code{df_hdcnt_afc}: data frame contains headcount by afc band +\code{df_hdcnt_dir}: data frame contains headcount by directorate +\code{df_hrrate}: data frame contains hourly rate by gender for each grade \code{ending_fy}: a character vector containing ending reporting period (e.g. 31 March 2023). This uses for introduction paragraph } \examples{ - +\dontrun{ + library(nhsbsaGPG) df <- gpg_data(afc_staff) +} } diff --git a/man/paygap.Rd b/man/paygap.Rd index 2e19a52..1ca4916 100644 --- a/man/paygap.Rd +++ b/man/paygap.Rd @@ -12,14 +12,15 @@ paygap } \description{ A dataset containing NHSBSA employee paygap -Directly pulled from ESR dashboard (NHS National Returns) +Directly pulled from ESR dashboard (NHS National Returns) gender, average hourly rate, median hourly rate and pay gap% } \details{ \itemize{ \item period. 2018/19, 2019/20 etc character \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees - \item median_hr_gpg. Median gender pay gap % based on male full-pay relevant employees - } + \item median_hr_gpg. Median gender pay gap % based + on male full-pay relevant employees +} } \keyword{datasets} diff --git a/man/quartile.Rd b/man/quartile.Rd index ab21d7b..41e328f 100644 --- a/man/quartile.Rd +++ b/man/quartile.Rd @@ -23,7 +23,7 @@ by quartiles \item male. number of male employees in each quartile \item quartile. 
split hourly_rate by quartile by gender } - + @docType data @keywords datasets @name quartile diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R index b031074..1651d4a 100644 --- a/tests/testthat/test-utils_charts.R +++ b/tests/testthat/test-utils_charts.R @@ -1,7 +1,7 @@ library(tidyr) library(dplyr) -df <- gpg_data(afc_staff) +df <- gpg_data(nhsbsaGPG::afc_staff) x <- df$df_hdcnt_gender |> tidyr::pivot_wider( names_from = gender, @@ -11,8 +11,8 @@ x <- df$df_hdcnt_gender |> y <- nhsbsaGPG::paygap -z <- df$df_hdcnt_afc |> - filter(period == "2021/22") |> +z <- df$df_hdcnt_afc |> + filter(period == '2021/22') |> mutate(headcount = headcount * ifelse(gender == "Male", 1, -1)) testthat::test_that("gpg_trend function runs without errors", { @@ -47,8 +47,8 @@ testthat::test_that("gpg_trend takes list as an input", { testthat::test_that("gpg_trend input data frame must contain Female, Male column", { - expect_equal(length(grep("Female|Male", names(x))), 2) - }) + expect_equal(length(grep("Female|Male", names(x))), 2) +}) testthat::test_that("gpg_trend input data frame must contain period column", { @@ -69,14 +69,22 @@ testthat::test_that("gpg_trend function runs with paygap dataframe", { testthat::test_that("gpg_pyramid function runs without error", { - expect_silent(gpg_pyramid(z, xvar = "afc_band", yvar = "headcount", - yaxis_title = "Headcount" + expect_silent(gpg_pyramid(z , + xvar = "afc_band", + yvar = "headcount", + yaxis_title = "Headcount" )) }) testthat::test_that("gpg_stack function runs without error", { - expect_silent(gpg_stack(quartile |> filter(period == "2021/22"), - xvar = "quartile", yvar = "percent", groupvar = "ender", - yaxis_title = "Males and females in pay quartile" + expect_silent(gpg_stack(quartile |> filter(period == "2021/22") , + xvar = "quartile", + yvar = "percent", + groupvar = "gender", + yaxis_title = "Males and females in pay quartile" )) }) + + + + From 9fb8208c099c43fc1cdf437615feef8e78e93b65 Mon 
Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 18:33:51 +0100 Subject: [PATCH 11/19] dummy data as it failed RMD check --- tests/testthat/test-utils_charts.R | 54 +++++++++++++++--------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R index 1651d4a..c2cce80 100644 --- a/tests/testthat/test-utils_charts.R +++ b/tests/testthat/test-utils_charts.R @@ -1,19 +1,27 @@ library(tidyr) library(dplyr) -df <- gpg_data(nhsbsaGPG::afc_staff) -x <- df$df_hdcnt_gender |> - tidyr::pivot_wider( - names_from = gender, - values_from = headcount - ) |> - dplyr::ungroup() +x <- data.frame( + period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"), + Female = c(1700, 1800, 1900, 2000, 2300), + Male = c(1100, 1300, 1300, 1400, 1500) +) + +y <- data.frame( + period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"), + mean_hr_gpg = c(11, 11, 12, 14, 12), + median_hr_gpg = c(0.80, 0.5, 2.3, 12.5, 8.88) +) + +z <- data.frame( + period = rep("2021/22", 20), + gender = c(rep("Female", 10), rep("Male", 10)), + afc_band = c("Band 2", "Band 3", "Band 4", "Band 5", "Band 6", + "Band 2", "Band 3", "Band 4", "Band 5", "Band 6") , + headcount = c(-460, -645, -280, -218, -118, 156, 80, 41, 13, 7), + perc = c(65.8, 66.5, 62.9, 57.8, 45.2, 47.0, 48.8, 48.2, 38.2, 31.8) +) -y <- nhsbsaGPG::paygap - -z <- df$df_hdcnt_afc |> - filter(period == '2021/22') |> - mutate(headcount = headcount * ifelse(gender == "Male", 1, -1)) testthat::test_that("gpg_trend function runs without errors", { expect_silent(gpg_trend(x, @@ -47,8 +55,8 @@ testthat::test_that("gpg_trend takes list as an input", { testthat::test_that("gpg_trend input data frame must contain Female, Male column", { - expect_equal(length(grep("Female|Male", names(x))), 2) -}) + expect_equal(length(grep("Female|Male", names(x))), 2) + }) testthat::test_that("gpg_trend input data frame must contain period column", { @@ -69,22 +77,14 @@ 
testthat::test_that("gpg_trend function runs with paygap dataframe", { testthat::test_that("gpg_pyramid function runs without error", { - expect_silent(gpg_pyramid(z , - xvar = "afc_band", - yvar = "headcount", - yaxis_title = "Headcount" + expect_silent(gpg_pyramid(z, xvar = "afc_band", yvar = "headcount", + yaxis_title = "Headcount" )) }) testthat::test_that("gpg_stack function runs without error", { - expect_silent(gpg_stack(quartile |> filter(period == "2021/22") , - xvar = "quartile", - yvar = "percent", - groupvar = "gender", - yaxis_title = "Males and females in pay quartile" + expect_silent(gpg_stack(quartile |> filter(period == "2021/22"), + xvar = "quartile", yvar = "percent", groupvar = "gender", + yaxis_title = "Males and females in pay quartile" )) }) - - - - From 4641861a02b93f6fb65726bfe819b9f04cf4ec08 Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 18:53:15 +0100 Subject: [PATCH 12/19] change to dummy data for test --- tests/testthat/test-utils_charts.R | 45 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R index c2cce80..d44a953 100644 --- a/tests/testthat/test-utils_charts.R +++ b/tests/testthat/test-utils_charts.R @@ -1,28 +1,28 @@ library(tidyr) library(dplyr) -x <- data.frame( - period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"), - Female = c(1700, 1800, 1900, 2000, 2300), - Male = c(1100, 1300, 1300, 1400, 1500) +df <- gpg_data(nhsbsaGPG::afc_staff) +x <- df$df_hdcnt_gender |> + tidyr::pivot_wider( + names_from = gender, + values_from = headcount + ) |> + dplyr::ungroup() + +y <- nhsbsaGPG::paygap + +z <- df$df_hdcnt_afc |> + filter(period == "2021/22") |> + mutate(headcount = headcount * ifelse(gender == "Male", 1, -1)) + +quartile <- data.frame( + period = c(rep("2018/19", 8)), + quartile = c(rep(1, 2), rep(2, 2), rep(3, 2), rep(4, 2)), + gender = c("female", "male", "female", "male", "female", 
"male", "female", "male"), + count = c(425, 282, 438, 261, 461, 269, 380, 344), + percent = c(60.1, 39.9, 62.7, 37.3, 63.2, 36.8, 52.5, 47.5) ) -y <- data.frame( - period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"), - mean_hr_gpg = c(11, 11, 12, 14, 12), - median_hr_gpg = c(0.80, 0.5, 2.3, 12.5, 8.88) -) - -z <- data.frame( - period = rep("2021/22", 20), - gender = c(rep("Female", 10), rep("Male", 10)), - afc_band = c("Band 2", "Band 3", "Band 4", "Band 5", "Band 6", - "Band 2", "Band 3", "Band 4", "Band 5", "Band 6") , - headcount = c(-460, -645, -280, -218, -118, 156, 80, 41, 13, 7), - perc = c(65.8, 66.5, 62.9, 57.8, 45.2, 47.0, 48.8, 48.2, 38.2, 31.8) -) - - testthat::test_that("gpg_trend function runs without errors", { expect_silent(gpg_trend(x, xvar = "period", @@ -83,8 +83,7 @@ testthat::test_that("gpg_pyramid function runs without error", { }) testthat::test_that("gpg_stack function runs without error", { - expect_silent(gpg_stack(quartile |> filter(period == "2021/22"), - xvar = "quartile", yvar = "percent", groupvar = "gender", - yaxis_title = "Males and females in pay quartile" + expect_silent(gpg_stack(quartile, xvar = "quartile", yvar = "percent", + groupvar = "gender", yaxis_title = "Males and females in pay quartile" )) }) From e5ecbf39e3f2dbea04cad7754b0990941d955b64 Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 19:00:15 +0100 Subject: [PATCH 13/19] dummy data --- tests/testthat/test-utils_charts.R | 33 ++++++++++++++++++------------ 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R index d44a953..aad707c 100644 --- a/tests/testthat/test-utils_charts.R +++ b/tests/testthat/test-utils_charts.R @@ -1,19 +1,26 @@ library(tidyr) library(dplyr) -df <- gpg_data(nhsbsaGPG::afc_staff) -x <- df$df_hdcnt_gender |> - tidyr::pivot_wider( - names_from = gender, - values_from = headcount - ) |> - dplyr::ungroup() - -y <- nhsbsaGPG::paygap - -z <- 
df$df_hdcnt_afc |> - filter(period == "2021/22") |> - mutate(headcount = headcount * ifelse(gender == "Male", 1, -1)) +x <- data.frame( + period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"), + Female = c(1700, 1800, 1900, 2000, 2300), + Male = c(1100, 1300, 1300, 1400, 1500) +) + +y <- data.frame( + period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"), + mean_hr_gpg = c(11, 11, 12, 14, 12), + median_hr_gpg = c(0.80, 0.5, 2.3, 12.5, 8.88) +) + +z <- data.frame( + period = rep("2021/22", 20), + gender = c(rep("Female", 10), rep("Male", 10)), + afc_band = c("Band 2", "Band 3", "Band 4", "Band 5", "Band 6", + "Band 2", "Band 3", "Band 4", "Band 5", "Band 6") , + headcount = c(-460, -645, -280, -218, -118, 156, 80, 41, 13, 7), + perc = c(65.8, 66.5, 62.9, 57.8, 45.2, 47.0, 48.8, 48.2, 38.2, 31.8) +) quartile <- data.frame( period = c(rep("2018/19", 8)), From e1963a5338e63ca26d83912432d96a1387b4f214 Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 19:18:38 +0100 Subject: [PATCH 14/19] ignore data.r --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index f6a7b85..5692f1d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ ^\.github$ ^\.lintr$ ^data_temp$ +^R\data.R From c92b670eaafe9919b6a1265ee95b239f4e489150 Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 19:43:10 +0100 Subject: [PATCH 15/19] try to fix warning msg --- DESCRIPTION | 4 +++- R/utils-globalVariables.R | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 R/utils-globalVariables.R diff --git a/DESCRIPTION b/DESCRIPTION index 29de8f7..cfc2210 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,9 @@ Imports: dplyr (>= 1.1.3), futile.logger (>= 1.4.3), stringr, - htmlwidgets + htmlwidgets, + graphics, + stats Suggests: pkgload, testthat (>= 3.0.0), diff --git a/R/utils-globalVariables.R b/R/utils-globalVariables.R new file mode 100644 index 0000000..9d8628d --- /dev/null +++ 
b/R/utils-globalVariables.R @@ -0,0 +1,2 @@ +utils::globalVariables(c("period", "headcount", "gender", "afc_band", + "afc_band", "hourly_rate", "median", ".data", "afc_staff")) From 5d4c64b7156de8fe5e583f276c637b78fae5ef2c Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 20:22:16 +0100 Subject: [PATCH 16/19] try to fix warning note RMD check --- .Rbuildignore | 2 +- DESCRIPTION | 8 ++++---- R/utils-globalVariables.R | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 5692f1d..f51e9aa 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,4 +12,4 @@ ^\.github$ ^\.lintr$ ^data_temp$ -^R\data.R +^\./R/data\.r$ diff --git a/DESCRIPTION b/DESCRIPTION index cfc2210..3aa3e70 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,14 +22,14 @@ Imports: dplyr (>= 1.1.3), futile.logger (>= 1.4.3), stringr, - htmlwidgets, - graphics, - stats + htmlwidgets Suggests: pkgload, testthat (>= 3.0.0), usethis, - tidyr + tidyr, + graphics, + stats Remotes: nhsbsa-data-analytics/nhsbsaR Encoding: UTF-8 diff --git a/R/utils-globalVariables.R b/R/utils-globalVariables.R index 9d8628d..f7f14d6 100644 --- a/R/utils-globalVariables.R +++ b/R/utils-globalVariables.R @@ -1,2 +1,3 @@ utils::globalVariables(c("period", "headcount", "gender", "afc_band", - "afc_band", "hourly_rate", "median", ".data", "afc_staff")) + "afc_band", "hourly_rate", "median", ".data", + "afc_staff", "directorate", "aggregate", "barplot")) From effbd5cdec1e5947e0d13d92531100981bf227c8 Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 20:30:06 +0100 Subject: [PATCH 17/19] RMD final warning fix --- R/mod_introduction.R | 7 +++++++ R/utils-globalVariables.R | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/R/mod_introduction.R b/R/mod_introduction.R index b1e5db1..2f2988d 100644 --- a/R/mod_introduction.R +++ b/R/mod_introduction.R @@ -67,6 +67,13 @@ mod_introduction_server <- function(id) { nhsbsaGPG::gpg_data(afc_staff)$ending_fy }) + 
nhsbsaGPG::paygap + nhsbsaGPG::quartile + + + + + }) } diff --git a/R/utils-globalVariables.R b/R/utils-globalVariables.R index f7f14d6..e0dfff4 100644 --- a/R/utils-globalVariables.R +++ b/R/utils-globalVariables.R @@ -1,3 +1,3 @@ utils::globalVariables(c("period", "headcount", "gender", "afc_band", - "afc_band", "hourly_rate", "median", ".data", + "afc_band", "hourly_rate", "median", ".data", "afc_staff", "directorate", "aggregate", "barplot")) From cc3e7e2531dc8afbf2606fd2dd0698766c340b5e Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 20:40:24 +0100 Subject: [PATCH 18/19] CMD warning message --- R/mod_introduction.R | 8 -------- R/utils-globalVariables.R | 3 ++- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/R/mod_introduction.R b/R/mod_introduction.R index 2f2988d..e2f6e62 100644 --- a/R/mod_introduction.R +++ b/R/mod_introduction.R @@ -67,13 +67,5 @@ mod_introduction_server <- function(id) { nhsbsaGPG::gpg_data(afc_staff)$ending_fy }) - nhsbsaGPG::paygap - nhsbsaGPG::quartile - - - - - - }) } diff --git a/R/utils-globalVariables.R b/R/utils-globalVariables.R index e0dfff4..3b63edc 100644 --- a/R/utils-globalVariables.R +++ b/R/utils-globalVariables.R @@ -1,3 +1,4 @@ utils::globalVariables(c("period", "headcount", "gender", "afc_band", "afc_band", "hourly_rate", "median", ".data", - "afc_staff", "directorate", "aggregate", "barplot")) + "afc_staff", "directorate", "aggregate", "barplot", + "quartile", "paygap")) From 8700e8d54428c678a5eebf821dd436ba29d76bf5 Mon Sep 17 00:00:00 2001 From: kaygo Date: Wed, 25 Oct 2023 20:44:05 +0100 Subject: [PATCH 19/19] delete rd as it cause warning --- man/afc_staff.Rd | 27 --------------------------- man/paygap.Rd | 26 -------------------------- man/quartile.Rd | 33 --------------------------------- 3 files changed, 86 deletions(-) delete mode 100644 man/afc_staff.Rd delete mode 100644 man/paygap.Rd delete mode 100644 man/quartile.Rd diff --git a/man/afc_staff.Rd b/man/afc_staff.Rd deleted file 
mode 100644 index a72f4c9..0000000 --- a/man/afc_staff.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{afc_staff} -\alias{afc_staff} -\title{NHSBSA employee afc_staff} -\format{ -A data frame with gender pay gap information -} -\usage{ -afc_staff -} -\description{ -A dataset containing NHSBSA employee hourly pay -by reporting period split by gender, FTE, AFC band, directorate. -\itemize{ - \item period. 2018/19, 2019/20 etc character - \item gender. Male or Female, character - \item headcount. employee headcount used for aggregation - \item hourly_rate. hourly rate as shown pay slip - \item quartile. split hourly_rate by quartile by gender - \item fte. employee full time or part time information - \item afc_band. AFC band - \item directorate. NHSBSA directorate -} -} -\keyword{datasets} diff --git a/man/paygap.Rd b/man/paygap.Rd deleted file mode 100644 index 1ca4916..0000000 --- a/man/paygap.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{paygap} -\alias{paygap} -\title{NHSBSA employee paygap} -\format{ -A data frame with paygap information -} -\usage{ -paygap -} -\description{ -A dataset containing NHSBSA employee paygap -Directly pulled from ESR dashboard (NHS National Returns) -gender, average hourly rate, median hourly rate and pay gap% -} -\details{ -\itemize{ - \item period. 2018/19, 2019/20 etc character - \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees - \item median_hr_gpg. 
Median gender pay gap % based - on male full-pay relevant employees -} -} -\keyword{datasets} diff --git a/man/quartile.Rd b/man/quartile.Rd deleted file mode 100644 index 41e328f..0000000 --- a/man/quartile.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{quartile} -\alias{quartile} -\title{NHSBSA employee quartile} -\format{ -An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 50 rows and 5 columns. -} -\usage{ -quartile -} -\description{ -A dataset containing NHSBSA employee hourly pay -by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc) -by quartiles -} -\details{ -\itemize{ - \item period. 2018/19, 2019/20 etc character - \item quartile. split hourly pay into quartiles - \item female. number of female employees in each quartile - \item male. number of male employees in each quartile - \item quartile. split hourly_rate by quartile by gender - } - - @docType data - @keywords datasets - @name quartile - @usage quartile - @format data frame with employee gender hourly pay by quartiles -} -\keyword{datasets}