From 145f54b1d9626a648b2c4ab3778b1833a5f0e157 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Mon, 23 Oct 2023 10:05:29 +0100
Subject: [PATCH 01/19] add .gitignore temporary data folder

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index dc34566..2755bb2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,4 @@ rsconnect
 ################################################
 *.[xX][lL][sS][xXmMtT]?
 data/*
+data_temp/*

From a25e3a2a76ea51234b88ab50ce4ddaeb13802c22 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Mon, 23 Oct 2023 13:12:27 +0100
Subject: [PATCH 02/19] data-raw process and create three basic data for the
 report

---
 DESCRIPTION                           |  3 +-
 data-raw/01_headcount_by_gender_afc.R | 53 ---------------
 data-raw/01_read_hourly_gpg.R         | 96 +++++++++++++++++++++++++++
 3 files changed, 98 insertions(+), 54 deletions(-)
 delete mode 100644 data-raw/01_headcount_by_gender_afc.R
 create mode 100644 data-raw/01_read_hourly_gpg.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 636a29b..85688bf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -27,7 +27,8 @@ Imports:
     futile.logger (>= 1.4.3),
     stringr,
     formatR,
-    tidyr
+    tidyr,
+    purrr
 Suggests:
     pkgload,
     testthat (>= 3.0.0),
diff --git a/data-raw/01_headcount_by_gender_afc.R b/data-raw/01_headcount_by_gender_afc.R
deleted file mode 100644
index 3c0c2e5..0000000
--- a/data-raw/01_headcount_by_gender_afc.R
+++ /dev/null
@@ -1,53 +0,0 @@
-# This is dummy tidy dataset but it will include 
-# mean and median pay per AFC band to create one file
-
-# This dummy data includes maternity leave, sick leave etc
-# Therefore, headcounts are slightly higher then reported
-# figure
-
-# Library
-
-library(dplyr)
-library(dbplyr)
-
-# Set up connection to DALP
-con <- nhsbsaR::con_nhsbsa(database = "DALP")
-
-# Create a lazy table from cleaned employee table in DALP
-data_db <- con |> 
-  tbl(from = in_schema("DALL_REF", "EMPLOYEE_DASHBOARD_COMBINED_EMPLOYMENT_DATA"))
-
-# Summary headcount table of Financial Year, Gender, AFC band
-
-headcount <- data_db |> 
-  filter(substr(ESR_MONTH, 1, 6) == '01-MAR', 
-         as.numeric(substr(ESR_MONTH, 8, 9)) %in% c(18, 19, 20, 21, 22, 23)) |> 
-  mutate(
-    FINANCIAL_YEAR = case_when(
-      as.numeric(substr(ESR_MONTH, 8, 9)) == 18 ~ '2017/18',
-      as.numeric(substr(ESR_MONTH, 8, 9)) == 19 ~ '2018/19',
-      as.numeric(substr(ESR_MONTH, 8, 9)) == 20 ~ '2019/20',
-      as.numeric(substr(ESR_MONTH, 8, 9)) == 21 ~ '2020/21',
-      as.numeric(substr(ESR_MONTH, 8, 9)) == 22 ~ '2021/22',
-      as.numeric(substr(ESR_MONTH, 8, 9)) == 23 ~ '2022/23',
-      TRUE ~ 'unknown'
-    )
-  ) |> 
-  group_by(FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME , FTE_GROUP) |> 
-  summarise(HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE)) |> 
-  ungroup() |> 
-  arrange(FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME, FTE_GROUP) |> 
-  collect() |> 
-  # In case we want to report by year (keep it as factor) 
-  mutate(FINANCIAL_YEAR = factor(FINANCIAL_YEAR, 
-                                 levels = unique(FINANCIAL_YEAR)),
-         PAY_GRADE_NAME = factor(PAY_GRADE_NAME))
-
-
-# Add to data 
-usethis::use_data(headcount, overwrite = TRUE)
-
-DBI::dbDisconnect(con) 
-rm(list = ls())
-gc()
-
diff --git a/data-raw/01_read_hourly_gpg.R b/data-raw/01_read_hourly_gpg.R
new file mode 100644
index 0000000..30b00de
--- /dev/null
+++ b/data-raw/01_read_hourly_gpg.R
@@ -0,0 +1,96 @@
+# This data is an extract from ESR dashboard
+# Three parts will be pulled for the report
+# Gender pay gap (%) based on male hourly pay
+# Quantiles by gender
+# Join with staff list to get AFC band information, FTE
+
+# Load required libraries
+library(readxl)
+library(dplyr)
+library(purrr)
+library(stringr)
+
+# List all excel and csv files in the directory
+files <- list.files(path = "./data_temp", pattern = "\\.xlsx$|\\.csv$", full.names = TRUE)
+
+# Function to process each file
+process_file <- function(filepath) {
+  # Determine the financial year from the filename
+  fy_pattern <- "FY(\\d{2})(\\d{2})"
+  fy_matches <- regmatches(filepath, regexec(fy_pattern, filepath))[[1]]
+  # Reporting period label, e.g. "FY2223" in the file name gives "2022/23"
+  financial_year <- paste0("20", fy_matches[2], "/", fy_matches[3])
+
+  # Create three data frames and add the financial year (reporting period)
+  if (stringr::str_detect(filepath, "\\.xlsx$")) {
+    list(
+      paygap = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |>
+        select(1:3) |>
+        janitor::clean_names() |>
+        mutate(period = financial_year),
+      quartile = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |>
+        select(5:9) |>
+        janitor::clean_names() |>
+        mutate(period = financial_year),
+      afc = read_excel(filepath, skip = 8, col_names = TRUE) |>
+        select(2:7) |>
+        janitor::clean_names() |>
+        mutate(period = financial_year)
+    )
+    # staff list information as csv
+  } else if (stringr::str_detect(filepath, "\\.csv$")) {
+    list(
+      staff = read.csv(filepath, header = TRUE) |>
+        janitor::clean_names() |>
+        filter(primary == "Y") |>
+        select(
+          employee_number,
+          org_l3,
+          pay_scale,
+          fte
+        ) |>
+        mutate(
+          period = financial_year,
+          employee_number = as.character(employee_number)
+        )
+    )
+  }
+}
+
+# Apply the function to each file
+dfs <- map(files, process_file)
+
+# Row bind each named element (paygap, quartile, afc, staff) across files
+paygap <- map(dfs, "paygap") |>
+  bind_rows() |>
+  select(period, everything())
+quartile <- map(dfs, "quartile") |>
+  bind_rows() |>
+  select(period, everything())
+afc <- map(dfs, "afc") |>
+  bind_rows() |>
+  select(period, everything())
+staff <- map(dfs, "staff") |>
+  bind_rows() |>
+  select(period, everything())
+
+# AFC and staff information join based on employee number
+# After that, add lookup
+lookup <- read.csv("./data-raw/afc_band_lookup.csv", header = TRUE)
+
+afc_staff <- afc |>
+  left_join(staff,
+    by = c("period", "employee_number")
+  ) |>
+  left_join(lookup,
+    by = "pay_scale"
+  )
+
+# Keep three main data frame and it will be used to create S3 class
+usethis::use_data(paygap, overwrite = TRUE)
+usethis::use_data(quartile, overwrite = TRUE)
+usethis::use_data(afc_staff, overwrite = TRUE)
+
+
+
+rm(c(dfs, afc, staff))

From 3963bfcf4494a00a20800853bd5842b197798997 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Mon, 23 Oct 2023 16:12:03 +0100
Subject: [PATCH 03/19] add gpg_class S3 object

---
 DESCRIPTION                   |   2 +-
 NAMESPACE                     |   1 -
 R/gpg_data_class.R            | 220 ++++++++++++++++++++++++++++++++++
 R/headcount_data_class.R      | 209 --------------------------------
 data-raw/01_read_hourly_gpg.R |  13 +-
 man/gender_profile.Rd         |  33 ++++-
 man/headcount_data.Rd         |  27 +++--
 7 files changed, 275 insertions(+), 230 deletions(-)
 create mode 100644 R/gpg_data_class.R
 delete mode 100644 R/headcount_data_class.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 85688bf..d90693d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,8 +16,8 @@ Imports:
     highcharter,
     htmltools,
     magrittr,
-    nhsbsaR,
     rlang,
+    nhsbsaR,
     scrollytell,
     shiny,
     shinyjs,
diff --git a/NAMESPACE b/NAMESPACE
index d01036d..05ec5ca 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,7 +8,6 @@ export(h3_tabstop)
 export(h4_tabstop)
 export(h5_tabstop)
 export(h6_tabstop)
-export(headcount_data)
 export(nhs_card)
 export(nhs_card_tabstop)
 export(nhs_footer)
diff --git a/R/gpg_data_class.R b/R/gpg_data_class.R
new file mode 100644
index 0000000..f17a663
--- /dev/null
+++ b/R/gpg_data_class.R
@@ -0,0 +1,220 @@
+#' @title S3 gpg class to create headcount, hourly rate by AFC,
+#' directorate level hourly rate.
+#'
+#' @description \code{gpg_data} is the class used for the creation of
+#' headcount, hourly rate by AFC with or without directorate in the GPG report.
+#'
+#' @details The \code{gpg_data} class expects a \code{data.frame} with at least
+#' eight columns: period, gender, headcount, hourly_rate, quartile, fte, afc_band,
+#' directorate.
+#'
+#' Once initiated, the class has six slots:
+#' \code{df}: raw data frame \n
+#' \code{df_hdcnt}: data frame contains headcount by period \n
+#' \code{df_hdcnt_afc}: data frame contains headcount by afc band \n
+#' \code{df_hdcnt_dir}: data frame contains headcount by directorate \n
+#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade \n
+#' \code{ending_fy}: a character vector containing ending reporting period
+#' (e.g. 31 March 2023). This uses for introduction paragraph
+#'
+#'
+#'
+#' @param x Input data frame.
+#' @param log_level keep it WARN
+#' @param eda If TRUE base R plot shows in the Viewer
+#'
+#' @return If the class is not instantiated correctly, nothing is returned.
+#'
+#' @examples
+#'
+#' library(nhsbsaGPG)
+#'
+#' df <- gpg_data(afc_staff)
+#'
+#' @export
+
+
+gpg_data <- function(x,
+                     log_level = futile.logger::WARN,
+                     eda = FALSE) {
+  # Set logger severity threshold, defaults to WARN
+  futile.logger::flog.threshold(log_level)
+
+  # Checks: structural problems stop(); data-quality concerns only log
+  futile.logger::flog.info("Initiating gpg_data class.
+                           \n\nIt expects a data.frame with at
+                           least eight columns: period, gender,
+                           headcount, hourly_rate, quartile,
+                           fte, afc_band, directorate.")
+
+  futile.logger::flog.debug("Checking x is a data.frame...")
+  if (!is.data.frame(x)) {
+    futile.logger::flog.error("x must be a data.frame",
+      x,
+      capture = TRUE
+    )
+    # Logging alone does not halt execution, so fail explicitly here
+    stop("x must be a data.frame")
+  }
+
+  futile.logger::flog.debug("Checking x has correct columns...")
+  # Threshold matches the eight columns named in the message below
+  if (length(colnames(x)) < 8) {
+    futile.logger::flog.error("x must have at least eight columns:
+                              period, gender, headcount, hourly_rate,
+                              quartile, fte, afc_band, directorate.")
+  }
+
+  futile.logger::flog.debug("Checking x contains a period column...")
+  if (!"period" %in% colnames(x)) {
+    stop("x must contain period column")
+  }
+
+  futile.logger::flog.debug("Checking x contains a gender column...")
+  if (!"gender" %in% colnames(x)) stop("x must contain gender column")
+
+  futile.logger::flog.debug("Checking x contains a headcount column...")
+  if (!"headcount" %in% colnames(x)) stop("x must contain headcount column")
+
+  futile.logger::flog.debug("Checking x contains a hourly_rate column...")
+  if (!"hourly_rate" %in% colnames(x)) {
+    stop("x must contain hourly_rate column")
+  }
+
+  futile.logger::flog.debug("Checking x contains a fte column...")
+  if (!"fte" %in% colnames(x)) {
+    stop("x must contain fte column")
+  }
+
+  futile.logger::flog.debug("Checking x contains a afc_band column...")
+  if (!"afc_band" %in% colnames(x)) {
+    stop("x must contain afc_band column")
+  }
+
+  futile.logger::flog.debug("Checking x contains a directorate column...")
+  if (!"directorate" %in% colnames(x)) {
+    stop("x must contain directorate column")
+  }
+
+  futile.logger::flog.debug("Checking x does not contain missing values...")
+  if (anyNA(x)) stop("x cannot contain any missing values")
+
+  futile.logger::flog.debug("Checking for the correct number of rows...")
+  if (nrow(x) < 16000) {
+    futile.logger::flog.warn("x does not appear to be well formed. nrow(x) should be
+                             greater than 16000.")
+  }
+
+  # Check sensible range for reporting period
+  futile.logger::flog.debug("Checking beginning reporting period in a sensible
+                            range e.g.(2018:2023)...")
+
+  # Characters 1-4 of period are read as the starting year
+  if (any(as.numeric(stringr::str_sub(x$period, 1, 4)) < 2018)) {
+    futile.logger::flog.warn("The dates should start from
+                             2018/19 financial year. Please check data-raw script.")
+  }
+
+  futile.logger::flog.info("...check done..")
+
+  # Message required to pass a test
+  message("Checks completed: 'gpg_data' S3 class created. Good to use for charts")
+
+  # EDA
+  # number of HEADCOUNT per financial year - expect to increase?
+  agg_data <- aggregate(headcount ~ period, x, sum)
+
+  if (isTRUE(eda)) {
+    # Vertical bars: periods run along the x axis, headcount up the y axis
+    barplot(agg_data$headcount,
+      names.arg = agg_data$period,
+      las = 2,
+      xlab = "Reporting period",
+      ylab = "Headcount"
+    )
+  }
+
+  # Calculate the latest reporting year
+  # This values are required to add to the introduction text
+  # (eg. as of 31 March 2023)
+  start_latest_year <- max(as.numeric(stringr::str_sub(x$period, 1, 4)))
+  # Zero-pad the two-digit ending year so the label keeps the "YYYY/YY" shape
+  latest_fy <- sprintf(
+    "%d/%02d",
+    start_latest_year, (start_latest_year + 1) %% 100
+  )
+
+  # data frame: aggregate headcount by period
+  df_hdcnt <- x |>
+    dplyr::group_by(period) |>
+    dplyr::summarise(headcount = sum(headcount, na.rm = TRUE)) |>
+    dplyr::arrange(period)
+
+  # Extract the values
+  reporting_headcount <-
+    agg_data$headcount[agg_data$period == latest_fy]
+
+  ending_fy <- as.character(start_latest_year + 1)
+
+  # data frame: aggregate headcount by period and AFC band
+  df_hdcnt_afc <- x |>
+    dplyr::group_by(period, gender, afc_band) |>
+    dplyr::summarise(
+      headcount = sum(headcount, na.rm = TRUE),
+      .groups = "drop"
+    ) |>
+    dplyr::group_by(period, afc_band) |>
+    dplyr::mutate(
+      perc = headcount / sum(headcount) * 100
+    )
+
+  # data frame: aggregate headcount by period and directorate
+  df_hdcnt_dir <- x |>
+    dplyr::group_by(period, gender, directorate) |>
+    dplyr::summarise(
+      headcount = sum(headcount, na.rm = TRUE),
+      .groups = "drop"
+    ) |>
+    dplyr::group_by(period, directorate) |>
+    dplyr::mutate(
+      perc = headcount / sum(headcount) * 100
+    )
+
+  # data frame: hourly rate by gender for overall, each AFC band
+  df_hrrate <- dplyr::bind_rows(
+    x |>
+      dplyr::group_by(period, gender, afc_band) |>
+      dplyr::summarise(
+        mean_rate = mean(hourly_rate, na.rm = TRUE),
+        median_rate = median(hourly_rate, na.rm = TRUE),
+        .groups = "drop"
+      ),
+    x |>
+      dplyr::group_by(period, gender) |>
+      dplyr::summarise(
+        mean_rate = mean(hourly_rate, na.rm = TRUE),
+        median_rate = median(hourly_rate, na.rm = TRUE),
+        .groups = "drop"
+      ) |>
+      dplyr::mutate(afc_band = "Overall")
+  )
+
+
+  # Define the class here ----
+  # This will mainly use for highchart graphs
+
+  structure(
+    list(
+      df = x,
+      df_hdcnt = df_hdcnt,
+      df_hdcnt_afc = df_hdcnt_afc,
+      df_hdcnt_dir = df_hdcnt_dir,
+      df_hrrate = df_hrrate,
+      reporting_headcount = reporting_headcount,
+      # Misspelled name kept for backward compatibility with existing callers
+      reporting_headcoun = reporting_headcount,
+      ending_fy = ending_fy
+    ),
+    class = "gpg_data"
+  )
+}
diff --git a/R/headcount_data_class.R b/R/headcount_data_class.R
deleted file mode 100644
index 58ad540..0000000
--- a/R/headcount_data_class.R
+++ /dev/null
@@ -1,209 +0,0 @@
-#' @title S3 headcount class to create number of headcount by gender and also 
-#' gender and AFC pay band.#' 
-#'
-#' @description \code{headcount_data} is the class used for the creation of
-#' first two headcount related figures in the GPG report.
-#'
-#' @details The \code{headcount_data} class expects a \code{data.frame} with at
-#' least five columns: FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME, FTE_GROUP, HEADCOUNT. Each
-#' row represents aggregated headcount by four columns.
-#'
-#' Once initiated, the class has seven slots: 
-#' \code{df}: data frame \n
-#' \code{overview_gender}: data frame \n
-#' \code{overview_afc}: data frame \n
-#' \code{overview_fte}: data frame \n
-#' \code{reporting_headcount}: a numeric vector containing reporting financial
-#' year's headcount \n
-#' \code{diffs}: a numeric vector containing differences from previous \n
-#' financial year headcount to current reporting financial year headcount \n
-#' \code{ending_fy}: a character vector containing ending reporting period  
-#' (e.g. 31 March 2022). This uses for introduction paragraph
-#'
-#'
-#' @param x Input data frame.
-#' @param log_level keep it WARN
-#' @param eda If TRUE base R plot shows in the Viewer
-#'
-#' @return If the class is not instantiated correctly, nothing is returned.
-#'
-#' @examples
-#'
-#' library(nhsbsaGPG)
-#'
-#' df <- headcount_data(headcount)
-#'
-#' @export
-
-
-headcount_data <- function(x, 
-                           log_level = futile.logger::WARN,
-                           eda = FALSE) {
-  
-  # Set logger severity threshold, defaults to WARN
-  futile.logger::flog.threshold(log_level)
-
-
-  # Checks
-  futile.logger::flog.info("Initiating headcount_data class.
-                           \n\nIt expects a data.frame with at
-                           least five columns: FINANCIAL_YEAR, gender,
-                           PAY_GRADE_NAME, FTE_GROUP and HEADCOUNT.
-                           Each row represents an aggregated headcount
-                           based on four columns.")
-
-
-
-  futile.logger::flog.debug("Checking x is a data.frame...")
-  if (!is.data.frame(x)) {
-    futile.logger::flog.error("x must be a data.frame",
-      x,
-      capture = TRUE
-    )
-  }
-
-  futile.logger::flog.debug("Checking x has correct columns...")
-  if (length(colnames(x)) < 5) {
-    futile.logger::flog.error("x must have at least five columns:
-                              FINANCIAL_YEAR,
-                              GENDER, PAY_GRADE_NAME,
-                              FTE_GROUP, HEADCOUNT")
-  }
-
-  futile.logger::flog.debug("Checking x contains a FINANCIAL_YEAR column...")
-  if (!"FINANCIAL_YEAR" %in% colnames(x)) {
-    stop("x must contain FINANCIAL_YEAR column")
-  }
-
-  futile.logger::flog.debug("Checking x contains a GENDER column...")
-  if (!"GENDER" %in% colnames(x)) stop("x must contain GENDER column")
-
-  futile.logger::flog.debug("Checking x contains a PAY_GRADE_NAME column...")
-  if (!"PAY_GRADE_NAME" %in% colnames(x)) {
-    stop("x must contain PAY_GRADE_NAME column")
-  }
-
-  futile.logger::flog.debug("Checking x contains a FTE_GROUP column...")
-  if (!"FTE_GROUP" %in% colnames(x)) {
-    stop("x must contain FTE_GROUP column")
-  }
-
-  futile.logger::flog.debug("Checking x contains a HEADCOUNT column...")
-  if (!"HEADCOUNT" %in% colnames(x)) {
-    stop("x must contain HEADCOUNT column")
-  }
-
-  futile.logger::flog.debug("Checking x does not contain missing values...")
-  if (anyNA(x)) stop("x cannot contain any missing values")
-
-  futile.logger::flog.debug("Checking for the correct number of rows...")
-  if (nrow(x) < 260) {
-    futile.logger::flog.warn("x does not appear to be well formed. nrow(x) should be
-                             greater than 180 (5 year * gender * fte * afc) 
-                             as of 2021/22 report.")
-  }
-
-
-
-  # Check sensible range for year
-  futile.logger::flog.debug("Checking beginning financial years in a sensible
-                            range e.g.(2017:2022)...")
-
-
-  if (any(as.numeric(stringr::str_sub(x$FINANCIAL_YEAR, 1, 4)) < 2017)) {
-    futile.logger::flog.warn("The dates should start from
-                             2017/18 financial year. Please check data-raw script.")
-  }
-
-
-  futile.logger::flog.info("...check done..")
-
-  # Message required to pass a test
-  message("Checks completed: 'headcount_data' S3 class created.")
-
-  # EDA
-  # number of HEADCOUNT per financial year - expect to increase?
-  if (eda == TRUE) {
-    agg_data <- aggregate(HEADCOUNT ~ FINANCIAL_YEAR, x, sum)
-    barplot(agg_data$HEADCOUNT,
-      names.arg = agg_data$FINANCIAL_YEAR,
-      las = 2,
-      ylab = "Financial Year",
-      xlab = "Headcount"
-    )
-  }
-
-
-  # Calculate the latest and previous years
-  # This values are required to add to the interactive document
-  start_latest_year <- max(as.numeric(stringr::str_sub(x$FINANCIAL_YEAR, 1, 4)))
-  start_prev_year <- start_latest_year - 1
-  # Financial year of interest for the report
-  latest_fy <-  paste0(
-    start_latest_year, "/",
-    as.numeric(stringr::str_sub(start_latest_year, 3, 4)) + 1
-  )
-  previous_fy <- paste0(
-    start_prev_year, "/",
-    stringr::str_sub(start_latest_year, 3, 4)
-  )
-
-  # First aggregate by financial year
-  agg_data <- x |>
-    dplyr::filter(FINANCIAL_YEAR %in% c(latest_fy, previous_fy)) |>
-    dplyr::group_by(FINANCIAL_YEAR) |>
-    dplyr::summarise(TOTAL_HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE)) |>
-    dplyr::arrange(FINANCIAL_YEAR)
-
-  # Extract the values
-  reporting_headcount <-
-    agg_data$TOTAL_HEADCOUNT[agg_data$FINANCIAL_YEAR == latest_fy]
-  previous_reporting_headcount <-
-    agg_data$TOTAL_HEADCOUNT[agg_data$FINANCIAL_YEAR == previous_fy]
-
-  diffs <- reporting_headcount - previous_reporting_headcount
-
-  ending_fy <- as.character(start_latest_year + 1)
-
-  # Attach data frame: headcount by GENDER
-  overview_gender <- x |> 
-    dplyr::group_by(FINANCIAL_YEAR, GENDER) |> 
-    dplyr::summarise(HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE),
-                      .groups = "drop") |>
-    tidyr::pivot_wider(names_from = GENDER,
-                       values_from = HEADCOUNT)
-  
-  # Attach data frame: headcount by GENDER & PAY_GRADE_NAME
-  overview_afc <- x |> 
-    dplyr::group_by(FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME) |> 
-    dplyr::summarise(HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE),
-                     .groups = "drop") 
-  
-  # Attach data frame: headcount by GENDER & FTE
-  overview_fte <- x |> 
-    dplyr::group_by(FINANCIAL_YEAR, GENDER, FTE_GROUP) |> 
-    dplyr::summarise(HEADCOUNT = sum(HEADCOUNT, na.rm = TRUE),
-                     .groups = "drop") |> 
-    tidyr::pivot_wider(names_from = c(GENDER, FTE_GROUP),
-                       values_from = HEADCOUNT)
-    
-
-  # Define the class here ----
-  # It will use to create highchart line graph
-
-  structure(
-    list(
-      df = x,
-      overview_gender = overview_gender,
-      overview_afc = overview_afc,
-      overview_fte = overview_fte,
-      reporting_headcount = reporting_headcount,
-      diffs = diffs,
-      ending_fy = ending_fy
-    ),
-    class = "headcount_data"
-  )
-}
-
-
-
diff --git a/data-raw/01_read_hourly_gpg.R b/data-raw/01_read_hourly_gpg.R
index 30b00de..139528d 100644
--- a/data-raw/01_read_hourly_gpg.R
+++ b/data-raw/01_read_hourly_gpg.R
@@ -84,7 +84,16 @@ afc_staff <- afc |>
   ) |>
   left_join(lookup,
     by = "pay_scale"
-  )
+  ) |> 
+  select(-employee_number) |> 
+# Data quality error July 2013 Archive employee org is wrong, manually edited
+  mutate(org_l3 = ifelse(org_l3 == 'July 2013 Archive', "914 BSA Finance, Commercial and Estates L3", org_l3),
+         directorate = stringr::str_replace_all(
+           org_l3, c("^914 BSA " = "", " L3" = "")),
+         directorate = stringr::str_trim(directorate),
+         headcount = 1) |> 
+  select(period, gender, headcount,hourly_rate, quartile, fte, afc_band, directorate)
+  
 
 # Keep three main data frame and it will be used to create S3 class
 usethis::use_data(paygap, overwrite = TRUE)
@@ -93,4 +102,4 @@ usethis::use_data(afc_staff, overwrite = TRUE)
 
 
-rm(c(dfs, afc, staff))
+rm(dfs, afc, staff)
diff --git a/man/gender_profile.Rd b/man/gender_profile.Rd
index 9b503b2..0ff5e32 100644
--- a/man/gender_profile.Rd
+++ b/man/gender_profile.Rd
@@ -2,18 +2,43 @@
 % Please edit documentation in R/utils_charts.R
 \name{gender_profile}
 \alias{gender_profile}
-\title{line chart to show the number of headcount by financial year by gender.}
+\title{Highcharter line chart to show the number of headcount by financial
+year by gender.}
 \usage{
-gender_profile(x)
+gender_profile(
+  x,
+  xvar = "FINANCIAL_YEAR",
+  yvars,
+  series_names,
+  yaxis_title,
+  yaxis_label
+)
 }
 \arguments{
-\item{x}{Input headcount_data S3 class object.}
+\item{x}{Input data frame from \code{headcount_data} S3 class object.}
+
+\item{xvar}{"Financial Year", default}
+
+\item{yvars}{data frame converts to list and each list element to create line}
+
+\item{series_names}{If user wants to give different series name for
+highchart legend}
+
+\item{yaxis_title}{Title of y axis}
+
+\item{yaxis_label}{Indication of percentage or number}
+
+\item{show_legend}{TRUE default}
+
+\item{line_style}{Control line style either Solid or DashDot}
+
+\item{series_alpha}{Control opacity}
 }
 \value{
 Returns a highchart or htmlwidget object.
 }
 \description{
-\code{headcount_data} is the S3 class used for gender related 
+\code{headcount_data} is the S3 class used for gender related
 summary of workforce
 }
 \examples{
diff --git a/man/headcount_data.Rd b/man/headcount_data.Rd
index bc972e7..7b64762 100644
--- a/man/headcount_data.Rd
+++ b/man/headcount_data.Rd
@@ -2,18 +2,17 @@
 % Please edit documentation in R/headcount_data_class.R
 \name{headcount_data}
 \alias{headcount_data}
-\title{tidy data set for first headcount related two graphs.}
+\title{S3 headcount class to create number of headcount by gender and also 
+gender and AFC pay band.#'}
 \usage{
 headcount_data(x, log_level = futile.logger::WARN, eda = FALSE)
 }
 \arguments{
 \item{x}{Input data frame.}
 
-\item{log_level}{The severity level at which log messages are written from
-least to most serious: TRACE, DEBUG, INFO, WARN, ERROR, FATAL. Default is
-level is INFO. See \code{?flog.threshold()} for additional details.}
+\item{log_level}{keep it WARN}
 
-\item{eda}{If TRUE an graphical data analysis is conducted for a human to check.}
+\item{eda}{If TRUE base R plot shows in the Viewer}
 }
 \value{
 If the class is not instantiated correctly, nothing is returned.
@@ -27,14 +26,16 @@ The \code{headcount_data} class expects a \code{data.frame} with at
 least five columns: FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME, FTE_GROUP, HEADCOUNT. Each
 row represents aggregated headcount by four columns.
 
-Once initiated, the class has five slots: 
-\code{df}: dataframe
-\code{colnames}: a character vector containing the column names from the {df}
-\code{reporting_headcount}: a numeric vector containing reporting financial 
-year's headcount
-\code{diffs}: a numeric vector containing differences from previous 
-financial year headcount to current reporting financial year headcount
-\code{ending_fy}: a character vector containing ending reporting period 
+Once initiated, the class has seven slots: 
+\code{df}: data frame \n
+\code{overview_gender}: data frame \n
+\code{overview_afc}: data frame \n
+\code{overview_fte}: data frame \n
+\code{reporting_headcount}: a numeric vector containing reporting financial
+year's headcount \n
+\code{diffs}: a numeric vector containing differences from previous \n
+financial year headcount to current reporting financial year headcount \n
+\code{ending_fy}: a character vector containing ending reporting period  
 (e.g. 31 March 2022). This uses for introduction paragraph
 }
 \examples{

From 477165c8ad8ffcf9de3c07ce8ce8759fbdd63873 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Mon, 23 Oct 2023 22:16:02 +0100
Subject: [PATCH 04/19] fix to pass build test

---
 .Rbuildignore                                 |  1 +
 DESCRIPTION                                   | 11 +--
 NAMESPACE                                     |  1 +
 R/data.R                                      | 81 +++++++++++++++----
 R/gpg_data_class.R                            | 25 ++++--
 R/utils_charts.R                              | 33 ++++----
 ...1_read_hourly_gpg.R => gpg_data_process.R} |  6 +-
 man/afc_staff.Rd                              | 27 +++++++
 man/gender_profile.Rd                         | 26 +++---
 man/gpg_data.Rd                               | 44 ++++++++++
 man/headcount.Rd                              | 18 -----
 man/headcount_data.Rd                         | 47 -----------
 man/paygap.Rd                                 | 25 ++++++
 man/quartile.Rd                               | 34 ++++++++
 tests/testthat/test-utils_charts.R            | 20 +++--
 15 files changed, 263 insertions(+), 136 deletions(-)
 rename data-raw/{01_read_hourly_gpg.R => gpg_data_process.R} (94%)
 create mode 100644 man/afc_staff.Rd
 create mode 100644 man/gpg_data.Rd
 delete mode 100644 man/headcount.Rd
 delete mode 100644 man/headcount_data.Rd
 create mode 100644 man/paygap.Rd
 create mode 100644 man/quartile.Rd

diff --git a/.Rbuildignore b/.Rbuildignore
index 73641e8..f6a7b85 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -11,3 +11,4 @@
 ^gitleaks.toml$
 ^\.github$
 ^\.lintr$
+^data_temp$
diff --git a/DESCRIPTION b/DESCRIPTION
index d90693d..19ef858 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,23 +16,18 @@ Imports:
     highcharter,
     htmltools,
     magrittr,
-    rlang,
     nhsbsaR,
     scrollytell,
     shiny,
     shinyjs,
     dplyr (>= 1.1.3),
-    dbplyr (>= 2.3.3),
-    forcats (>= 1.0.0),
     futile.logger (>= 1.4.3),
-    stringr,
-    formatR,
-    tidyr,
-    purrr
+    stringr
 Suggests:
     pkgload,
     testthat (>= 3.0.0),
-    usethis
+    usethis,
+    tidyr
 Remotes:
     nhsbsa-data-analytics/nhsbsaR,
     statistiekcbs/scrollytell
diff --git a/NAMESPACE b/NAMESPACE
index 05ec5ca..64342d2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export("%>%")
 export(gender_profile)
+export(gpg_data)
 export(h1_tabstop)
 export(h2_tabstop)
 export(h3_tabstop)
diff --git a/R/data.R b/R/data.R
index 72da395..8b824cd 100644
--- a/R/data.R
+++ b/R/data.R
@@ -1,16 +1,69 @@
-#' NHSBSA employee headcount
+#' NHSBSA employee staff_afc
 #'
-#' A dataset containing NHSBSA employee headcount
-#' since financial year 2017/18, split by gender, AFC band 
-#' and FTE (full time or part time) 
+#' A dataset containing NHSBSA employee hourly pay
+#' by reporting period split by gender, AFC band, directorate.
+#' \itemize{
+#'   \item period. 2018/19, 2019/20 etc character
+#'   \item gender. Male or Female, character
+#'   \item headcount. employee headcount used for aggregation
+#'   \item hourly_rate. hourly rate as shown pay slip
+#'   \item quartile. split hourly_rate by quartile by gender
+#'   \item fte. employee full time or part time info
+#'   \item afc_band. AFC band
+#'   \item directorate. NHSBSA directorate
+#' } 
+#' 
+#' @docType data
+#' @keywords datasets
+#' @name afc_staff
+#' @usage afc_staff
+#' @format A data frame with gender pay gap information 
+#' 
+#' 
+"afc_staff"
+
+
+#' NHSBSA employee paygap
+#' 
+#' A dataset containing NHSBSA employee paygap
+#' Directly pulled from ESR dashboard (NHS National Returns) 
+#' gender, average hourly rate, median hourly rate and pay gap%
+#' 
+#' \itemize{
+#'    \item period. 2018/19, 2019/20 etc character
+#'   \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees
+#'   \item median_hr_gpg. Median gender pay gap % based on male full-pay relevant employees
+#'   }
+#'   
+#' @docType data
+#' @keywords datasets
+#' @name paygap
+#' @usage paygap
+#' @format A data frame with paygap information
+
+"paygap"
+
+
+#' NHSBSA employee quartile
 #'
-#' @format A data frame
-#' \describe{
-#'   \item{FINANCIAL_YEAR}{01-Mar-year, factor}
-#'   \item{GENDER}{Male or Female, character}
-#'   \item{PAY_GRADE_NAME}{AFC band}
-#'   \item{FTE_GROUP}{employee full time or part time info}
-#'   \item{HEADCOUNT}To get number of employees}
-#'   ...
-#' }
-"headcount"
\ No newline at end of file
+#' A dataset containing NHSBSA employee hourly pay
+#' by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc)
+#' split by gender, AFC band, directorate
+#' 
+#' 
+#' \itemize{
+#'   \item period. 2018/19, 2019/20 etc character
+#'   \item quartile. split hourly pay into quartiles
+#'   \item female. number of female employees in each quartile
+#'   \item male. number of male employees in each quartile
+#'   \item quartile. split hourly_rate by quartile by gender
+#'   \item female_percent. female employee % in quartile
+#'   \item male_percent. male employee % in quartile   
+#'  }
+#'  @docType data
+#'  @keywords datasets
+#'  @name quartile
+#'  @usage quartile
+#'  @format data frame with employee gender pay gap by quartiles
+
+"quartile"
\ No newline at end of file
diff --git a/R/gpg_data_class.R b/R/gpg_data_class.R
index f17a663..5b2a47a 100644
--- a/R/gpg_data_class.R
+++ b/R/gpg_data_class.R
@@ -9,11 +9,12 @@
 #' directorate.
 #'
 #' Once initiated, the class has six slots:
-#' \code{df}: raw data frame \n
-#' \code{df_hdcnt}: data frame contains headcount by period \n
-#' \code{df_hdcnt_afc}: data frame contains headcount by afc band \n
-#' \code{df_hdcnt_dir}: data frame contains headcount by directorate \n
-#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade \n
+#' \code{df}: raw data frame 
+#' \code{df_hdcnt}: data frame contains headcount by period 
+#' \code{df_hdcnt_gender}: data frame contains headcount by gender by period 
+#' \code{df_hdcnt_afc}: data frame contains headcount by afc band 
+#' \code{df_hdcnt_dir}: data frame contains headcount by directorate 
+#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade 
 #' \code{ending_fy}: a character vector containing ending reporting period
 #' (e.g. 31 March 2023). This uses for introduction paragraph
 #'
@@ -25,10 +26,9 @@
 #'
 #' @return If the class is not instantiated correctly, nothing is returned.
 #'
-#' @examples
-#'
+#' @examples 
+#'  
 #' library(nhsbsaGPG)
-#'
 #' df <- gpg_data(afc_staff)
 #'
 #' @export
@@ -157,6 +157,14 @@ gpg_data <- function(x,
     agg_data$headcount[agg_data$period == latest_fy]
 
   ending_fy <- as.character(start_latest_year + 1)
+  
+  # data frame: aggregate headcount by gender by period
+  df_hdcnt_gender <- x |>
+    dplyr::group_by(period,gender) |>
+    dplyr::summarise(headcount = sum(headcount, na.rm = TRUE)) |>
+    dplyr::arrange(period)
+  
+  
 
   # data frame: aggregate headcount by period and AFC band
   df_hdcnt_afc <- x |>
@@ -209,6 +217,7 @@ gpg_data <- function(x,
     list(
       df = x,
       df_hdcnt = df_hdcnt,
+      df_hdcnt_gender = df_hdcnt_gender,
       df_hdcnt_afc = df_hdcnt_afc,
       df_hdcnt_dir = df_hdcnt_dir,
       df_hrrate = df_hrrate,
diff --git a/R/utils_charts.R b/R/utils_charts.R
index 8ac956e..c5285b3 100644
--- a/R/utils_charts.R
+++ b/R/utils_charts.R
@@ -1,37 +1,36 @@
 #' @title Highcharter line chart to show the number of headcount by financial
 #' year by gender.
 #'
-#' @description \code{headcount_data} is the S3 class used for gender related
-#' summary of workforce
+#' @description \code{gpg_data} is the S3 class used for trend
 #'
 #'
 #' @return Returns a highchart or htmlwidget object.
 #'
-#' @examples
-#'
-#' workforce <- nhsbsaGPG::headcount_data(nhsbsaGPG::headcount)
-#' nhsbsaGPG::gender_profile(workforce)
+#' @examples \dontrun{
+#' workforce <- nhsbsaGPG::gpg_data(nhsbsaGPG::afc_staff)
+#' nhsbsaGPG::gender_profile(workforce$df_hdcnt_gender)
+#' } 
 #'
 #' @export
-#' @param x Input data frame from \code{headcount_data} S3 class object.
-#' @param xvar "Financial Year", default
+#' @param x Input data frame from \code{gpg_data} S3 class object.
+#' @param xvar "period", default
 #' @param yvars data frame converts to list and each list element to create line
-#' @param series_names If user wants to give different series name for
-#' highchart legend
+#' @param series_names If user wants to give different series name for highchart legend
 #' @param yaxis_title Title of y axis
 #' @param yaxis_label Indication of percentage or number
 #' @param show_legend TRUE default
-#' @param line_style Control line style either Solid or DashDot
-#' @param series_alpha Control opacity
-#'
-#' @import nhsbsa-data-analytics/nhsbsaR
+#' 
+#' @importFrom aggregate
+#' @importFrom barplot 
+#' 
 #'
 gender_profile <- function(x,
-                           xvar = "FINANCIAL_YEAR",
+                           xvar = "period",
                            yvars,
                            series_names,
                            yaxis_title,
-                           yaxis_label 
+                           yaxis_label,
+                           show_legend = TRUE
                            ){
   out <- tryCatch(
     expr = {
@@ -57,7 +56,7 @@ gender_profile <- function(x,
             data = data,
             type = "line",
             highcharter::hcaes(
-              x = .data[[xvar]], # default financial year
+              x = .data[[xvar]], # default period3
               y = .data[[yvar]] # Female for example
             ),
             name = series_name # these labels will show in legend
diff --git a/data-raw/01_read_hourly_gpg.R b/data-raw/gpg_data_process.R
similarity index 94%
rename from data-raw/01_read_hourly_gpg.R
rename to data-raw/gpg_data_process.R
index 139528d..a0b5534 100644
--- a/data-raw/01_read_hourly_gpg.R
+++ b/data-raw/gpg_data_process.R
@@ -27,7 +27,11 @@ process_file <- function(filepath) {
       paygap = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |>
         select(1:3) |>
         janitor::clean_names() |>
-        mutate(period = financial_year),
+        mutate(period = financial_year) |> 
+        filter(gender == "Pay Gap %") |> 
+        select(period,
+               avg_hr_gpg = avg_hourly_rate,
+               median_hr_gpg = median_hourly_rate),
       quartile = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |>
         select(5:9) |>
         janitor::clean_names() |>
diff --git a/man/afc_staff.Rd b/man/afc_staff.Rd
new file mode 100644
index 0000000..afb475f
--- /dev/null
+++ b/man/afc_staff.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{afc_staff}
+\alias{afc_staff}
+\title{NHSBSA employee staff_afc}
+\format{
+A data frame with gender pay gap information
+}
+\usage{
+afc_staff
+}
+\description{
+A dataset containing NHSBSA employee hourly pay
+by reporting period split by gender, AFC band, directorate.
+\itemize{
+  \item period. 2018/19, 2019/20 etc character
+  \item gender. Male or Female, character
+  \item headcount. employee headcount used for aggregation
+  \item hourly_rate. hourly rate as shown pay slip
+  \item quartile. split hourly_rate by quartile by gender
+  \item fte. employee full time or part time info
+  \item afc_band. AFC band
+  \item directorate. NHSBSA directorate
+}
+}
+\keyword{datasets}
diff --git a/man/gender_profile.Rd b/man/gender_profile.Rd
index 0ff5e32..e31235c 100644
--- a/man/gender_profile.Rd
+++ b/man/gender_profile.Rd
@@ -7,43 +7,39 @@ year by gender.}
 \usage{
 gender_profile(
   x,
-  xvar = "FINANCIAL_YEAR",
+  xvar = "period",
   yvars,
   series_names,
   yaxis_title,
-  yaxis_label
+  yaxis_label,
+  show_legend = TRUE
 )
 }
 \arguments{
-\item{x}{Input data frame from \code{headcount_data} S3 class object.}
+\item{x}{Input data frame from \code{gpg_data} S3 class object.}
 
-\item{xvar}{"Financial Year", default}
+\item{xvar}{"period", default}
 
 \item{yvars}{data frame converts to list and each list element to create line}
 
-\item{series_names}{If user wants to give different series name for
-highchart legend}
+\item{series_names}{If user wants to give different series name for highchart legend}
 
 \item{yaxis_title}{Title of y axis}
 
 \item{yaxis_label}{Indication of percentage or number}
 
 \item{show_legend}{TRUE default}
-
-\item{line_style}{Control line style either Solid or DashDot}
-
-\item{series_alpha}{Control opacity}
 }
 \value{
 Returns a highchart or htmlwidget object.
 }
 \description{
-\code{headcount_data} is the S3 class used for gender related
-summary of workforce
+\code{gpg_data} is the S3 class used for trend
 }
 \examples{
-
-workforce <- nhsbsaGPG::headcount_data(nhsbsaGPG::headcount)
-nhsbsaGPG::gender_profile(workforce)
+\dontrun{
+workforce <- nhsbsaGPG::gpg_data(nhsbsaGPG::afc_staff)
+nhsbsaGPG::gender_profile(workforce$df_hdcnt_gender)
+} 
 
 }
diff --git a/man/gpg_data.Rd b/man/gpg_data.Rd
new file mode 100644
index 0000000..3e79d37
--- /dev/null
+++ b/man/gpg_data.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gpg_data_class.R
+\name{gpg_data}
+\alias{gpg_data}
+\title{S3 gpg class to create headcount, hourly rate by AFC,
+directorate level hourly rate.'}
+\usage{
+gpg_data(x, log_level = futile.logger::WARN, eda = FALSE)
+}
+\arguments{
+\item{x}{Input data frame.}
+
+\item{log_level}{keep it WARN}
+
+\item{eda}{If TRUE base R plot shows in the Viewer}
+}
+\value{
+If the class is not instantiated correctly, nothing is returned.
+}
+\description{
+\code{gpg_data} is the class used for the creation of
+headcount, hourly rate by AFC with without directorate in the GPG report.
+}
+\details{
+The \code{gpg_data} class expects a \code{data.frame} with at
+least seven columns: period, gender, hourly_rate, quartile, fte, afc_band,
+directorate.
+
+Once initiated, the class has six slots:
+\code{df}: raw data frame 
+\code{df_hdcnt}: data frame contains headcount by period 
+\code{df_hdcnt_gender}: data frame contains headcount by gender by period 
+\code{df_hdcnt_afc}: data frame contains headcount by afc band 
+\code{df_hdcnt_dir}: data frame contains headcount by directorate 
+\code{df_hrrate}: data frame contains hourly rate by gender for each grade 
+\code{ending_fy}: a character vector containing ending reporting period
+(e.g. 31 March 2023). This uses for introduction paragraph
+}
+\examples{
+ 
+library(nhsbsaGPG)
+df <- gpg_data(afc_staff)
+
+}
diff --git a/man/headcount.Rd b/man/headcount.Rd
deleted file mode 100644
index c02164e..0000000
--- a/man/headcount.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{headcount}
-\alias{headcount}
-\title{NHSBSA employee headcount}
-\format{
-
-}
-\usage{
-headcount
-}
-\description{
-A dataset containing NHSBSA employee headcount
-since financial year 2017/18, split by gender, AFC band 
-and FTE (full time or part time)
-}
-\keyword{datasets}
diff --git a/man/headcount_data.Rd b/man/headcount_data.Rd
deleted file mode 100644
index 7b64762..0000000
--- a/man/headcount_data.Rd
+++ /dev/null
@@ -1,47 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/headcount_data_class.R
-\name{headcount_data}
-\alias{headcount_data}
-\title{S3 headcount class to create number of headcount by gender and also 
-gender and AFC pay band.#'}
-\usage{
-headcount_data(x, log_level = futile.logger::WARN, eda = FALSE)
-}
-\arguments{
-\item{x}{Input data frame.}
-
-\item{log_level}{keep it WARN}
-
-\item{eda}{If TRUE base R plot shows in the Viewer}
-}
-\value{
-If the class is not instantiated correctly, nothing is returned.
-}
-\description{
-\code{headcount_data} is the class used for the creation of
-first two headcount related figures in the GPG report.
-}
-\details{
-The \code{headcount_data} class expects a \code{data.frame} with at
-least five columns: FINANCIAL_YEAR, GENDER, PAY_GRADE_NAME, FTE_GROUP, HEADCOUNT. Each
-row represents aggregated headcount by four columns.
-
-Once initiated, the class has seven slots: 
-\code{df}: data frame \n
-\code{overview_gender}: data frame \n
-\code{overview_afc}: data frame \n
-\code{overview_fte}: data frame \n
-\code{reporting_headcount}: a numeric vector containing reporting financial
-year's headcount \n
-\code{diffs}: a numeric vector containing differences from previous \n
-financial year headcount to current reporting financial year headcount \n
-\code{ending_fy}: a character vector containing ending reporting period  
-(e.g. 31 March 2022). This uses for introduction paragraph
-}
-\examples{
-
-library(nhsbsaGPG)
-
-df <- headcount_data(headcount)
-
-}
diff --git a/man/paygap.Rd b/man/paygap.Rd
new file mode 100644
index 0000000..2e19a52
--- /dev/null
+++ b/man/paygap.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{paygap}
+\alias{paygap}
+\title{NHSBSA employee paygap}
+\format{
+A data frame with paygap information
+}
+\usage{
+paygap
+}
+\description{
+A dataset containing NHSBSA employee paygap
+Directly pulled from ESR dashboard (NHS National Returns) 
+gender, average hourly rate, median hourly rate and pay gap%
+}
+\details{
+\itemize{
+   \item period. 2018/19, 2019/20 etc character
+  \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees
+  \item median_hr_gpg. Median gender pay gap % based on male full-pay relevant employees
+  }
+}
+\keyword{datasets}
diff --git a/man/quartile.Rd b/man/quartile.Rd
new file mode 100644
index 0000000..9cc63aa
--- /dev/null
+++ b/man/quartile.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{quartile}
+\alias{quartile}
+\title{NHSBSA employee quartile}
+\format{
+An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 20 rows and 6 columns.
+}
+\usage{
+quartile
+}
+\description{
+A dataset containing NHSBSA employee hourly pay
+by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc)
+split by gender, AFC band, directorate
+}
+\details{
+\itemize{
+  \item period. 2018/19, 2019/20 etc character
+  \item quartile. split hourly pay into quartiles
+  \item female. number of female employees in each quartile
+  \item male. number of male employees in each quartile
+  \item quartile. split hourly_rate by quartile by gender
+  \item female_percent. female employee % in quartile
+  \item male_percent. male employee % in quartile   
+ }
+ @docType data
+ @keywords datasets
+ @name quartile
+ @usage quartile
+ @format data frame with employee gender pay gap by quartiles
+}
+\keyword{datasets}
diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R
index bb6551f..f418d8f 100644
--- a/tests/testthat/test-utils_charts.R
+++ b/tests/testthat/test-utils_charts.R
@@ -1,9 +1,15 @@
-df <- headcount_data(headcount)
-x <- df$overview_gender
+
+library(tidyr)
+
+df <- gpg_data(afc_staff)
+x <- df$df_hdcnt_gender |> 
+  tidyr::pivot_wider(names_from = gender,
+                     values_from = headcount)
+
 
 testthat::test_that("gender_profile runs without errors", {
   expect_silent(gender_profile(x,
-    xvar = "FINANCIAL_YEAR",
+    xvar = "period",
     yvars = c("Male", "Female"),
     series_names = c("Male", "Female"),
     yaxis_title = "Male and Female employee headcount",
@@ -15,14 +21,12 @@ testthat::test_that("gender_profile runs without errors", {
 testthat::test_that("gender_profile outputs a highchart, htmlwidget class", {
   expect_equal(class(
     gender_profile(x,
-      xvar = "FINANCIAL_YEAR",
+      xvar = "period",
       yvars = c("Male", "Female"),
       series_names = c("Male", "Female"),
       yaxis_title = "Male and Female employee headcount",
-      yaxis_label = ""
-    ),
-    yaxis_label = "number"
-  ), c("highchart", "htmlwidget"))
+      yaxis_label = "number"
+    )), c("highchart", "htmlwidget"))
 })
 
 testthat::test_that("gender_profile takes list as an input", {

From 94e61bda7a697ff0e0d72045ba256bb473817fe5 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Tue, 24 Oct 2023 08:57:21 +0100
Subject: [PATCH 05/19] remove example old faithful data

---
 R/faithful.R      |  23 -----------------------
 data/faithful.rda | Bin 1327 -> 0 bytes
 2 files changed, 23 deletions(-)
 delete mode 100644 R/faithful.R
 delete mode 100644 data/faithful.rda

diff --git a/R/faithful.R b/R/faithful.R
deleted file mode 100644
index 23fd845..0000000
--- a/R/faithful.R
+++ /dev/null
@@ -1,23 +0,0 @@
-#' Old Faithful Geyser Data
-#'
-#' A closer look at \code{faithful$eruptions} reveals that these are
-#' heavily rounded times originally in seconds, where multiples of 5 are
-#' more frequent than expected under non-human measurement.  For a
-#' better version of the eruption times, see the example below.
-#' There are many versions of this dataset around: Azzalini and Bowman
-#' (1990) use a more complete version.
-#'
-#'  \itemize{
-#'     \item eruptions. Eruption time in mins
-#'     \item waiting. Waiting time to next eruption (in mins)
-#' }
-#'
-#' @source {W. H<U+00E4>rdle.}
-#'
-#' @docType data
-#' @keywords datasets
-#' @name faithful
-#' @usage faithful
-#' @format A data frame with 272 observations on 2 variables.
-#'
-"faithful"
diff --git a/data/faithful.rda b/data/faithful.rda
deleted file mode 100644
index 704313a32aa3923c57fcce405bc528d366db6850..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1327
zcmV+~1<?9JT4*^jL0KkKS%gvu6aWSLfB*mg{ZxGmx~T8}|IvT<-{olPD!P)qRUJoH
zRx665=>O0K7-yKA;Na_yU2fn>B&qEw>S)FyGJ8`~)BrR914pE200)$98bBHV27q8~
zQ`9sKG5|COp%m~$ViF#p&_;ov0iXjD6Vw1SWEubf004S`000001e#QeO+8Jip{I!h
zO$LmHfCEROWYbI#VlrsZG5`QH(8vuM1_%Z~G$3Tc8X9EK!Wv*qA(4V<lSYjiU=sk1
z05UN&U`7$9CV*rbWYY<d5lSd(Wc1W}fB-!}!~lAY27mwtho}Gm0000000E{$O!X&~
z@@eXbjcPWdOB&Wxt7&S`v~6WU1gfb~m9~soYE&(j1s0Sm5iLrhGKjSlP!$lVQ!6V$
znM|_~zvdQbaaS)LZcS5J+q%K;E%|}ezDu?+1jJe;q!qerPW11dhO8UeBdGXafja&H
zrGwGH^k_#iR5WtP2P6^z=d@PR)B?N$#}E+&QA8GHk2rxA#uAl+?=M`*4W!eDAVPwM
z<^p_e;6WPL1zQrfC4wbeC4eZY07+FLDF`Y+^k5GN2!=4ljhUywLmsyl-D)MMR-mj_
zQ7Sf~3{_)kAq)i!i&R034s|*}Fe&IuFT(+>4zs?v0i6hvz(EG3fk_GyHIhLRE`vKP
z`#QBkoj9c^DKpJdKnc*P@<#{;xhe}tJOe))3|Hu=NmYQyk(g{%HE@J>fWcQ?d}Mc0
z-YJ?81szFf?NViQ$#oum?~%?&fhud}g+(YRKT8+g{&%)0=4g&jv`3SrUQUY%^nNXC
zq6?Qqs-hAQG-S{KhRAR*Fi=2{2udLgAXLIdAx4lfNf;ysKs2O;{A%L3tW~Q;1|XIx
zWbl#vOOLv}CY&sk5J=u*P3TiISm@*;SPOi~46LRQWIr`A)#+Vw%LwRwA;qNx)x@xC
ziZSL*F@QFD%LEG2IYSsU&Vh48q?U#~O2)w}>uDVHy)U?f4C%*fLsg*bZXFg*Wt^6>
zqB<7#^0Ks2&NG&Itr5Zv%FM#@_oH+<l)=3YcODm><}PzTlhA0cb3fL}%P?YsvJ6yJ
zil7RLf~*u45LhUpu@Q=*z+$SZBC*M`qNu2>1rY^|V#P)Zs4P@f7AT__jP}W>F!~5-
zJ1)C7u)`t6b-YOe6?nkHk4a!du#k_S2o=>pE3jL56a|E0LS$rTn@siy+ioIBpfpE;
z=_V<?ObeBGw0=P>>Pzq_Xlh}kb8>P~Ll{#)tcRk;R7%x>EY5kQDyKlsdVY(`g(oif
zZif@ivZqV(BP6nx9&1SAPO^<(Vb+i)>a?7(81wqTja6US(^mXZb0P8uYAYN?G_clc
z*fxC;Ev};UjKuKv<bl-|4~{c66ESF*^oY9A)vLV~stt_>b*d!t1d@}AIa`L=Ao<o{
zmFi8LSc;Hwkj`>D(HCkYV3<f!pt4&Bwwt~wX)bmct%PC$`U+rs6%=dKDFa6OZDRdv
zCTjyVLR!e32o|VY2l@6UU<2%py5d=EK<E$ZBa$neIm}%ZF$Aqe^;qlvkZVb{Eq>1k
zw19-cf&djGw)(^wyPXLT-CiMp3SF6*kcq@Mgu_u%OfACyQXh~RvVPOvko*9F+~sKX
z4qs7$a539lI(%u7tHA*AM)7TjP?rK-BTcWx^Q()7iLFa8A-P8EY=r)aa%RX(H3%!I
l{xG*oGG7!d04iOu%p?FAFbl?%p%AD1UC9*TLP8NpAW*xzIeGv9


From c4ce6e1de86bd1094ddb4aebf00695e6ca0b8035 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Tue, 24 Oct 2023 08:59:59 +0100
Subject: [PATCH 06/19] rda added gitignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 2755bb2..fb16f07 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,5 +15,7 @@ rsconnect
 # Excel files (including macros and templates) #
 ################################################
 *.[xX][lL][sS][xXmMtT]?
+# Any RDA files
+*.[rR][dD][aA]
 data/*
 data_temp/*

From 6939313374d3244ec841f2a6d6fabbe29e9228c9 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Tue, 24 Oct 2023 13:34:09 +0100
Subject: [PATCH 07/19] Data stays in azure people, temporary download to
 process gpg_data_process.r then delete all files in data_temp

---
 R/data.R                                      |  4 +-
 data-raw/1.read_azure_blob.R                  | 42 +++++++++++++++++++
 ...pg_data_process.R => 2.gpg_data_process.R} | 16 +++++++
 data-raw/execution.json                       |  7 ++++
 data-raw/faithful.R                           |  4 --
 5 files changed, 67 insertions(+), 6 deletions(-)
 create mode 100644 data-raw/1.read_azure_blob.R
 rename data-raw/{gpg_data_process.R => 2.gpg_data_process.R} (88%)
 create mode 100644 data-raw/execution.json
 delete mode 100644 data-raw/faithful.R

diff --git a/R/data.R b/R/data.R
index 8b824cd..e02135a 100644
--- a/R/data.R
+++ b/R/data.R
@@ -1,7 +1,7 @@
-#' NHSBSA employee staff_afc
+#' NHSBSA employee afc_staff
 #'
 #' A dataset containing NHSBSA employee hourly pay
-#' by reporting period split by gender, AFC band, directorate.
+#' by reporting period split by gender, FTE, AFC band, directorate.
 #' \itemize{
 #'   \item period. 2018/19, 2019/20 etc character
 #'   \item gender. Male or Female, character
diff --git a/data-raw/1.read_azure_blob.R b/data-raw/1.read_azure_blob.R
new file mode 100644
index 0000000..54ad5f4
--- /dev/null
+++ b/data-raw/1.read_azure_blob.R
@@ -0,0 +1,42 @@
+library(AzureStor)
+library(jsonlite)
+library(janitor)
+library(nhsbsaR)
+library(dplyr)
+
+config <- jsonlite::fromJSON("./data-raw/execution.json")
+
+account_name <- Sys.getenv('AZURE_BLOB_STORAGE_PDS_ACCOUNT_NAME')
+account_key <- Sys.getenv('AZURE_BLOB_STORAGE_PDS_ACCOUNT_KEY')
+endpoint_suffix <- "core.windows.net"
+
+container_name <- config$import$container
+folder_path <- config$import$folder
+
+
+# Create a blob service client
+blob_endpoint <- sprintf('https://%s.blob.%s', account_name, endpoint_suffix)
+blob_client <- storage_endpoint(blob_endpoint, key=account_key)
+
+# Get the blob client instance for the given container
+blob_container <- storage_container(blob_client, container_name)
+
+# List all blobs in the specified folder
+blobs_in_folder <- list_blobs(blob_container, prefix=folder_path)
+
+# Extract the 'name' values from the result
+blob_names <- blobs_in_folder[["name"]]
+
+# Loop through each blob in the folder and download
+for (blob_name in blob_names) {
+  
+  local_file_path <- paste0(config$import$local_path, '/', basename(blob_name))
+  
+  # Check if the file exists locally, and if so, delete it
+  if (file.exists(local_file_path)) {
+    file.remove(local_file_path)
+  }
+  
+  # Download the blob data to the local file
+  storage_download(blob_container, blob_name, local_file_path, overwrite = TRUE)
+}
diff --git a/data-raw/gpg_data_process.R b/data-raw/2.gpg_data_process.R
similarity index 88%
rename from data-raw/gpg_data_process.R
rename to data-raw/2.gpg_data_process.R
index a0b5534..1e2a23c 100644
--- a/data-raw/gpg_data_process.R
+++ b/data-raw/2.gpg_data_process.R
@@ -104,6 +104,22 @@ usethis::use_data(paygap, overwrite = TRUE)
 usethis::use_data(quartile, overwrite = TRUE)
 usethis::use_data(afc_staff, overwrite = TRUE)
 
+# delete all the files in data_temp as they only stay in azure storage
 
+# Specify the folder path
+folder_path <- "./data_temp" 
+
+# List all files in the directory
+files_to_delete <- list.files(path = folder_path, full.names = TRUE)
+
+# Remove all files
+result <- file.remove(files_to_delete)
+
+# Check if all files were deleted successfully
+if (all(result)) {
+  cat("All files deleted successfully.\n")
+} else {
+  cat("Some files could not be deleted.\n")
+}
 
 rm(dfs, afc, staff)
diff --git a/data-raw/execution.json b/data-raw/execution.json
new file mode 100644
index 0000000..f60bf46
--- /dev/null
+++ b/data-raw/execution.json
@@ -0,0 +1,7 @@
+{
+    "import": {
+        "container": "peopledata-prod",
+        "local_path": "./data_temp",
+        "folder": "Gender Pay Gap Data"
+    }
+}
\ No newline at end of file
diff --git a/data-raw/faithful.R b/data-raw/faithful.R
deleted file mode 100644
index 41faec3..0000000
--- a/data-raw/faithful.R
+++ /dev/null
@@ -1,4 +0,0 @@
-# Add any preparation before saving your data using usethis::use_data
-
-# Add to data/
-usethis::use_data(faithful, overwrite = TRUE)

From 191d87661aa255c110eec2d48ce823ed94d0faff Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 15:44:03 +0100
Subject: [PATCH 08/19] chart functions for GPG report

---
 DESCRIPTION                             |   7 +-
 NAMESPACE                               |   6 +-
 R/app_server.R                          |   4 +-
 R/app_ui.R                              |  12 +-
 R/data.R                                |  38 ++---
 R/gpg_data_class.R                      |  36 ++--
 R/mod_chart_example.R                   |  60 -------
 R/mod_introduction.R                    |  72 ++++++++
 R/mod_markdown_example.R                |  22 ---
 R/mod_scrollytell_example.R             | 189 ---------------------
 R/utils_charts.R                        | 211 ++++++++++++++++++++----
 data-raw/1.read_azure_blob.R            |  18 +-
 data-raw/2.gpg_data_process.R           |  54 ++++--
 man/afc_staff.Rd                        |   6 +-
 man/faithful.Rd                         |  30 ----
 man/gpg_pyramid.Rd                      |  24 +++
 man/gpg_stack.Rd                        |  26 +++
 man/{gender_profile.Rd => gpg_trend.Rd} |  19 +--
 man/quartile.Rd                         |   9 +-
 tests/testthat/test-utils_charts.R      |  70 ++++++--
 20 files changed, 464 insertions(+), 449 deletions(-)
 delete mode 100644 R/mod_chart_example.R
 create mode 100644 R/mod_introduction.R
 delete mode 100644 R/mod_markdown_example.R
 delete mode 100644 R/mod_scrollytell_example.R
 delete mode 100644 man/faithful.Rd
 create mode 100644 man/gpg_pyramid.Rd
 create mode 100644 man/gpg_stack.Rd
 rename man/{gender_profile.Rd => gpg_trend.Rd} (71%)

diff --git a/DESCRIPTION b/DESCRIPTION
index 19ef858..29de8f7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -17,20 +17,19 @@ Imports:
     htmltools,
     magrittr,
     nhsbsaR,
-    scrollytell,
     shiny,
     shinyjs,
     dplyr (>= 1.1.3),
     futile.logger (>= 1.4.3),
-    stringr
+    stringr,
+    htmlwidgets
 Suggests:
     pkgload,
     testthat (>= 3.0.0),
     usethis,
     tidyr
 Remotes:
-    nhsbsa-data-analytics/nhsbsaR,
-    statistiekcbs/scrollytell
+    nhsbsa-data-analytics/nhsbsaR
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
diff --git a/NAMESPACE b/NAMESPACE
index 64342d2..eb263e2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,8 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
-export(gender_profile)
 export(gpg_data)
+export(gpg_pyramid)
+export(gpg_stack)
+export(gpg_trend)
 export(h1_tabstop)
 export(h2_tabstop)
 export(h3_tabstop)
@@ -27,3 +29,5 @@ importFrom(golem,bundle_resources)
 importFrom(golem,favicon)
 importFrom(golem,with_golem_options)
 importFrom(magrittr,"%>%")
+importFrom(shiny,NS)
+importFrom(shiny,tagList)
diff --git a/R/app_server.R b/R/app_server.R
index 1c429f7..61c8c3a 100644
--- a/R/app_server.R
+++ b/R/app_server.R
@@ -6,7 +6,5 @@
 #' @noRd
 app_server <- function(input, output, session) {
   # Your application server logic
-  mod_markdown_example_server("markdown_example_ui_1")
-  mod_chart_example_server("chart_example_ui_1")
-  mod_scrollytell_example_server("scrollytell_example_1")
+  mod_introduction_server("introduction_1")
 }
diff --git a/R/app_ui.R b/R/app_ui.R
index 9416988..7299b3f 100644
--- a/R/app_ui.R
+++ b/R/app_ui.R
@@ -26,16 +26,8 @@ app_ui <- function(request) {
             well = FALSE,
             widths = c(3, 9),
             tabPanel(
-              title = "Introduction",
-              mod_markdown_example_ui("markdown_example_ui_1")
-            ),
-            tabPanel(
-              title = "Charts",
-              mod_chart_example_ui("chart_example_ui_1")
-            ),
-            tabPanel(
-              title = "Scrolly example",
-              mod_scrollytell_example_ui("scrollytell_example_1")
+              title = "Report",
+              mod_introduction_ui("introduction_1")
             )
           )
         )
diff --git a/R/data.R b/R/data.R
index e02135a..0803780 100644
--- a/R/data.R
+++ b/R/data.R
@@ -8,33 +8,34 @@
 #'   \item headcount. employee headcount used for aggregation
 #'   \item hourly_rate. hourly rate as shown pay slip
 #'   \item quartile. split hourly_rate by quartile by gender
-#'   \item fte. employee full time or part time info
+#'   \item fte. employee full time or part time information
 #'   \item afc_band. AFC band
 #'   \item directorate. NHSBSA directorate
-#' } 
-#' 
+#' }
+#'
 #' @docType data
 #' @keywords datasets
 #' @name afc_staff
 #' @usage afc_staff
-#' @format A data frame with gender pay gap information 
-#' 
-#' 
+#' @format A data frame with gender pay gap information
+#'
+#'
 "afc_staff"
 
 
 #' NHSBSA employee paygap
-#' 
+#'
 #' A dataset containing NHSBSA employee paygap
-#' Directly pulled from ESR dashboard (NHS National Returns) 
+#' Directly pulled from ESR dashboard (NHS National Returns)
 #' gender, average hourly rate, median hourly rate and pay gap%
-#' 
+#'
 #' \itemize{
 #'    \item period. 2018/19, 2019/20 etc character
 #'   \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees
-#'   \item median_hr_gpg. Median gender pay gap % based on male full-pay relevant employees
-#'   }
-#'   
+#'   \item median_hr_gpg. Median gender pay gap % based
+#'   on male full-pay relevant employees
+#' }
+#'
 #' @docType data
 #' @keywords datasets
 #' @name paygap
@@ -48,22 +49,21 @@
 #'
 #' A dataset containing NHSBSA employee hourly pay
 #' by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc)
-#' split by gender, AFC band, directorate
-#' 
-#' 
+#' by quartiles
+#'
+#'
 #' \itemize{
 #'   \item period. 2018/19, 2019/20 etc character
 #'   \item quartile. split hourly pay into quartiles
 #'   \item female. number of female employees in each quartile
 #'   \item male. number of male employees in each quartile
 #'   \item quartile. split hourly_rate by quartile by gender
-#'   \item female_percent. female employee % in quartile
-#'   \item male_percent. male employee % in quartile   
 #'  }
+#'
 #'  @docType data
 #'  @keywords datasets
 #'  @name quartile
 #'  @usage quartile
-#'  @format data frame with employee gender pay gap by quartiles
+#'  @format data frame with employee gender hourly pay by quartiles
 
-"quartile"
\ No newline at end of file
+"quartile"
diff --git a/R/gpg_data_class.R b/R/gpg_data_class.R
index 5b2a47a..04df3a7 100644
--- a/R/gpg_data_class.R
+++ b/R/gpg_data_class.R
@@ -9,12 +9,12 @@
 #' directorate.
 #'
 #' Once initiated, the class has six slots:
-#' \code{df}: raw data frame 
-#' \code{df_hdcnt}: data frame contains headcount by period 
-#' \code{df_hdcnt_gender}: data frame contains headcount by gender by period 
-#' \code{df_hdcnt_afc}: data frame contains headcount by afc band 
-#' \code{df_hdcnt_dir}: data frame contains headcount by directorate 
-#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade 
+#' \code{df}: raw data frame
+#' \code{df_hdcnt}: data frame contains headcount by period
+#' \code{df_hdcnt_gender}: data frame contains headcount by gender by period
+#' \code{df_hdcnt_afc}: data frame contains headcount by afc band
+#' \code{df_hdcnt_dir}: data frame contains headcount by directorate
+#' \code{df_hrrate}: data frame contains hourly rate by gender for each grade
 #' \code{ending_fy}: a character vector containing ending reporting period
 #' (e.g. 31 March 2023). This uses for introduction paragraph
 #'
@@ -26,8 +26,8 @@
 #'
 #' @return If the class is not instantiated correctly, nothing is returned.
 #'
-#' @examples 
-#'  
+#' @examples
+#'
 #' library(nhsbsaGPG)
 #' df <- gpg_data(afc_staff)
 #'
@@ -149,7 +149,8 @@ gpg_data <- function(x,
   # data frame: aggregate headcount by period
   df_hdcnt <- x |>
     dplyr::group_by(period) |>
-    dplyr::summarise(headcount = sum(headcount, na.rm = TRUE)) |>
+    dplyr::summarise(headcount = sum(headcount, na.rm = TRUE),
+                     .groups = "drop") |>
     dplyr::arrange(period)
 
   # Extract the values
@@ -157,14 +158,15 @@ gpg_data <- function(x,
     agg_data$headcount[agg_data$period == latest_fy]
 
   ending_fy <- as.character(start_latest_year + 1)
-  
+
   # data frame: aggregate headcount by gender by period
   df_hdcnt_gender <- x |>
-    dplyr::group_by(period,gender) |>
-    dplyr::summarise(headcount = sum(headcount, na.rm = TRUE)) |>
+    dplyr::group_by(period, gender) |>
+    dplyr::summarise(headcount = sum(headcount, na.rm = TRUE),
+                     .groups = "drop") |>
     dplyr::arrange(period)
-  
-  
+
+
 
   # data frame: aggregate headcount by period and AFC band
   df_hdcnt_afc <- x |>
@@ -176,7 +178,8 @@ gpg_data <- function(x,
     dplyr::group_by(period, afc_band) |>
     dplyr::mutate(
       perc = headcount / sum(headcount) * 100
-    )
+    ) |>
+    dplyr::ungroup()
 
   # data frame: aggregate headcount by period and directorate
   df_hdcnt_dir <- x |>
@@ -188,7 +191,8 @@ gpg_data <- function(x,
     dplyr::group_by(period, directorate) |>
     dplyr::mutate(
       perc = headcount / sum(headcount) * 100
-    )
+    ) |>
+    dplyr::ungroup()
 
   # data frame: hourly rate by gender for overall, each AFC band
   df_hrrate <- dplyr::bind_rows(
diff --git a/R/mod_chart_example.R b/R/mod_chart_example.R
deleted file mode 100644
index b1a46e6..0000000
--- a/R/mod_chart_example.R
+++ /dev/null
@@ -1,60 +0,0 @@
-#' chart_example UI Function
-#'
-#' @description A shiny Module.
-#'
-#' @param id,input,output,session Internal parameters for {shiny}.
-#'
-#' @noRd
-mod_chart_example_ui <- function(id) {
-  ns <- NS(id)
-  tagList(
-    h1_tabstop("First level"),
-    h2_tabstop("Second level"),
-    nhs_card_tabstop(
-      heading = "example chart title",
-      nhs_selectInput(
-        inputId = ns("bins"),
-        label = "Number of bins:",
-        choices = c(5, 10, 15, 20),
-        selected = 20,
-        full_width = TRUE
-      ),
-      highcharter::highchartOutput(
-        outputId = ns("chart"),
-        height = "400px"
-      ),
-      mod_nhs_download_ui(
-        id = ns("download_test")
-      )
-    )
-  )
-}
-
-#' chart_example Server Functions
-#'
-#' @noRd
-mod_chart_example_server <- function(id) {
-  moduleServer(id, function(input, output, session) {
-    ns <- session$ns
-
-    output$chart <- highcharter::renderHighchart({
-      # Generate bins based on input$bins from ui.R
-      x <- nhsbsaGPG::faithful[, 2]
-      bins <- seq(min(x), max(x), length.out = as.numeric(input$bins) + 1)
-
-      # Draw the histogram with the specified number of bins
-      chart <- graphics::hist(x, breaks = bins, plot = FALSE)
-
-      # Output interactive chart
-      chart %>%
-        highcharter::hchart() %>%
-        nhsbsaR::theme_nhsbsa_highchart()
-    })
-
-    mod_nhs_download_server(
-      id = "download_test",
-      filename = "test.csv",
-      export_data = nhsbsaGPG::faithful
-    )
-  })
-}
diff --git a/R/mod_introduction.R b/R/mod_introduction.R
new file mode 100644
index 0000000..b1e5db1
--- /dev/null
+++ b/R/mod_introduction.R
@@ -0,0 +1,72 @@
+#' introduction UI Function
+#'
+#' @description Static introduction text; only the snapshot year is dynamic.
+#'
+#' @param id Module id, used to namespace the \code{reporting_year} output.
+#' @return A \code{tagList} holding the section heading and paragraphs.
+#' @noRd
+#'
+#' @importFrom shiny NS tagList
+mod_introduction_ui <- function(id) {
+
+  ns <- NS(id)
+  tagList(
+    # Heading followed by four paragraphs of report copy
+    h2_tabstop("Introduction"),
+    p(
+      "Under the Equality Act 2010 (Specific Duties and Public Authorities) 
+      Regulations 2017, the NHS Business Services Authority (NHSBSA), 
+      along with all public bodies with more than 250 employees, 
+      is required to publish gender pay gap information by 30th March each year. 
+      This includes information on the mean and median gender gaps in hourly pay, 
+      the mean and median gender gaps in bonus pay, 
+      the proportion of men and women who received bonuses, 
+      and the proportions of male and female employees in each pay quartile."
+    ),
+    p(
+      "The gender pay gap shows the difference in the average pay between 
+      all men and women in an organisation. It is different to equal pay, 
+      which examines the pay differences between men and women who carry out 
+      the same or similar jobs, or work of equal value. It is unlawful to pay 
+      people unequally because they are a man or a woman."
+    ),
+    p(
+      "At the NHSBSA, our people are at the centre of our business strategy, 
+      and we aspire to be an employer of choice who provides a great place 
+      to work and can recruit and retain the right talent with the wide 
+      range of knowledge, skills and capabilities we need. We are committed 
+      to a diverse and inclusive culture which supports the fair treatment 
+      and reward of all colleagues, irrespective of gender, and our pay 
+      framework is based on the principles of fairness, transparency, 
+      and consistency."
+    ),
+    p(
+      span("This report fulfils our reporting requirements and sets out what we are 
+      doing to address the gender pay gap in our organisation. The data is based on a 
+      snapshot of all employees as of 31 March ",
+           textOutput(ns("reporting_year"), inline = TRUE), style = "font-size:15pt"),
+      span(", as this is the date which all public authorities must use each year. 
+      The calculations used are those set out in the relevant legislation 
+      to ensure the data is calculated consistently across organisations.",
+           style = "font-size:15pt")
+    )
+  )
+}
+
+#' introduction Server Functions
+#' @param id Module id; the same id is passed to mod_introduction_ui().
+#' @noRd
+mod_introduction_server <- function(id) {
+
+  moduleServer(id, function(input, output, session) {
+
+    ns <- session$ns
+
+    # Year shown after "31 March" by textOutput(ns("reporting_year")) in the UI
+    output$reporting_year <- renderText({
+      # Dataset is namespace-qualified so it resolves without library(nhsbsaGPG)
+      nhsbsaGPG::gpg_data(nhsbsaGPG::afc_staff)$ending_fy
+    })
+
+  })
+}
diff --git a/R/mod_markdown_example.R b/R/mod_markdown_example.R
deleted file mode 100644
index adb84ad..0000000
--- a/R/mod_markdown_example.R
+++ /dev/null
@@ -1,22 +0,0 @@
-#' markdown_example UI Function
-#'
-#' @description A shiny Module.
-#'
-#' @param id,input,output,session Internal parameters for {shiny}.
-#'
-#' @noRd
-mod_markdown_example_ui <- function(id) {
-  ns <- NS(id)
-  tagList(
-    includeMarkdown("inst/app/www/assets/markdown/mod_markdown_example.md")
-  )
-}
-
-#' markdown_example Server Functions
-#'
-#' @noRd
-mod_markdown_example_server <- function(id) {
-  moduleServer(id, function(input, output, session) {
-    ns <- session$ns
-  })
-}
diff --git a/R/mod_scrollytell_example.R b/R/mod_scrollytell_example.R
deleted file mode 100644
index c7f6945..0000000
--- a/R/mod_scrollytell_example.R
+++ /dev/null
@@ -1,189 +0,0 @@
-#' scrollytell_example UI Function
-#'
-#' @description A shiny Module.
-#'
-#' @param id,input,output,session Internal parameters for {shiny}.
-#'
-#' @noRd
-mod_scrollytell_example_ui <- function(id) {
-  ns <- NS(id)
-  tagList(
-    h1_tabstop("Demo of scrollytelling using iris dataset"),
-    p(
-      "This section shows an example of a scrolly chart in action using the 'iris'
-       dataset. The scatter chart will remain in place and react to changes as the
-       user scrolls."
-    ),
-    # start with the overall container object that will hold the different
-    # sections to scroll through
-    scrollytell::scrolly_container(
-      # the outputID will hold the reference for the input showing the current
-      # scroll section
-      outputId = ns("scroll_level"),
-      h2_tabstop("The iris dataset"),
-      # define the container for the static part of the scrolly
-      scrollytell::scrolly_graph(
-        # place the sticky part in the center of the page
-        # for aesthetics stops the chart hitting top of page
-        div(
-          style = "margin-top: 10vh" # change based on size of sticky graph
-        ),
-        # use a nhs_card element to hold the chart
-        nhs_card_tabstop(
-          # this could be made dynamic if required by using a textOutput() object
-          heading = "Iris Dataset: Sepal Length v Width",
-          highcharter::highchartOutput(outputId = ns("example_scroll_chart"))
-        )
-      ),
-
-      # create the container for the scrolling sections of the scrolly
-      scrollytell::scrolly_sections(
-        scrollytell::scrolly_section(
-          # each section needs a unique ID to reference, use meaningful names
-          id = "section_1_all",
-          # bump the start of each section to avoid top of screen
-          div(
-            style = "height: 20vh"
-          ),
-          # text output, including header if required
-          h3_tabstop("Length v Width"),
-          p(
-            "Looking purely at the Sepal length and width does not suggest a
-             strong relationship."
-          ),
-        ),
-        scrollytell::scrolly_section(
-          id = "section_2_group", # each section needs a unique ID to reference
-          # bump the start of each section to avoid top of screen
-          div(
-            style = "height: 20vh"
-          ),
-          # text output, including header if required
-          h3_tabstop("Split by species"),
-          p(
-            "When highlighting by species type we start to see that there is
-             correlation within each species."
-          )
-        ),
-        scrollytell::scrolly_section(
-          id = "section_3_setosa", # each section needs a unique ID to reference
-          # bump the start of each section to avoid top of screen
-          div(
-            style = "height: 20vh"
-          ),
-          # text output, including header if required
-          h3_tabstop("Setosa"),
-          p(
-            "This species has the largest sepal width but some of the smallest
-             sepal lengths."
-          )
-        ),
-        scrollytell::scrolly_section(
-          # each section needs a unique ID to reference
-          id = "section_4_versicolor",
-          # bump the start of each section to avoid top of screen
-          div(
-            style = "height: 20vh"
-          ),
-          # text output, including header if required
-          h3_tabstop("Versicolor"),
-          p("This species has the some of the smallest sepal widths.")
-        ),
-        scrollytell::scrolly_section(
-          # each section needs a unique ID to reference
-          id = "section_5_virginica",
-          # bump the start of each section to avoid top of screen
-          div(
-            style = "height: 20vh"
-          ),
-          # text output, including header if required
-          h3_tabstop("Setosa"),
-          p("This species has the largest sepal lengths."),
-          # Bump the height of the last section so that the top of it aligns
-          # with the top of the sticky image when you scroll
-          div(
-            style = "height: 30vh" # change based on size of section
-          )
-        )
-      )
-    )
-  )
-}
-
-#' scrollytell_example Server Functions
-#'
-#' @noRd
-mod_scrollytell_example_server <- function(id) {
-  moduleServer(id, function(input, output, session) {
-    ns <- session$ns
-    
-    group_lvl <- NULL
-    point_col <- NULL
-    
-    # create the chart object
-    output$example_scroll_chart <- highcharter::renderHighchart({
-      # require the scroll input to prevent errors on initialisation
-      req(input$scroll_level)
-
-      # create a custom chart dataset based on the scrolly section inputs the
-      # input$scroll_level will allow you to define the chart input this input
-      # is based on section of the report that is currently active during the
-      # scroll
-      chart_data <- datasets::iris |>
-        dplyr::filter(
-          .data$Species %in% switch(input$scroll_level,
-            "section_3_setosa" = c("setosa"),
-            "section_4_versicolor" = c("versicolor"),
-            "section_5_virginica" = c("virginica"),
-            c("setosa", "versicolor", "virginica")
-          )
-        )
-
-      if (input$scroll_level == "section_1_all") {
-        chart_data <- chart_data |>
-          dplyr::mutate(group_lvl = "Species") |>
-          dplyr::mutate(point_col = "#0000FF")
-      } else {
-        chart_data <- chart_data |>
-          dplyr::mutate(group_lvl = .data$Species) |>
-          dplyr::mutate(
-            point_col = dplyr::case_when(
-              .data$Species == "setosa" ~ "#fdb863",
-              .data$Species == "versicolor" ~ "#b2abd2",
-              .data$Species == "virginica" ~ "#5e3c99",
-              TRUE ~ "#000000"
-            )
-          )
-      }
-
-      # produce the chart object
-      chart_data |>
-        highcharter::hchart(
-          type = "scatter",
-          highcharter::hcaes(
-            x = .data$Sepal.Length,
-            y = .data$Sepal.Width,
-            group = group_lvl,
-            color = point_col
-          )
-        ) |>
-        highcharter::hc_xAxis(
-          min = 4,
-          max = 8
-        ) |>
-        highcharter::hc_yAxis(
-          min = 1,
-          max = 5
-        ) |>
-        # remove plot animations
-        highcharter::hc_plotOptions(series = list(animation = FALSE)) |>
-        # disable the legend
-        highcharter::hc_legend(enabled = FALSE)
-    })
-
-    # output the scrolly object - MUST BE INCLUDED FOR SCROLLY OBJECT TO BE RENDERED
-    output$scroll_level <- scrollytell::renderScrollytell({
-      scrollytell::scrollytell()
-    })
-  })
-}
diff --git a/R/utils_charts.R b/R/utils_charts.R
index c5285b3..6803b47 100644
--- a/R/utils_charts.R
+++ b/R/utils_charts.R
@@ -6,32 +6,23 @@
 #'
 #' @return Returns a highchart or htmlwidget object.
 #'
-#' @examples \dontrun{
-#' workforce <- nhsbsaGPG::gpg_data(nhsbsaGPG::afc_staff)
-#' nhsbsaGPG::gender_profile(workforce$df_hdcnt_gender)
-#' } 
-#'
 #' @export
 #' @param x Input data frame from \code{gpg_data} S3 class object.
 #' @param xvar "period", default
 #' @param yvars data frame converts to list and each list element to create line
 #' @param series_names If user wants to give different series name for highchart legend
-#' @param yaxis_title Title of y axis
+#' @param yaxis_title Y axis title
 #' @param yaxis_label Indication of percentage or number
-#' @param show_legend TRUE default
-#' 
-#' @importFrom aggregate
-#' @importFrom barplot 
-#' 
-#'
-gender_profile <- function(x,
-                           xvar = "period",
-                           yvars,
-                           series_names,
-                           yaxis_title,
-                           yaxis_label,
-                           show_legend = TRUE
-                           ){
+#' @param colpalette custom palette
+#'
+#'
+gpg_trend <- function(x,
+                      xvar = "period",
+                      yvars,
+                      series_names,
+                      yaxis_title,
+                      yaxis_label,
+                      colpalette) {
   out <- tryCatch(
     expr = {
       # Input data frame convert to list
@@ -40,9 +31,11 @@ gender_profile <- function(x,
       # create plot object (empty one..)
       plt <- highcharter::highchart() |>
         highcharter::hc_chart(type = "line") |>
-        nhsbsaR::theme_nhsbsa_highchart(stack = NA, 
-                                        palette = c("Blue", "AquaGreen"))
-      
+        nhsbsaR::theme_nhsbsa_highchart(
+          stack = NA,
+          palette = colpalette
+        )
+
 
       # It requires minimum two series (male, female) but it could split further
       for (i in seq_along(series_names)) {
@@ -56,48 +49,194 @@ gender_profile <- function(x,
             data = data,
             type = "line",
             highcharter::hcaes(
-              x = .data[[xvar]], # default period3
+              x = .data[[xvar]], # default period
               y = .data[[yvar]] # Female for example
             ),
             name = series_name # these labels will show in legend
           )
       }
 
+
       plt <- if (yaxis_label == "percentage") {
         plt |>
           highcharter::hc_yAxis(
             title = list(text = yaxis_title),
-            labels <- list(format = "{value}"),
+            labels = list(format = "{value}%"),
             min = 0,
-            max = 100
+            max = 20
           )
       } else {
         plt |>
           highcharter::hc_yAxis(
             title = list(text = yaxis_title),
-            labels <- list(format = "{value:,f}"),
+            labels = list(format = "{value:,f}"),
             min = 0
           )
+      }
 
-        plt <- plt |>
-          highcharter::hc_xAxis(type = "category") |> 
-          highcharter::hc_legend(
-            itemWidth = 600,
-            itemMarginTop = 5,
-            y = 0
+      plt <- plt |>
+        highcharter::hc_xAxis(type = "category") |>
+        highcharter::hc_legend(
+          itemWidth = 600,
+          itemMarginTop = 5,
+          y = 0
+        )
+
+      return(plt)
+    },
+    warning = function(w) { # tryCatch() passes the condition to the handler
+      msg <- conditionMessage(w)
+      warning("Warning produced running gpg_trend(): ", msg, call. = FALSE)
+    },
+    error = function(e) {
+      stop("Error produced running gpg_trend(): ", conditionMessage(e))
+    },
+    finally = {}
+  )
+}
+
+
+
+
+#' @title Highcharter bar chart to create pyramid chart. This chart
+#' shows gender pay gap information by AFC band in NHSBSA
+#'
+#' @description \code{gpg_data} is the S3 class used for trend
+#'
+#'
+#' @return Returns a highchart or htmlwidget object.
+#'
+#'
+#' @export
+#' @param x Input data frame from \code{gpg_data} S3 class object.
+#' @param xvar "afc_band" default value
+#' @param yvar headcount/mean hourly/median hourly pay
+#' @param yaxis_title Y axis title
+
+gpg_pyramid <- function(x, xvar = "afc_band", yvar, yaxis_title) {
+  out <- tryCatch(
+    expr = {
+      data <- x
+      # Create chart object
+      plt <- data |>
+        highcharter::hchart(
+          type = "bar",
+          highcharter::hcaes(
+            x = .data[[xvar]],
+            y = .data[[yvar]],
+            group = "gender"
           )
-      }
+        ) |>
+        nhsbsaR::theme_nhsbsa_highchart(palette = "gender") |>
+        highcharter::hc_yAxis(
+          title = list(text = yaxis_title),
+          labels = list(
+            formatter = highcharter::JS(
+              "
+                function() {
+                  outHTML = this.axis.defaultLabelFormatter.call(this)
+                  return outHTML.replace('-', '')
+                }
+                "
+            )
+          )
+        ) |>
+        highcharter::hc_xAxis(
+          title = list(text = "AFC band"),
+          reversed = FALSE
+        ) |>
+        highcharter::hc_plotOptions(
+          series = list(
+            states = list(
+              # Disable series highlighting
+              inactive = list(enabled = FALSE)
+            ),
+            events = list(
+              # Disables turning the series off
+              legendItemClick = htmlwidgets::JS("function () { return false; }")
+            )
+          )
+        )
+
+
       return(plt)
     },
-    warning = function() {
-      w <- warnings()
-      warning("Warning produced running gender_profile():", w)
+    warning = function(w) { # tryCatch() passes the condition to the handler
+      msg <- conditionMessage(w)
+      warning("Warning produced running gpg_pyramid(): ", msg, call. = FALSE)
     },
     error = function(e) {
-      stop("Error produced running gender_profile():", e)
+      stop("Error produced running gpg_pyramid(): ", conditionMessage(e))
     },
     finally = {}
   )
 }
 
 
+
+
+
+#' @title Highcharter column chart to create stacked column chart. This chart
+#' shows proportion of males and females in each quartile pay band.
+#'
+#' @description {quartile} data frame is used for stacked column chart.
+#'
+#'
+#' @return Returns a highchart or htmlwidget object.
+#'
+#'
+#' @export
+#' @param x Input quartile data frame.
+#' @param xvar "afc_band" default value
+#' @param yvar headcount/mean hourly/median hourly pay
+#' @param groupvar group by variable
+#' @param yaxis_title Y axis title
+
+gpg_stack <- function(x, xvar, yvar, groupvar, yaxis_title) {
+  out <- tryCatch(
+    expr = {
+      data <- x
+      # Create chart object
+      plt <- data |>
+        highcharter::hchart(
+          type = "column",
+          highcharter::hcaes(
+            x = .data[[xvar]],
+            y = .data[[yvar]],
+            group = .data[[groupvar]]
+          )
+        ) |>
+        nhsbsaR::theme_nhsbsa_highchart(palette = "gender") |>
+        highcharter::hc_yAxis(
+          title = list(text = yaxis_title),
+          max = 100
+        ) |>
+        highcharter::hc_xAxis(
+          title = list(text = "Quartile")
+        ) |>
+        highcharter::hc_plotOptions(
+          series = list(
+            states = list(
+              # Disable series highlighting
+              inactive = list(enabled = FALSE)
+            ),
+            events = list(
+              # Disables turning the series off
+              legendItemClick = htmlwidgets::JS("function () { return false; }")
+            )
+          )
+        )
+
+
+      return(plt)
+    },
+    warning = function(w) { # tryCatch() passes the condition to the handler
+      msg <- conditionMessage(w)
+      warning("Warning produced running gpg_stack(): ", msg, call. = FALSE)
+    },
+    error = function(e) {
+      stop("Error produced running gpg_stack(): ", conditionMessage(e))
+    },
+    finally = {}
+  )
+}
diff --git a/data-raw/1.read_azure_blob.R b/data-raw/1.read_azure_blob.R
index 54ad5f4..bf9024d 100644
--- a/data-raw/1.read_azure_blob.R
+++ b/data-raw/1.read_azure_blob.R
@@ -6,8 +6,8 @@ library(dplyr)
 
 config <- jsonlite::fromJSON("./data-raw/execution.json")
 
-account_name <- Sys.getenv('AZURE_BLOB_STORAGE_PDS_ACCOUNT_NAME')
-account_key <- Sys.getenv('AZURE_BLOB_STORAGE_PDS_ACCOUNT_KEY')
+account_name <- Sys.getenv("AZURE_BLOB_STORAGE_PDS_ACCOUNT_NAME")
+account_key <- Sys.getenv("AZURE_BLOB_STORAGE_PDS_ACCOUNT_KEY")
 endpoint_suffix <- "core.windows.net"
 
 container_name <- config$import$container
@@ -15,28 +15,28 @@ folder_path <- config$import$folder
 
 
 # Create a blob service client
-blob_endpoint <- sprintf('https://%s.blob.%s', account_name, endpoint_suffix)
-blob_client <- storage_endpoint(blob_endpoint, key=account_key)
+blob_endpoint <- sprintf("https://%s.blob.%s", account_name, endpoint_suffix)
+blob_client <- storage_endpoint(blob_endpoint, key = account_key)
 
 # Get the blob client instance for the given container
 blob_container <- storage_container(blob_client, container_name)
 
 # List all blobs in the specified folder
-blobs_in_folder <- list_blobs(blob_container, prefix=folder_path)
+blobs_in_folder <- list_blobs(blob_container, prefix = folder_path)
 
 # Extract the 'name' values from the result
 blob_names <- blobs_in_folder[["name"]]
 
 # Loop through each blob in the folder and download
 for (blob_name in blob_names) {
-  
-  local_file_path <- paste0(config$import$local_path, '/', basename(blob_name))
-  
+
+  local_file_path <- paste0(config$import$local_path, "/", basename(blob_name))
+
   # Check if the file exists locally, and if so, delete it
   if (file.exists(local_file_path)) {
     file.remove(local_file_path)
   }
-  
+
   # Download the blob data to the local file
   storage_download(blob_container, blob_name, local_file_path, overwrite = TRUE)
 }
diff --git a/data-raw/2.gpg_data_process.R b/data-raw/2.gpg_data_process.R
index 1e2a23c..bd6e0c3 100644
--- a/data-raw/2.gpg_data_process.R
+++ b/data-raw/2.gpg_data_process.R
@@ -27,15 +27,16 @@ process_file <- function(filepath) {
       paygap = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |>
         select(1:3) |>
         janitor::clean_names() |>
-        mutate(period = financial_year) |> 
-        filter(gender == "Pay Gap %") |> 
+        mutate(period = financial_year) |>
+        filter(gender == "Pay Gap %") |>
         select(period,
-               avg_hr_gpg = avg_hourly_rate,
+               mean_hr_gpg = avg_hourly_rate,
                median_hr_gpg = median_hourly_rate),
       quartile = read_excel(filepath, range = cell_rows(3:7), col_names = TRUE) |>
         select(5:9) |>
         janitor::clean_names() |>
-        mutate(period = financial_year),
+        mutate(period = financial_year) |>
+        select(period, quartile, female, male) ,
       afc = read_excel(filepath, skip = 8, col_names = TRUE) |>
         select(2:7) |>
         janitor::clean_names() |>
@@ -70,7 +71,8 @@ paygap <- map(dfs, "paygap") |>
   select(period, everything())
 quartile <- map(dfs, "quartile") |>
   bind_rows() |>
-  select(period, everything())
+  select(period, everything()) |>
+  mutate(quartile = as.character(quartile))
 afc <- map(dfs, "afc") |>
   bind_rows() |>
   select(period, everything())
@@ -88,16 +90,38 @@ afc_staff <- afc |>
   ) |>
   left_join(lookup,
     by = "pay_scale"
-  ) |> 
-  select(-employee_number) |> 
-# Data quality error July 2013 Archive employee org is wrong, manually edited
-  mutate(org_l3 = ifelse(org_l3 == 'July 2013 Archive', "914 BSA Finance, Commercial and Estates L3", org_l3),
-         directorate = stringr::str_replace_all(
-           org_l3, c("^914 BSA " = "", " L3" = "")),
+  ) |>
+  select(-employee_number) |>
+
+  # Data quality error July 2013 Archive employee org
+  # is wrong, manually edited
+  mutate(org_l3 = ifelse(org_l3 == "July 2013 Archive",
+                         "914 BSA Finance, Commercial and Estates L3", org_l3),
+         directorate = stringr::str_replace_all(org_l3, c("^914 BSA " = "", " L3" = "")),
          directorate = stringr::str_trim(directorate),
-         headcount = 1) |> 
-  select(period, gender, headcount,hourly_rate, quartile, fte, afc_band, directorate)
-  
+         headcount = 1) |>
+  select(period, gender, headcount, hourly_rate, quartile, fte, afc_band, directorate)
+
+# Per-period totals across quartiles, labelled as an extra "Overall" quartile
+quartile_overall <- quartile |>
+  group_by(period) |>
+  summarise(female = sum(female),
+            male = sum(male),
+            .groups = "drop") |>
+  mutate(quartile = "Overall")
+# Append the "Overall" rows to the per-quartile counts
+quartile <- quartile |>
+  bind_rows(quartile_overall)
+# Reshape to one row per gender; percent is the share within period/quartile
+quartile <- quartile |>
+  tidyr::pivot_longer(cols = c(female, male),
+                      names_to = "gender",
+                      values_to = "count") |>
+  group_by(period, quartile) |>
+  mutate(percent = count / sum(count) * 100) |>
+  ungroup()
+
+
 
 # Keep three main data frame and it will be used to create S3 class
 usethis::use_data(paygap, overwrite = TRUE)
@@ -107,7 +131,7 @@ usethis::use_data(afc_staff, overwrite = TRUE)
 # delete all the files in data_temp as they only stay in azure storage
 
 # Specify the folder path
-folder_path <- "./data_temp" 
+folder_path <- "./data_temp"
 
 # List all files in the directory
 files_to_delete <- list.files(path = folder_path, full.names = TRUE)
diff --git a/man/afc_staff.Rd b/man/afc_staff.Rd
index afb475f..a72f4c9 100644
--- a/man/afc_staff.Rd
+++ b/man/afc_staff.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{afc_staff}
 \alias{afc_staff}
-\title{NHSBSA employee staff_afc}
+\title{NHSBSA employee afc_staff}
 \format{
 A data frame with gender pay gap information
 }
@@ -12,14 +12,14 @@ afc_staff
 }
 \description{
 A dataset containing NHSBSA employee hourly pay
-by reporting period split by gender, AFC band, directorate.
+by reporting period split by gender, FTE, AFC band, directorate.
 \itemize{
   \item period. 2018/19, 2019/20 etc character
   \item gender. Male or Female, character
   \item headcount. employee headcount used for aggregation
   \item hourly_rate. hourly rate as shown pay slip
   \item quartile. split hourly_rate by quartile by gender
-  \item fte. employee full time or part time info
+  \item fte. employee full time or part time information
   \item afc_band. AFC band
   \item directorate. NHSBSA directorate
 }
diff --git a/man/faithful.Rd b/man/faithful.Rd
deleted file mode 100644
index f4be27e..0000000
--- a/man/faithful.Rd
+++ /dev/null
@@ -1,30 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/faithful.R
-\docType{data}
-\name{faithful}
-\alias{faithful}
-\title{Old Faithful Geyser Data}
-\format{
-A data frame with 272 observations on 2 variables.
-}
-\source{
-{W. H<U+00E4>rdle.}
-}
-\usage{
-faithful
-}
-\description{
-A closer look at \code{faithful$eruptions} reveals that these are
-heavily rounded times originally in seconds, where multiples of 5 are
-more frequent than expected under non-human measurement.  For a
-better version of the eruption times, see the example below.
-There are many versions of this dataset around: Azzalini and Bowman
-(1990) use a more complete version.
-}
-\details{
-\itemize{
-    \item eruptions. Eruption time in mins
-    \item waiting. Waiting time to next eruption (in mins)
-}
-}
-\keyword{datasets}
diff --git a/man/gpg_pyramid.Rd b/man/gpg_pyramid.Rd
new file mode 100644
index 0000000..7bb12a1
--- /dev/null
+++ b/man/gpg_pyramid.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils_charts.R
+\name{gpg_pyramid}
+\alias{gpg_pyramid}
+\title{Highcharter bar chart to create pyramid chart. This chart
+shows gender pay gap information by AFC band in NHSBSA}
+\usage{
+gpg_pyramid(x, xvar = "afc_band", yvar, yaxis_title)
+}
+\arguments{
+\item{x}{Input data frame from \code{gpg_data} S3 class object.}
+
+\item{xvar}{"afc_band" default value}
+
+\item{yvar}{headcount/mean hourly/median hourly pay}
+
+\item{yaxis_title}{Y axis title}
+}
+\value{
+Returns a highchart or htmlwidget object.
+}
+\description{
+\code{gpg_data} is the S3 class used for trend
+}
diff --git a/man/gpg_stack.Rd b/man/gpg_stack.Rd
new file mode 100644
index 0000000..1de7000
--- /dev/null
+++ b/man/gpg_stack.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils_charts.R
+\name{gpg_stack}
+\alias{gpg_stack}
+\title{Highcharter column chart to create stacked column chart. This chart
+shows proportion of males and females in each quartile pay band.}
+\usage{
+gpg_stack(x, xvar, yvar, groupvar, yaxis_title)
+}
+\arguments{
+\item{x}{Input quartile data frame.}
+
+\item{xvar}{"afc_band" default value}
+
+\item{yvar}{headcount/mean hourly/median hourly pay}
+
+\item{groupvar}{group by variable}
+
+\item{yaxis_title}{Y axis title}
+}
+\value{
+Returns a highchart or htmlwidget object.
+}
+\description{
+{quartile} data frame is used for stacked column chart.
+}
diff --git a/man/gender_profile.Rd b/man/gpg_trend.Rd
similarity index 71%
rename from man/gender_profile.Rd
rename to man/gpg_trend.Rd
index e31235c..8948717 100644
--- a/man/gender_profile.Rd
+++ b/man/gpg_trend.Rd
@@ -1,18 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/utils_charts.R
-\name{gender_profile}
-\alias{gender_profile}
+\name{gpg_trend}
+\alias{gpg_trend}
 \title{Highcharter line chart to show the number of headcount by financial
 year by gender.}
 \usage{
-gender_profile(
+gpg_trend(
   x,
   xvar = "period",
   yvars,
   series_names,
   yaxis_title,
   yaxis_label,
-  show_legend = TRUE
+  colpalette
 )
 }
 \arguments{
@@ -24,11 +24,11 @@ gender_profile(
 
 \item{series_names}{If user wants to give different series name for highchart legend}
 
-\item{yaxis_title}{Title of y axis}
+\item{yaxis_title}{Y axis title}
 
 \item{yaxis_label}{Indication of percentage or number}
 
-\item{show_legend}{TRUE default}
+\item{colpalette}{custom palette}
 }
 \value{
 Returns a highchart or htmlwidget object.
@@ -36,10 +36,3 @@ Returns a highchart or htmlwidget object.
 \description{
 \code{gpg_data} is the S3 class used for trend
 }
-\examples{
-\dontrun{
-workforce <- nhsbsaGPG::gpg_data(nhsbsaGPG::afc_staff)
-nhsbsaGPG::gender_profile(workforce$df_hdcnt_gender)
-} 
-
-}
diff --git a/man/quartile.Rd b/man/quartile.Rd
index 9cc63aa..ab21d7b 100644
--- a/man/quartile.Rd
+++ b/man/quartile.Rd
@@ -5,7 +5,7 @@
 \alias{quartile}
 \title{NHSBSA employee quartile}
 \format{
-An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 20 rows and 6 columns.
+An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 50 rows and 5 columns.
 }
 \usage{
 quartile
@@ -13,7 +13,7 @@ quartile
 \description{
 A dataset containing NHSBSA employee hourly pay
 by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc)
-split by gender, AFC band, directorate
+by quartiles
 }
 \details{
 \itemize{
@@ -22,13 +22,12 @@ split by gender, AFC band, directorate
   \item female. number of female employees in each quartile
   \item male. number of male employees in each quartile
   \item quartile. split hourly_rate by quartile by gender
-  \item female_percent. female employee % in quartile
-  \item male_percent. male employee % in quartile   
  }
+ 
  @docType data
  @keywords datasets
  @name quartile
  @usage quartile
- @format data frame with employee gender pay gap by quartiles
+ @format data frame with employee gender hourly pay by quartiles
 }
 \keyword{datasets}
diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R
index f418d8f..b031074 100644
--- a/tests/testthat/test-utils_charts.R
+++ b/tests/testthat/test-utils_charts.R
@@ -1,40 +1,82 @@
-
 library(tidyr)
+library(dplyr)
 
 df <- gpg_data(afc_staff)
-x <- df$df_hdcnt_gender |> 
-  tidyr::pivot_wider(names_from = gender,
-                     values_from = headcount)
+x <- df$df_hdcnt_gender |>
+  tidyr::pivot_wider(
+    names_from = gender,
+    values_from = headcount
+  ) |>
+  dplyr::ungroup()
+
+y <- nhsbsaGPG::paygap
 
+z <- df$df_hdcnt_afc |>
+  filter(period == "2021/22") |>
+  mutate(headcount = headcount * ifelse(gender == "Male", 1, -1))
 
-testthat::test_that("gender_profile runs without errors", {
-  expect_silent(gender_profile(x,
+testthat::test_that("gpg_trend function runs without errors", {
+  expect_silent(gpg_trend(x,
     xvar = "period",
     yvars = c("Male", "Female"),
     series_names = c("Male", "Female"),
     yaxis_title = "Male and Female employee headcount",
-    yaxis_label = "number"
+    yaxis_label = "number",
+    colpalette = "gender"
   ))
 })
 
 
-testthat::test_that("gender_profile outputs a highchart, htmlwidget class", {
+testthat::test_that("gpg_trend outputs a highchart, htmlwidget class", {
   expect_equal(class(
-    gender_profile(x,
+    gpg_trend(x,
       xvar = "period",
       yvars = c("Male", "Female"),
       series_names = c("Male", "Female"),
       yaxis_title = "Male and Female employee headcount",
-      yaxis_label = "number"
-    )), c("highchart", "htmlwidget"))
+      yaxis_label = "number",
+      colpalette = c("DarkBlue", "Green")
+    )
+  ), c("highchart", "htmlwidget"))
 })
 
-testthat::test_that("gender_profile takes list as an input", {
+testthat::test_that("gpg_trend takes list as an input", {
   expect_equal(class(list(x)), "list")
 })
 
 
-testthat::test_that("gender_profile input data frame must contain Female,
+testthat::test_that("gpg_trend input data frame must contain Female,
                     Male column", {
-  expect_equal(length(grep("Female|Male", names(x))), 2)
+                      expect_equal(length(grep("Female|Male", names(x))), 2)
+                    })
+
+
+testthat::test_that("gpg_trend input data frame must contain period column", {
+  expect_equal(length(grep("period", names(x))), 1)
+})
+
+
+testthat::test_that("gpg_trend function runs with paygap dataframe", {
+  expect_silent(gpg_trend(y,
+    xvar = "period",
+    yvars = c("mean_hr_gpg", "median_hr_gpg"),
+    series_names = c("Mean gender pay gap", "Median gender pay gap"),
+    yaxis_title = "Gender pay gap in hourly pay",
+    yaxis_label = "percentage",
+    colpalette = c("Purple", "WarmYellow")
+  ))
+})
+
+
+testthat::test_that("gpg_pyramid function runs without error", {
+  expect_silent(gpg_pyramid(z, xvar = "afc_band", yvar = "headcount",
+    yaxis_title = "Headcount"
+  ))
+})
+
+testthat::test_that("gpg_stack function runs without error", {
+  expect_silent(gpg_stack(quartile |> filter(period == "2021/22"),
+    xvar = "quartile", yvar = "percent", groupvar = "ender",
+    yaxis_title = "Males and females in pay quartile"
+  ))
 })

From a385e3529f1393485486ee4844431f4c28997d2e Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 15:56:29 +0100
Subject: [PATCH 09/19] R CMD check error fix

---
 R/gpg_data_class.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/gpg_data_class.R b/R/gpg_data_class.R
index 04df3a7..2149318 100644
--- a/R/gpg_data_class.R
+++ b/R/gpg_data_class.R
@@ -26,10 +26,11 @@
 #'
 #' @return If the class is not instantiated correctly, nothing is returned.
 #'
-#' @examples
+#' @examples \dontrun{
 #'
 #' library(nhsbsaGPG)
 #' df <- gpg_data(afc_staff)
+#' }
 #'
 #' @export
 

From 7ac33f9c953c606b0e4dbc34050f64c752e5c161 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 16:07:35 +0100
Subject: [PATCH 10/19] tryCatch spelling error

---
 R/utils_charts.R                   |  4 ++--
 man/gpg_data.Rd                    | 16 +++++++++-------
 man/paygap.Rd                      |  7 ++++---
 man/quartile.Rd                    |  2 +-
 tests/testthat/test-utils_charts.R | 28 ++++++++++++++++++----------
 5 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/R/utils_charts.R b/R/utils_charts.R
index 6803b47..05aa976 100644
--- a/R/utils_charts.R
+++ b/R/utils_charts.R
@@ -115,7 +115,7 @@ gpg_trend <- function(x,
 
 gpg_pyramid <- function(x, xvar = "afc_band", yvar, yaxis_title) {
   out <- tryCatch(
-    exp = {
+    expr = {
       data <- x
       # Create chart object
       plt <- data |>
@@ -194,7 +194,7 @@ gpg_pyramid <- function(x, xvar = "afc_band", yvar, yaxis_title) {
 
 gpg_stack <- function(x, xvar, yvar, groupvar, yaxis_title) {
   out <- tryCatch(
-    exp = {
+    expr = {
       data <- x
       # Create chart object
       plt <- data |>
diff --git a/man/gpg_data.Rd b/man/gpg_data.Rd
index 3e79d37..4ad3d20 100644
--- a/man/gpg_data.Rd
+++ b/man/gpg_data.Rd
@@ -27,18 +27,20 @@ least seven columns: period, gender, hourly_rate, quartile, fte, afc_band,
 directorate.
 
 Once initiated, the class has six slots:
-\code{df}: raw data frame 
-\code{df_hdcnt}: data frame contains headcount by period 
-\code{df_hdcnt_gender}: data frame contains headcount by gender by period 
-\code{df_hdcnt_afc}: data frame contains headcount by afc band 
-\code{df_hdcnt_dir}: data frame contains headcount by directorate 
-\code{df_hrrate}: data frame contains hourly rate by gender for each grade 
+\code{df}: raw data frame
+\code{df_hdcnt}: data frame contains headcount by period
+\code{df_hdcnt_gender}: data frame contains headcount by gender by period
+\code{df_hdcnt_afc}: data frame contains headcount by afc band
+\code{df_hdcnt_dir}: data frame contains headcount by directorate
+\code{df_hrrate}: data frame contains hourly rate by gender for each grade
 \code{ending_fy}: a character vector containing ending reporting period
 (e.g. 31 March 2023). This uses for introduction paragraph
 }
 \examples{
- 
+\dontrun{
+
 library(nhsbsaGPG)
 df <- gpg_data(afc_staff)
+}
 
 }
diff --git a/man/paygap.Rd b/man/paygap.Rd
index 2e19a52..1ca4916 100644
--- a/man/paygap.Rd
+++ b/man/paygap.Rd
@@ -12,14 +12,15 @@ paygap
 }
 \description{
 A dataset containing NHSBSA employee paygap
-Directly pulled from ESR dashboard (NHS National Returns) 
+Directly pulled from ESR dashboard (NHS National Returns)
 gender, average hourly rate, median hourly rate and pay gap%
 }
 \details{
 \itemize{
    \item period. 2018/19, 2019/20 etc character
   \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees
-  \item median_hr_gpg. Median gender pay gap % based on male full-pay relevant employees
-  }
+  \item median_hr_gpg. Median gender pay gap % based
+  on male full-pay relevant employees
+}
 }
 \keyword{datasets}
diff --git a/man/quartile.Rd b/man/quartile.Rd
index ab21d7b..41e328f 100644
--- a/man/quartile.Rd
+++ b/man/quartile.Rd
@@ -23,7 +23,7 @@ by quartiles
   \item male. number of male employees in each quartile
   \item quartile. split hourly_rate by quartile by gender
  }
- 
+
  @docType data
  @keywords datasets
  @name quartile
diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R
index b031074..1651d4a 100644
--- a/tests/testthat/test-utils_charts.R
+++ b/tests/testthat/test-utils_charts.R
@@ -1,7 +1,7 @@
 library(tidyr)
 library(dplyr)
 
-df <- gpg_data(afc_staff)
+df <- gpg_data(nhsbsaGPG::afc_staff)
 x <- df$df_hdcnt_gender |>
   tidyr::pivot_wider(
     names_from = gender,
@@ -11,8 +11,8 @@ x <- df$df_hdcnt_gender |>
 
 y <- nhsbsaGPG::paygap
 
-z <- df$df_hdcnt_afc |>
-  filter(period == "2021/22") |>
+z <- df$df_hdcnt_afc |> 
+  filter(period == '2021/22') |> 
   mutate(headcount = headcount * ifelse(gender == "Male", 1, -1))
 
 testthat::test_that("gpg_trend function runs without errors", {
@@ -47,8 +47,8 @@ testthat::test_that("gpg_trend takes list as an input", {
 
 testthat::test_that("gpg_trend input data frame must contain Female,
                     Male column", {
-                      expect_equal(length(grep("Female|Male", names(x))), 2)
-                    })
+  expect_equal(length(grep("Female|Male", names(x))), 2)
+})
 
 
 testthat::test_that("gpg_trend input data frame must contain period column", {
@@ -69,14 +69,22 @@ testthat::test_that("gpg_trend function runs with paygap dataframe", {
 
 
 testthat::test_that("gpg_pyramid function runs without error", {
-  expect_silent(gpg_pyramid(z, xvar = "afc_band", yvar = "headcount",
-    yaxis_title = "Headcount"
+  expect_silent(gpg_pyramid(z ,
+                          xvar = "afc_band",
+                          yvar = "headcount",
+                          yaxis_title = "Headcount"
   ))
 })
 
 testthat::test_that("gpg_stack function runs without error", {
-  expect_silent(gpg_stack(quartile |> filter(period == "2021/22"),
-    xvar = "quartile", yvar = "percent", groupvar = "ender",
-    yaxis_title = "Males and females in pay quartile"
+  expect_silent(gpg_stack(quartile |> filter(period == "2021/22") ,
+                            xvar = "quartile",
+                            yvar = "percent",
+                            groupvar = "gender",
+                            yaxis_title = "Males and females in pay quartile"
   ))
 })
+
+
+
+

From 9fb8208c099c43fc1cdf437615feef8e78e93b65 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 18:33:51 +0100
Subject: [PATCH 11/19] dummy data as it failed R CMD check

---
 tests/testthat/test-utils_charts.R | 54 +++++++++++++++---------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R
index 1651d4a..c2cce80 100644
--- a/tests/testthat/test-utils_charts.R
+++ b/tests/testthat/test-utils_charts.R
@@ -1,19 +1,27 @@
 library(tidyr)
 library(dplyr)
 
-df <- gpg_data(nhsbsaGPG::afc_staff)
-x <- df$df_hdcnt_gender |>
-  tidyr::pivot_wider(
-    names_from = gender,
-    values_from = headcount
-  ) |>
-  dplyr::ungroup()
+x <- data.frame(
+  period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"),
+  Female = c(1700, 1800, 1900, 2000, 2300),
+  Male = c(1100, 1300, 1300, 1400, 1500)
+)
+
+y <- data.frame(
+  period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"),
+  mean_hr_gpg = c(11, 11, 12, 14, 12),
+  median_hr_gpg = c(0.80, 0.5, 2.3, 12.5, 8.88)
+)
+
+z <- data.frame(
+  period = rep("2021/22", 20),
+  gender = c(rep("Female", 10), rep("Male", 10)),
+  afc_band = c("Band 2", "Band 3", "Band 4", "Band 5", "Band 6",
+               "Band 2", "Band 3", "Band 4", "Band 5", "Band 6") ,
+  headcount = c(-460, -645, -280, -218, -118, 156, 80, 41, 13, 7),
+  perc = c(65.8, 66.5, 62.9, 57.8, 45.2, 47.0, 48.8, 48.2, 38.2, 31.8)
+)
 
-y <- nhsbsaGPG::paygap
-
-z <- df$df_hdcnt_afc |> 
-  filter(period == '2021/22') |> 
-  mutate(headcount = headcount * ifelse(gender == "Male", 1, -1))
 
 testthat::test_that("gpg_trend function runs without errors", {
   expect_silent(gpg_trend(x,
@@ -47,8 +55,8 @@ testthat::test_that("gpg_trend takes list as an input", {
 
 testthat::test_that("gpg_trend input data frame must contain Female,
                     Male column", {
-  expect_equal(length(grep("Female|Male", names(x))), 2)
-})
+                      expect_equal(length(grep("Female|Male", names(x))), 2)
+                    })
 
 
 testthat::test_that("gpg_trend input data frame must contain period column", {
@@ -69,22 +77,14 @@ testthat::test_that("gpg_trend function runs with paygap dataframe", {
 
 
 testthat::test_that("gpg_pyramid function runs without error", {
-  expect_silent(gpg_pyramid(z ,
-                          xvar = "afc_band",
-                          yvar = "headcount",
-                          yaxis_title = "Headcount"
+  expect_silent(gpg_pyramid(z, xvar = "afc_band", yvar = "headcount",
+    yaxis_title = "Headcount"
   ))
 })
 
 testthat::test_that("gpg_stack function runs without error", {
-  expect_silent(gpg_stack(quartile |> filter(period == "2021/22") ,
-                            xvar = "quartile",
-                            yvar = "percent",
-                            groupvar = "gender",
-                            yaxis_title = "Males and females in pay quartile"
+  expect_silent(gpg_stack(quartile |> filter(period == "2021/22"),
+    xvar = "quartile", yvar = "percent", groupvar = "gender",
+    yaxis_title = "Males and females in pay quartile"
   ))
 })
-
-
-
-

From 4641861a02b93f6fb65726bfe819b9f04cf4ec08 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 18:53:15 +0100
Subject: [PATCH 12/19] change to dummy data for test

---
 tests/testthat/test-utils_charts.R | 45 +++++++++++++++---------------
 1 file changed, 22 insertions(+), 23 deletions(-)

diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R
index c2cce80..d44a953 100644
--- a/tests/testthat/test-utils_charts.R
+++ b/tests/testthat/test-utils_charts.R
@@ -1,28 +1,28 @@
 library(tidyr)
 library(dplyr)
 
-x <- data.frame(
-  period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"),
-  Female = c(1700, 1800, 1900, 2000, 2300),
-  Male = c(1100, 1300, 1300, 1400, 1500)
+df <- gpg_data(nhsbsaGPG::afc_staff)
+x <- df$df_hdcnt_gender |>
+  tidyr::pivot_wider(
+    names_from = gender,
+    values_from = headcount
+  ) |>
+  dplyr::ungroup()
+
+y <- nhsbsaGPG::paygap
+
+z <- df$df_hdcnt_afc |>
+  filter(period == "2021/22") |>
+  mutate(headcount = headcount * ifelse(gender == "Male", 1, -1))
+
+quartile <- data.frame(
+  period = c(rep("2018/19", 8)),
+  quartile = c(rep(1, 2), rep(2, 2), rep(3, 2), rep(4, 2)),
+  gender = c("female", "male", "female", "male", "female", "male", "female", "male"),
+  count = c(425, 282, 438, 261, 461, 269, 380, 344),
+  percent = c(60.1, 39.9, 62.7, 37.3, 63.2, 36.8, 52.5, 47.5)
 )
 
-y <- data.frame(
-  period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"),
-  mean_hr_gpg = c(11, 11, 12, 14, 12),
-  median_hr_gpg = c(0.80, 0.5, 2.3, 12.5, 8.88)
-)
-
-z <- data.frame(
-  period = rep("2021/22", 20),
-  gender = c(rep("Female", 10), rep("Male", 10)),
-  afc_band = c("Band 2", "Band 3", "Band 4", "Band 5", "Band 6",
-               "Band 2", "Band 3", "Band 4", "Band 5", "Band 6") ,
-  headcount = c(-460, -645, -280, -218, -118, 156, 80, 41, 13, 7),
-  perc = c(65.8, 66.5, 62.9, 57.8, 45.2, 47.0, 48.8, 48.2, 38.2, 31.8)
-)
-
-
 testthat::test_that("gpg_trend function runs without errors", {
   expect_silent(gpg_trend(x,
     xvar = "period",
@@ -83,8 +83,7 @@ testthat::test_that("gpg_pyramid function runs without error", {
 })
 
 testthat::test_that("gpg_stack function runs without error", {
-  expect_silent(gpg_stack(quartile |> filter(period == "2021/22"),
-    xvar = "quartile", yvar = "percent", groupvar = "gender",
-    yaxis_title = "Males and females in pay quartile"
+  expect_silent(gpg_stack(quartile, xvar = "quartile", yvar = "percent",
+    groupvar = "gender", yaxis_title = "Males and females in pay quartile"
   ))
 })

From e5ecbf39e3f2dbea04cad7754b0990941d955b64 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 19:00:15 +0100
Subject: [PATCH 13/19] dummy data

---
 tests/testthat/test-utils_charts.R | 33 ++++++++++++++++++------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/tests/testthat/test-utils_charts.R b/tests/testthat/test-utils_charts.R
index d44a953..aad707c 100644
--- a/tests/testthat/test-utils_charts.R
+++ b/tests/testthat/test-utils_charts.R
@@ -1,19 +1,26 @@
 library(tidyr)
 library(dplyr)
 
-df <- gpg_data(nhsbsaGPG::afc_staff)
-x <- df$df_hdcnt_gender |>
-  tidyr::pivot_wider(
-    names_from = gender,
-    values_from = headcount
-  ) |>
-  dplyr::ungroup()
-
-y <- nhsbsaGPG::paygap
-
-z <- df$df_hdcnt_afc |>
-  filter(period == "2021/22") |>
-  mutate(headcount = headcount * ifelse(gender == "Male", 1, -1))
+x <- data.frame(
+  period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"),
+  Female = c(1700, 1800, 1900, 2000, 2300),
+  Male = c(1100, 1300, 1300, 1400, 1500)
+)
+
+y <- data.frame(
+  period = c("2018/19", "2019/20", "2020/21", "2021/22", "2022/23"),
+  mean_hr_gpg = c(11, 11, 12, 14, 12),
+  median_hr_gpg = c(0.80, 0.5, 2.3, 12.5, 8.88)
+)
+
+z <- data.frame(
+  period = rep("2021/22", 20),
+  gender = c(rep("Female", 10), rep("Male", 10)),
+  afc_band = c("Band 2", "Band 3", "Band 4", "Band 5", "Band 6",
+               "Band 2", "Band 3", "Band 4", "Band 5", "Band 6") ,
+  headcount = c(-460, -645, -280, -218, -118, 156, 80, 41, 13, 7),
+  perc = c(65.8, 66.5, 62.9, 57.8, 45.2, 47.0, 48.8, 48.2, 38.2, 31.8)
+)
 
 quartile <- data.frame(
   period = c(rep("2018/19", 8)),

From e1963a5338e63ca26d83912432d96a1387b4f214 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 19:18:38 +0100
Subject: [PATCH 14/19] ignore data.r

---
 .Rbuildignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.Rbuildignore b/.Rbuildignore
index f6a7b85..5692f1d 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -12,3 +12,4 @@
 ^\.github$
 ^\.lintr$
 ^data_temp$
+^R\data.R

From c92b670eaafe9919b6a1265ee95b239f4e489150 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 19:43:10 +0100
Subject: [PATCH 15/19] try to fix warning msg

---
 DESCRIPTION               | 4 +++-
 R/utils-globalVariables.R | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 R/utils-globalVariables.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 29de8f7..cfc2210 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,9 @@ Imports:
     dplyr (>= 1.1.3),
     futile.logger (>= 1.4.3),
     stringr,
-    htmlwidgets
+    htmlwidgets,
+    graphics,
+    stats
 Suggests:
     pkgload,
     testthat (>= 3.0.0),
diff --git a/R/utils-globalVariables.R b/R/utils-globalVariables.R
new file mode 100644
index 0000000..9d8628d
--- /dev/null
+++ b/R/utils-globalVariables.R
@@ -0,0 +1,2 @@
+utils::globalVariables(c("period", "headcount", "gender", "afc_band",
+                         "afc_band", "hourly_rate", "median", ".data", "afc_staff"))

From 5d4c64b7156de8fe5e583f276c637b78fae5ef2c Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 20:22:16 +0100
Subject: [PATCH 16/19] try to fix warning note from R CMD check

---
 .Rbuildignore             | 2 +-
 DESCRIPTION               | 8 ++++----
 R/utils-globalVariables.R | 3 ++-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index 5692f1d..f51e9aa 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -12,4 +12,4 @@
 ^\.github$
 ^\.lintr$
 ^data_temp$
-^R\data.R
+^R/data\.R$
diff --git a/DESCRIPTION b/DESCRIPTION
index cfc2210..3aa3e70 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,14 +22,14 @@ Imports:
     dplyr (>= 1.1.3),
     futile.logger (>= 1.4.3),
     stringr,
-    htmlwidgets,
-    graphics,
-    stats
+    htmlwidgets
 Suggests:
     pkgload,
     testthat (>= 3.0.0),
     usethis,
-    tidyr
+    tidyr,
+    graphics,
+    stats
 Remotes:
     nhsbsa-data-analytics/nhsbsaR
 Encoding: UTF-8
diff --git a/R/utils-globalVariables.R b/R/utils-globalVariables.R
index 9d8628d..f7f14d6 100644
--- a/R/utils-globalVariables.R
+++ b/R/utils-globalVariables.R
@@ -1,2 +1,3 @@
 utils::globalVariables(c("period", "headcount", "gender", "afc_band",
-                         "afc_band", "hourly_rate", "median", ".data", "afc_staff"))
+                         "afc_band", "hourly_rate", "median", ".data", 
+                         "afc_staff", "directorate", "aggregate", "barplot"))

From effbd5cdec1e5947e0d13d92531100981bf227c8 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 20:30:06 +0100
Subject: [PATCH 17/19] R CMD check final warning fix

---
 R/mod_introduction.R      | 7 +++++++
 R/utils-globalVariables.R | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/R/mod_introduction.R b/R/mod_introduction.R
index b1e5db1..2f2988d 100644
--- a/R/mod_introduction.R
+++ b/R/mod_introduction.R
@@ -67,6 +67,13 @@ mod_introduction_server <- function(id) {
       nhsbsaGPG::gpg_data(afc_staff)$ending_fy
     })
 
+    nhsbsaGPG::paygap
+    nhsbsaGPG::quartile
+
+
+
+
+
 
   })
 }
diff --git a/R/utils-globalVariables.R b/R/utils-globalVariables.R
index f7f14d6..e0dfff4 100644
--- a/R/utils-globalVariables.R
+++ b/R/utils-globalVariables.R
@@ -1,3 +1,3 @@
 utils::globalVariables(c("period", "headcount", "gender", "afc_band",
-                         "afc_band", "hourly_rate", "median", ".data", 
+                         "afc_band", "hourly_rate", "median", ".data",
                          "afc_staff", "directorate", "aggregate", "barplot"))

From cc3e7e2531dc8afbf2606fd2dd0698766c340b5e Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 20:40:24 +0100
Subject: [PATCH 18/19] CMD warning message

---
 R/mod_introduction.R      | 8 --------
 R/utils-globalVariables.R | 3 ++-
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/R/mod_introduction.R b/R/mod_introduction.R
index 2f2988d..e2f6e62 100644
--- a/R/mod_introduction.R
+++ b/R/mod_introduction.R
@@ -67,13 +67,5 @@ mod_introduction_server <- function(id) {
       nhsbsaGPG::gpg_data(afc_staff)$ending_fy
     })
 
-    nhsbsaGPG::paygap
-    nhsbsaGPG::quartile
-
-
-
-
-
-
   })
 }
diff --git a/R/utils-globalVariables.R b/R/utils-globalVariables.R
index e0dfff4..3b63edc 100644
--- a/R/utils-globalVariables.R
+++ b/R/utils-globalVariables.R
@@ -1,3 +1,4 @@
 utils::globalVariables(c("period", "headcount", "gender", "afc_band",
                          "afc_band", "hourly_rate", "median", ".data",
-                         "afc_staff", "directorate", "aggregate", "barplot"))
+                         "afc_staff", "directorate", "aggregate", "barplot",
+                         "quartile", "paygap"))

From 8700e8d54428c678a5eebf821dd436ba29d76bf5 Mon Sep 17 00:00:00 2001
From: kaygo <Kayoung.Goffe@nhsbsa.nhs.uk>
Date: Wed, 25 Oct 2023 20:44:05 +0100
Subject: [PATCH 19/19] delete Rd files as they cause warnings

---
 man/afc_staff.Rd | 27 ---------------------------
 man/paygap.Rd    | 26 --------------------------
 man/quartile.Rd  | 33 ---------------------------------
 3 files changed, 86 deletions(-)
 delete mode 100644 man/afc_staff.Rd
 delete mode 100644 man/paygap.Rd
 delete mode 100644 man/quartile.Rd

diff --git a/man/afc_staff.Rd b/man/afc_staff.Rd
deleted file mode 100644
index a72f4c9..0000000
--- a/man/afc_staff.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{afc_staff}
-\alias{afc_staff}
-\title{NHSBSA employee afc_staff}
-\format{
-A data frame with gender pay gap information
-}
-\usage{
-afc_staff
-}
-\description{
-A dataset containing NHSBSA employee hourly pay
-by reporting period split by gender, FTE, AFC band, directorate.
-\itemize{
-  \item period. 2018/19, 2019/20 etc character
-  \item gender. Male or Female, character
-  \item headcount. employee headcount used for aggregation
-  \item hourly_rate. hourly rate as shown pay slip
-  \item quartile. split hourly_rate by quartile by gender
-  \item fte. employee full time or part time information
-  \item afc_band. AFC band
-  \item directorate. NHSBSA directorate
-}
-}
-\keyword{datasets}
diff --git a/man/paygap.Rd b/man/paygap.Rd
deleted file mode 100644
index 1ca4916..0000000
--- a/man/paygap.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{paygap}
-\alias{paygap}
-\title{NHSBSA employee paygap}
-\format{
-A data frame with paygap information
-}
-\usage{
-paygap
-}
-\description{
-A dataset containing NHSBSA employee paygap
-Directly pulled from ESR dashboard (NHS National Returns)
-gender, average hourly rate, median hourly rate and pay gap%
-}
-\details{
-\itemize{
-   \item period. 2018/19, 2019/20 etc character
-  \item avg_hr_gpg. Mean gender pay gap % based on male full-pay relevant employees
-  \item median_hr_gpg. Median gender pay gap % based
-  on male full-pay relevant employees
-}
-}
-\keyword{datasets}
diff --git a/man/quartile.Rd b/man/quartile.Rd
deleted file mode 100644
index 41e328f..0000000
--- a/man/quartile.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{quartile}
-\alias{quartile}
-\title{NHSBSA employee quartile}
-\format{
-An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 50 rows and 5 columns.
-}
-\usage{
-quartile
-}
-\description{
-A dataset containing NHSBSA employee hourly pay
-by reporting period from 2018 (eg. snapshot 31/03/2018 for 2018/19 report etc)
-by quartiles
-}
-\details{
-\itemize{
-  \item period. 2018/19, 2019/20 etc character
-  \item quartile. split hourly pay into quartiles
-  \item female. number of female employees in each quartile
-  \item male. number of male employees in each quartile
-  \item quartile. split hourly_rate by quartile by gender
- }
-
- @docType data
- @keywords datasets
- @name quartile
- @usage quartile
- @format data frame with employee gender hourly pay by quartiles
-}
-\keyword{datasets}