From 9a4effa0517d07c919f63c5420f9d09d44a9d671 Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Wed, 29 Sep 2021 09:38:59 +1000 Subject: [PATCH 01/11] replacing mentions of solar analytics --- shiny.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/shiny.R b/shiny.R index f8b3a6c..dba981b 100644 --- a/shiny.R +++ b/shiny.R @@ -1019,13 +1019,13 @@ server <- function(input,output,session){ disconnection_summary <- join_solar_analytics_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) manufacters_missing_from_cer <- get_manufactures_in_solar_analytics_but_not_cer(disconnection_summary) - manufacters_missing_from_solar_analytics <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) + manufacters_missing_from_input_db <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) disconnection_summary <- impose_sample_size_threshold(disconnection_summary, sample_threshold = 30) disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) v$disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) v$upscaled_disconnections <- upscale_disconnections(v$disconnection_summary) write.csv(manufacters_missing_from_cer, "logging/manufacters_missing_from_cer.csv", row.names=FALSE) - write.csv(manufacters_missing_from_solar_analytics, "logging/manufacters_missing_from_solar_analytics.csv", row.names=FALSE) + write.csv(manufacters_missing_from_input_db, "logging/manufacters_missing_from_input_db.csv", row.names=FALSE) if(length(manufacters_missing_from_cer$manufacturer) > 0) { long_error_message <- c("Some manufacturers present in the solar analytics data could not be ", @@ -1036,10 +1036,10 @@ server <- function(input,output,session){ shinyalert("Manufacturers missing from CER data", long_error_message) } - if(length(manufacters_missing_from_solar_analytics$manufacturer) > 0) { + 
if(length(manufacters_missing_from_input_db$manufacturer) > 0) { long_error_message <- c("Some manufacturers present in the CER data could not be ", "matched to the solar analytics data set. A list of these has been saved in the ", - "file logging/manufacters_missing_from_solar_analytics.csv. You may wish to review the ", + "file logging/manufacters_missing_from_input_db.csv. You may wish to review the ", "file to check the number and names of missing manufacturers. ") long_error_message <- paste(long_error_message, collapse = '') shinyalert("Manufacturers missing from Solar Analytics data", long_error_message) From 8dc2885a8a67a1bfac553800c302de307ca0a6fc Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Wed, 29 Sep 2021 11:09:16 +1000 Subject: [PATCH 02/11] Create main function for summarise_disconnections.R --- .../summarise_disconnections.R | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/upscale_disconnections/summarise_disconnections.R b/upscale_disconnections/summarise_disconnections.R index ac8b0a2..ad8ae3b 100644 --- a/upscale_disconnections/summarise_disconnections.R +++ b/upscale_disconnections/summarise_disconnections.R @@ -1,6 +1,22 @@ +get_upscaling_results <- function(circuit_summary, manufacturer_install_data, event_date, region, sample_threshold){ + out <- list() + disconnection_summary <- group_disconnections_by_manufacturer(circuit_summary) + manufacturer_capacitys <- get_manufacturer_capacitys(manufacturer_install_data, event_date, region) + disconnection_summary <- join_solar_analytics_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) + out$manufacters_missing_from_cer <- get_manufactures_in_solar_analytics_but_not_cer(disconnection_summary) + out$manufacters_missing_from_input_db <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) + disconnection_summary <- impose_sample_size_threshold(disconnection_summary, 
sample_threshold) + disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) + out$disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) + out$upscaled_disconnections <- upscale_disconnections(out$disconnection_summary) + return(out) +} + group_disconnections_by_manufacturer <- function(circuit_summary){ # Don't count circuits without a well defined response type. + # TODO: include UFLS in bad categories bad_categories <- c("6 Not enough data", "Undefined") + # TODO: | response_category == "NA" circuit_summary <- filter(circuit_summary, !(response_category %in% bad_categories | is.na(response_category))) # Get an initial summary of disconnection count and sample size by manufacturer. @@ -17,6 +33,8 @@ get_number_of_disconnections <- function(response_categories){ return(length(response_categories)) } +# TODO: get_number_of_ufls_disconnections + join_solar_analytics_and_cer_manufacturer_data <- function(circuit_summary, cer_manufacturer_data){ circuit_summary <- merge(circuit_summary, cer_manufacturer_data, by = c('Standard_Version', 'manufacturer'), all = TRUE) @@ -25,8 +43,10 @@ join_solar_analytics_and_cer_manufacturer_data <- function(circuit_summary, cer_ } impose_sample_size_threshold <- function(disconnection_summary, sample_threshold){ - circuit_summary <- mutate(disconnection_summary, sample_threshold, manufacturer = ifelse(is.na(cer_capacity), 'Other', manufacturer)) - circuit_summary <- mutate(disconnection_summary, sample_threshold, manufacturer = ifelse(is.na(disconnections), 'Other', manufacturer)) + circuit_summary <- mutate(disconnection_summary, sample_threshold, manufacturer = ifelse(is.na(cer_capacity), + 'Other', manufacturer)) + circuit_summary <- mutate(disconnection_summary, sample_threshold, manufacturer = ifelse(is.na(disconnections), + 'Other', manufacturer)) # Create an Other group for manufacturers with a small sample size. 
disconnection_summary <- mutate(disconnection_summary, sample_size = ifelse(is.na(sample_size), 0, sample_size)) From ebdac75ba22c6efc4d89fcfd7c72b04333ad08cc Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Wed, 6 Oct 2021 17:27:06 +1100 Subject: [PATCH 03/11] Created functions for upscaling excluding UFLS circuits --- .../summarise_disconnections.R | 61 ++++++++++++++++--- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/upscale_disconnections/summarise_disconnections.R b/upscale_disconnections/summarise_disconnections.R index ad8ae3b..c1ee5e3 100644 --- a/upscale_disconnections/summarise_disconnections.R +++ b/upscale_disconnections/summarise_disconnections.R @@ -1,10 +1,11 @@ get_upscaling_results <- function(circuit_summary, manufacturer_install_data, event_date, region, sample_threshold){ + # Upscale the proportion of disconnecting circuits by manufacturer to better represent the installed capacity. out <- list() disconnection_summary <- group_disconnections_by_manufacturer(circuit_summary) manufacturer_capacitys <- get_manufacturer_capacitys(manufacturer_install_data, event_date, region) - disconnection_summary <- join_solar_analytics_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) - out$manufacters_missing_from_cer <- get_manufactures_in_solar_analytics_but_not_cer(disconnection_summary) - out$manufacters_missing_from_input_db <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) + disconnection_summary <- join_circuit_summary_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) + out$manufacturers_missing_from_cer <- get_manufactures_in_solar_analytics_but_not_cer(disconnection_summary) + out$manufacturers_missing_from_input_db <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) disconnection_summary <- impose_sample_size_threshold(disconnection_summary, sample_threshold) disconnection_summary <- 
calc_confidence_intervals_for_disconnections(disconnection_summary) out$disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) @@ -12,11 +13,31 @@ get_upscaling_results <- function(circuit_summary, manufacturer_install_data, ev return(out) } -group_disconnections_by_manufacturer <- function(circuit_summary){ +get_upscaling_results_excluding_ufls <- function(circuit_summary, manufacturer_install_data, event_date, region, + sample_threshold){ + # Upscale the proportion of disconnecting circuits based on sample sizes once UFLS circuits are removed. + out <- list() + disconnection_summary <- group_disconnections_by_manufacturer(circuit_summary, exclude_ufls_circuits = TRUE) + ufls_stats <- get_number_of_ufls_disconnections(circuit_summary$response_category) + manufacturer_capacitys <- get_manufacturer_capacitys(manufacturer_install_data, event_date, region) + disconnection_summary <- join_circuit_summary_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) + out$manufacturers_missing_from_cer <- get_manufactures_in_solar_analytics_but_not_cer(disconnection_summary) + out$manufacturers_missing_from_input_db <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) + disconnection_summary <- scale_manufacturer_capacities_by_ufls(disconnection_summary, ufls_stats) + disconnection_summary <- impose_sample_size_threshold(disconnection_summary, sample_threshold) + disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) + out$disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) + out$upscaled_disconnections <- upscale_disconnections(out$disconnection_summary) + return(out) +} + +group_disconnections_by_manufacturer <- function(circuit_summary, exclude_ufls_circuits=FALSE){ # Don't count circuits without a well defined response type. 
- # TODO: include UFLS in bad categories - bad_categories <- c("6 Not enough data", "Undefined") - # TODO: | response_category == "NA" + if (exclude_ufls_circuits) { + bad_categories <- c("6 Not enough data", "Undefined", "UFLS Dropout") + } else { + bad_categories <- c("6 Not enough data", "Undefined") + } circuit_summary <- filter(circuit_summary, !(response_category %in% bad_categories | is.na(response_category))) # Get an initial summary of disconnection count and sample size by manufacturer. @@ -33,9 +54,31 @@ get_number_of_disconnections <- function(response_categories){ return(length(response_categories)) } -# TODO: get_number_of_ufls_disconnections +get_number_of_ufls_disconnections <- function(response_categories){ + # Find the number of circuits identified as UFLS Dropout and the sample size to use for the UFLS proportion + ufls_stats <- list() + bad_categories <- c("6 Not enough data", "Undefined") + response_categories <- response_categories[!(response_categories %in% bad_categories | is.na(response_categories))] + ufls_stats$sample_size <- length(response_categories) + disconnection_categories <- c("UFLS Dropout") + response_categories <- response_categories[response_categories %in% disconnection_categories] + ufls_stats$disconnections <- length(response_categories) + return(ufls_stats) +} + +scale_manufacturer_capacities_by_ufls <- function(disconnection_summary, ufls_stats){ + # Reduce the CER capacities by the UFLS proportion + ufls_proportion <- ufls_stats$disconnections / ufls_stats$sample_size + disconnection_summary <- mutate(disconnection_summary, cer_capacity=cer_capacity*(1-ufls_proportion)) + # Add a UFLS row to the disconnection summary + ufls_row <- data.frame(Standard_Version="UFLS", manufacturer="UFLS", disconnections=ufls_stats$disconnections, + sample_size=ufls_stats$sample_size, s_state="UFLS", + cer_capacity=sum(disconnection_summary$cer_capacity, na.rm = TRUE) / (1-ufls_proportion)) + disconnection_summary <- 
rbind(disconnection_summary, ufls_row) + return(disconnection_summary) +} -join_solar_analytics_and_cer_manufacturer_data <- function(circuit_summary, cer_manufacturer_data){ +join_circuit_summary_and_cer_manufacturer_data <- function(circuit_summary, cer_manufacturer_data){ circuit_summary <- merge(circuit_summary, cer_manufacturer_data, by = c('Standard_Version', 'manufacturer'), all = TRUE) circuit_summary <- rename(circuit_summary, cer_capacity = capacity) From 6e513d32b6efcb5096b516495f25e9e2b9aa166c Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Wed, 6 Oct 2021 17:34:21 +1100 Subject: [PATCH 04/11] remove mentions of solar analytics --- shiny.R | 14 +++++++------- upscale_disconnections/post_processing_upscaling.R | 8 ++++---- upscale_disconnections/summarise_disconnections.R | 12 ++++++------ .../tests/test_summarise_disconnections.R | 4 ++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/shiny.R b/shiny.R index dba981b..24ac6b1 100644 --- a/shiny.R +++ b/shiny.R @@ -1016,10 +1016,10 @@ server <- function(input,output,session){ disconnection_summary <- group_disconnections_by_manufacturer(circuits_to_summarise) manufacturer_capacitys <- get_manufacturer_capacitys(manufacturer_install_data, load_date(), region_to_load()) - disconnection_summary <- join_solar_analytics_and_cer_manufacturer_data(disconnection_summary, + disconnection_summary <- join_circuit_summary_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) - manufacters_missing_from_cer <- get_manufactures_in_solar_analytics_but_not_cer(disconnection_summary) - manufacters_missing_from_input_db <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) + manufacters_missing_from_cer <- get_manufactures_in_input_db_but_not_cer(disconnection_summary) + manufacters_missing_from_input_db <- get_manufactures_in_cer_but_not_input_db(disconnection_summary) disconnection_summary <- 
impose_sample_size_threshold(disconnection_summary, sample_threshold = 30) disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) v$disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) @@ -1028,21 +1028,21 @@ server <- function(input,output,session){ write.csv(manufacters_missing_from_input_db, "logging/manufacters_missing_from_input_db.csv", row.names=FALSE) if(length(manufacters_missing_from_cer$manufacturer) > 0) { - long_error_message <- c("Some manufacturers present in the solar analytics data could not be ", + long_error_message <- c("Some manufacturers present in the input data could not be ", "matched to the cer data set. A list of these has been saved in the ", "file logging/manufacters_missing_from_cer.csv. You may want to review the ", - "mapping used in processing the solar analytics data.") + "mapping used in processing the input data.") long_error_message <- paste(long_error_message, collapse = '') shinyalert("Manufacturers missing from CER data", long_error_message) } if(length(manufacters_missing_from_input_db$manufacturer) > 0) { long_error_message <- c("Some manufacturers present in the CER data could not be ", - "matched to the solar analytics data set. A list of these has been saved in the ", + "matched to the input data set. A list of these has been saved in the ", "file logging/manufacters_missing_from_input_db.csv. You may wish to review the ", "file to check the number and names of missing manufacturers. 
") long_error_message <- paste(long_error_message, collapse = '') - shinyalert("Manufacturers missing from Solar Analytics data", long_error_message) + shinyalert("Manufacturers missing from input data", long_error_message) } diff --git a/upscale_disconnections/post_processing_upscaling.R b/upscale_disconnections/post_processing_upscaling.R index 10a4186..7194c37 100644 --- a/upscale_disconnections/post_processing_upscaling.R +++ b/upscale_disconnections/post_processing_upscaling.R @@ -25,10 +25,10 @@ if (exclude_solar_edge){ disconnection_summary <- group_disconnections_by_manufacturer(circuits_to_summarise) manufacturer_capacitys <- get_manufacturer_capacitys(manufacturer_install_data, load_start_time, region_to_load) -disconnection_summary <- join_solar_analytics_and_cer_manufacturer_data(disconnection_summary, +disconnection_summary <- join_circuit_summary_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) -manufacters_missing_from_cer <- get_manufactures_in_solar_analytics_but_not_cer(disconnection_summary) -manufacters_missing_from_solar_analytics <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) +manufacters_missing_from_cer <- get_manufactures_in_input_db_but_not_cer(disconnection_summary) +manufacters_missing_from_input_db <- get_manufactures_in_cer_but_not_input_db(disconnection_summary) disconnection_summary <- impose_sample_size_threshold(disconnection_summary, sample_threshold = 30) disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) @@ -37,4 +37,4 @@ upscaled_disconnections <- upscale_disconnections(disconnection_summary) write.csv(disconnection_summary, "test_disconnection_summary.csv", row.names = FALSE) write.csv(upscaled_disconnections, "test_upscaled_disconnections.csv", row.names = FALSE) write.csv(manufacters_missing_from_cer, "test_manufacters_missing_from_cer.csv", row.names=FALSE) 
-write.csv(manufacters_missing_from_solar_analytics, "test_manufacters_missing_from_solar_analytics.csv", row.names=FALSE) \ No newline at end of file +write.csv(manufacters_missing_from_input_db, "test_manufacters_missing_from_input_db.csv", row.names=FALSE) \ No newline at end of file diff --git a/upscale_disconnections/summarise_disconnections.R b/upscale_disconnections/summarise_disconnections.R index c1ee5e3..0103407 100644 --- a/upscale_disconnections/summarise_disconnections.R +++ b/upscale_disconnections/summarise_disconnections.R @@ -4,8 +4,8 @@ get_upscaling_results <- function(circuit_summary, manufacturer_install_data, ev disconnection_summary <- group_disconnections_by_manufacturer(circuit_summary) manufacturer_capacitys <- get_manufacturer_capacitys(manufacturer_install_data, event_date, region) disconnection_summary <- join_circuit_summary_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) - out$manufacturers_missing_from_cer <- get_manufactures_in_solar_analytics_but_not_cer(disconnection_summary) - out$manufacturers_missing_from_input_db <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) + out$manufacturers_missing_from_cer <- get_manufactures_in_input_db_but_not_cer(disconnection_summary) + out$manufacturers_missing_from_input_db <- get_manufactures_in_cer_but_not_input_db(disconnection_summary) disconnection_summary <- impose_sample_size_threshold(disconnection_summary, sample_threshold) disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) out$disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) @@ -21,8 +21,8 @@ get_upscaling_results_excluding_ufls <- function(circuit_summary, manufacturer_i ufls_stats <- get_number_of_ufls_disconnections(circuit_summary$response_category) manufacturer_capacitys <- get_manufacturer_capacitys(manufacturer_install_data, event_date, region) disconnection_summary <- 
join_circuit_summary_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) - out$manufacturers_missing_from_cer <- get_manufactures_in_solar_analytics_but_not_cer(disconnection_summary) - out$manufacturers_missing_from_input_db <- get_manufactures_in_cer_but_not_solar_analytics(disconnection_summary) + out$manufacturers_missing_from_cer <- get_manufactures_in_input_db_but_not_cer(disconnection_summary) + out$manufacturers_missing_from_input_db <- get_manufactures_in_cer_but_not_input_db(disconnection_summary) disconnection_summary <- scale_manufacturer_capacities_by_ufls(disconnection_summary, ufls_stats) disconnection_summary <- impose_sample_size_threshold(disconnection_summary, sample_threshold) disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) @@ -139,12 +139,12 @@ calc_upscale_kw_loss <- function(disconnection_summary){ return(disconnection_summary) } -get_manufactures_in_solar_analytics_but_not_cer <- function(disconnection_summary){ +get_manufactures_in_input_db_but_not_cer <- function(disconnection_summary){ disconnection_summary <- filter(disconnection_summary, is.na(cer_capacity)) return(disconnection_summary) } -get_manufactures_in_cer_but_not_solar_analytics <- function(disconnection_summary){ +get_manufactures_in_cer_but_not_input_db <- function(disconnection_summary){ disconnection_summary <- filter(disconnection_summary, is.na(sample_size)) return(disconnection_summary) } diff --git a/upscale_disconnections/tests/test_summarise_disconnections.R b/upscale_disconnections/tests/test_summarise_disconnections.R index 31c0a3b..87a4f7d 100644 --- a/upscale_disconnections/tests/test_summarise_disconnections.R +++ b/upscale_disconnections/tests/test_summarise_disconnections.R @@ -34,7 +34,7 @@ testthat::test_that("group_disconnections_by_manufacturer",{ testthat::expect_equal(output, expected_output, tolerance = 1e-4) }) -testthat::test_that("join_solar_analytics_and_cer_manufacturer_data",{ 
+testthat::test_that("join_circuit_summary_and_cer_manufacturer_data",{ disconnection_summary <- "Standard_Version, manufacturer, disconnections, sample_size @@ -56,7 +56,7 @@ testthat::test_that("join_solar_analytics_and_cer_manufacturer_data",{ disconnection_summary <- load_test_file(disconnection_summary) cer_data <- load_test_file(cer_data) expected_output <- load_test_file(expected_output) - output <- join_solar_analytics_and_cer_manufacturer_data(disconnection_summary, cer_data) + output <- join_circuit_summary_and_cer_manufacturer_data(disconnection_summary, cer_data) testthat::expect_equal(output, expected_output, tolerance = 1e-4) }) From 3cab8601871531b20cb274e6873737f9f032262a Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Wed, 6 Oct 2021 17:35:02 +1100 Subject: [PATCH 05/11] Create example_run_upscaling.R --- .../example_run_upscaling.R | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 upscale_disconnections/example_run_upscaling.R diff --git a/upscale_disconnections/example_run_upscaling.R b/upscale_disconnections/example_run_upscaling.R new file mode 100644 index 0000000..b980ea3 --- /dev/null +++ b/upscale_disconnections/example_run_upscaling.R @@ -0,0 +1,36 @@ +source("load_tool_environment.R") + +exclude_solar_edge <- FALSE +region_to_load <- 'QLD' +load_start_time <- '2021-05-25' +circuit_summary <- read.csv(file = "data/Luceo_circ_sum_active2309.csv", header = TRUE, stringsAsFactors = FALSE) +manufacturer_install_data <- read.csv(file = "inbuilt_data/cer_cumulative_capacity_and_number_by_manufacturer.csv", + header = TRUE, stringsAsFactors = FALSE) + +if (exclude_solar_edge){ + circuits_to_summarise <- filter(circuit_summary, manufacturer != "SolarEdge") + manufacturer_install_data <- filter(manufacturer_install_data, manufacturer != "SolarEdge") +} else { + circuits_to_summarise <- circuit_summary + manufacturer_install_data <- manufacturer_install_data +} + +# 
These 2 lines are different to what the shiny app does +circuits_to_summarise <- mutate(circuits_to_summarise, manufacturer=ifelse(is.na(manufacturer), 'Other', manufacturer)) +manufacturer_install_data <- mutate(manufacturer_install_data, + manufacturer=ifelse(is.na(manufacturer), 'Other', manufacturer)) + +manufacturer_install_data <- calc_installed_capacity_by_standard_and_manufacturer(manufacturer_install_data) + +upsc_results <- get_upscaling_results_excluding_ufls(circuits_to_summarise, manufacturer_install_data, load_start_time, region_to_load, + 30) + +disconnection_summary <- upsc_results$disconnection_summary +upscaled_disconnections <- upsc_results$upscaled_disconnections +manufacturers_missing_from_cer <- upsc_results$manufacturers_missing_from_cer +manufacturers_missing_from_input_db <- upsc_results$manufacturers_missing_from_input_db + +write.csv(disconnection_summary, "data/Luceo_active_disconnection_summary0610.csv", row.names = FALSE) +write.csv(upscaled_disconnections, "data/Luceo_active_upscaled_disconnections0610.csv", row.names = FALSE) +#write.csv(manufacturers_missing_from_cer, "test_manufacturers_missing_from_cer.csv", row.names=FALSE) +#write.csv(manufacturers_missing_from_input_db, "test_manufacturers_missing_from_input_db.csv", row.names=FALSE) \ No newline at end of file From 71cc53819338a94665bb2d495a4f383405e4daa8 Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Thu, 7 Oct 2021 12:00:51 +1100 Subject: [PATCH 06/11] rename df columns to make outputs clearer --- .../summarise_disconnections.R | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/upscale_disconnections/summarise_disconnections.R b/upscale_disconnections/summarise_disconnections.R index 0103407..9808a8a 100644 --- a/upscale_disconnections/summarise_disconnections.R +++ b/upscale_disconnections/summarise_disconnections.R @@ -26,8 +26,20 @@ get_upscaling_results_excluding_ufls <- 
function(circuit_summary, manufacturer_i disconnection_summary <- scale_manufacturer_capacities_by_ufls(disconnection_summary, ufls_stats) disconnection_summary <- impose_sample_size_threshold(disconnection_summary, sample_threshold) disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) - out$disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) - out$upscaled_disconnections <- upscale_disconnections(out$disconnection_summary) + disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) + upscaled_disconnections <- upscale_disconnections(disconnection_summary) + disconnection_summary <- disconnection_summary %>% + rename( + sample_size_after_removing_UFLS_affected_circuits = sample_size, + cer_capacity_reduced_by_UFLS_proportion = cer_capacity + ) + out$disconnection_summary <- disconnection_summary + upscaled_disconnections <- upscaled_disconnections %>% + rename( + sample_size_after_removing_UFLS_affected_circuits = sample_size, + cer_capacity_reduced_by_UFLS_proportion = cer_capacity + ) + out$upscaled_disconnections <- upscaled_disconnections return(out) } @@ -71,7 +83,8 @@ scale_manufacturer_capacities_by_ufls <- function(disconnection_summary, ufls_st ufls_proportion <- ufls_stats$disconnections / ufls_stats$sample_size disconnection_summary <- mutate(disconnection_summary, cer_capacity=cer_capacity*(1-ufls_proportion)) # Add a UFLS row to the disconnection summary - ufls_row <- data.frame(Standard_Version="UFLS", manufacturer="UFLS", disconnections=ufls_stats$disconnections, + ufls_row <- data.frame(Standard_Version="UFLS_disconnections_and_totals_including_ULFS_affected_circuits", + manufacturer="UFLS", disconnections=ufls_stats$disconnections, sample_size=ufls_stats$sample_size, s_state="UFLS", cer_capacity=sum(disconnection_summary$cer_capacity, na.rm = TRUE) / (1-ufls_proportion)) disconnection_summary <- rbind(disconnection_summary, ufls_row) @@ -98,6 +111,7 @@ impose_sample_size_threshold 
<- function(disconnection_summary, sample_threshold manufacturer == "Unknown" | manufacturer == "Multiple" | manufacturer == "Mixed" | + manufacturer == "" | is.na(manufacturer), "Other", manufacturer) ) From ee5c4690b1a73d796b1c01972ce93fb60f221f2a Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Thu, 7 Oct 2021 12:58:07 +1100 Subject: [PATCH 07/11] Create test_summarise_disconnections_excluding_ufls.R --- ..._summarise_disconnections_excluding_ufls.R | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 upscale_disconnections/tests/test_summarise_disconnections_excluding_ufls.R diff --git a/upscale_disconnections/tests/test_summarise_disconnections_excluding_ufls.R b/upscale_disconnections/tests/test_summarise_disconnections_excluding_ufls.R new file mode 100644 index 0000000..0df8654 --- /dev/null +++ b/upscale_disconnections/tests/test_summarise_disconnections_excluding_ufls.R @@ -0,0 +1,79 @@ + +testthat::context("Testing summarising disconnections when UFLS circuits are removed from sample sizes.") + + +load_test_file <- function(text){ + text <- gsub(" ", "", text) + text <- gsub("-", " ", text) + df <- read.table(text = text, sep = ",", header = TRUE, stringsAsFactors = FALSE) + return(df) +} + +testthat::test_that("group_disconnections_by_manufacturer",{ + + circuit_summary <- "c_id, Standard_Version, manufacturer, response_category + 1, y, SMA, 6-Not-enough-data + 2, y, SMA, Undefined + 3, y, SMA, UFLS-Dropout + 4, y, SMA, 3-Drop-to-Zero + 5, y, SMA, 4-Disconnect + 6, y, x, 4-Disconnect + 7, y, x, 6-Not-enough-data + 8, z, x, blah + 9, z, x, blah + 10, z, x, UFLS-Dropout" + + expected_output <- "Standard_Version, manufacturer, disconnections, sample_size + y, SMA, 2, 2 + y, x, 1, 1 + z, x, 0, 2" + + circuit_summary <- load_test_file(circuit_summary) + expected_output <- load_test_file(expected_output) + output <- group_disconnections_by_manufacturer(circuit_summary, 
exclude_ufls_circuits = TRUE) + testthat::expect_equal(output, expected_output, tolerance = 1e-4) +}) + +testthat::test_that("get_number_of_ufls_disconnections",{ + + circuit_summary <- "c_id, Standard_Version, manufacturer, response_category + 1, y, SMA, 6-Not-enough-data + 2, y, SMA, Undefined + 3, y, SMA, UFLS-Dropout + 4, y, SMA, 3-Drop-to-Zero + 5, y, SMA, 4-Disconnect + 6, y, x, 4-Disconnect + 7, y, x, 6-Not-enough-data + 8, z, x, blah + 9, z, x, blah + 10, z, x, UFLS-Dropout" + + expected_output <- list(sample_size=7, disconnections=2) + + circuit_summary <- load_test_file(circuit_summary) + output <- get_number_of_ufls_disconnections(circuit_summary$response_category) + testthat::expect_equal(output, expected_output, tolerance = 1e-4) +}) + +testthat::test_that("scale_manufacturer_capacities_by_ufls",{ + + circuit_summary <- "Standard_Version, manufacturer, disconnections, sample_size, s_state, cer_capacity + y, a, NA, NA, QLD, 13 + y, SMA, 2, 3, QLD, 10 + y, x, 1, 1, QLD, NA + z, x, 0, 3, QLD, 108" + + ufls_stats <- list(sample_size=6, disconnections=3) + + expected_output <- "Standard_Version, manufacturer, disconnections, sample_size, s_state, cer_capacity + y, a, NA, NA, QLD, 6.5 + y, SMA, 2, 3, QLD, 5 + y, x, 1, 1, QLD, NA + z, x, 0, 3, QLD, 54 + UFLS_disconnections_and_totals_including_ULFS_affected_circuits, UFLS, 3, 6, UFLS, 131" + + circuit_summary <- load_test_file(circuit_summary) + expected_output <- load_test_file(expected_output) + output <- scale_manufacturer_capacities_by_ufls(circuit_summary, ufls_stats) + testthat::expect_equal(output, expected_output, tolerance = 1e-4) +}) From af5e7622524c36c19798233694608355e8f9d4ca Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Tue, 12 Oct 2021 09:23:34 +1100 Subject: [PATCH 08/11] integrate into shiny app --- shiny.R | 74 ++++++++++++++----- .../example_run_upscaling.R | 4 +- .../summarise_disconnections.R | 2 +- 3 files changed, 60 insertions(+), 
20 deletions(-) diff --git a/shiny.R b/shiny.R index 24ac6b1..79d3b80 100644 --- a/shiny.R +++ b/shiny.R @@ -114,7 +114,11 @@ ui <- fluidPage( HTML("
"), uiOutput("save_manufacturer_disconnection_summary"), HTML("
"), + uiOutput("save_manufacturer_disconnection_summary_with_separate_ufls_counts"), + HTML("
"), uiOutput("save_upscaled_disconnection_summary"), + HTML("
"), + uiOutput("save_upscaled_disconnection_summary_with_separate_ufls_counts"), HTML("

"), plotlyOutput(outputId="NormPower"), plotlyOutput(outputId="Frequency"), @@ -1013,33 +1017,36 @@ server <- function(input,output,session){ circuits_to_summarise <- v$circuit_summary manufacturer_install_data <- v$manufacturer_install_data } - disconnection_summary <- group_disconnections_by_manufacturer(circuits_to_summarise) - manufacturer_capacitys <- get_manufacturer_capacitys(manufacturer_install_data, load_date(), - region_to_load()) - disconnection_summary <- join_circuit_summary_and_cer_manufacturer_data(disconnection_summary, - manufacturer_capacitys) - manufacters_missing_from_cer <- get_manufactures_in_input_db_but_not_cer(disconnection_summary) - manufacters_missing_from_input_db <- get_manufactures_in_cer_but_not_input_db(disconnection_summary) - disconnection_summary <- impose_sample_size_threshold(disconnection_summary, sample_threshold = 30) - disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) - v$disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) - v$upscaled_disconnections <- upscale_disconnections(v$disconnection_summary) - write.csv(manufacters_missing_from_cer, "logging/manufacters_missing_from_cer.csv", row.names=FALSE) - write.csv(manufacters_missing_from_input_db, "logging/manufacters_missing_from_input_db.csv", row.names=FALSE) + upscaling_results <- get_upscaling_results(circuits_to_summarise, manufacturer_install_data, load_date(), + region_to_load(), sample_threshold = 30) + upscaling_results_with_separate_ufls_counts <- get_upscaling_results_excluding_ufls_affected_circuits( + circuits_to_summarise, manufacturer_install_data, load_date(), region_to_load(), sample_threshold = 30) + + v$disconnection_summary <- upscaling_results$disconnection_summary + v$upscaled_disconnections <- upscaling_results$upscaled_disconnections + v$disconnection_summary_with_separate_ufls_counts <- + upscaling_results_with_separate_ufls_counts$disconnection_summary + 
v$upscaled_disconnections_with_separate_ufls_counts <- + upscaling_results_with_separate_ufls_counts$upscaled_disconnections - if(length(manufacters_missing_from_cer$manufacturer) > 0) { + write.csv(upscaling_results$manufacturers_missing_from_cer, + "logging/manufacturers_missing_from_cer.csv", row.names=FALSE) + write.csv(upscaling_results$manufacturers_missing_from_input_db, + "logging/manufacturers_missing_from_input_db.csv", row.names=FALSE) + + if(length(upscaling_results$manufacturers_missing_from_cer$manufacturer) > 0) { long_error_message <- c("Some manufacturers present in the input data could not be ", "matched to the cer data set. A list of these has been saved in the ", - "file logging/manufacters_missing_from_cer.csv. You may want to review the ", + "file logging/manufacturers_missing_from_cer.csv. You may want to review the ", "mapping used in processing the input data.") long_error_message <- paste(long_error_message, collapse = '') shinyalert("Manufacturers missing from CER data", long_error_message) } - if(length(manufacters_missing_from_input_db$manufacturer) > 0) { + if(length(upscaling_results$manufacturers_missing_from_input_db$manufacturer) > 0) { long_error_message <- c("Some manufacturers present in the CER data could not be ", "matched to the input data set. A list of these has been saved in the ", - "file logging/manufacters_missing_from_input_db.csv. You may wish to review the ", + "file logging/manufacturers_missing_from_input_db.csv. You may wish to review the ", "file to check the number and names of missing manufacturers. 
") long_error_message <- paste(long_error_message, collapse = '') shinyalert("Manufacturers missing from input data", long_error_message) @@ -1091,10 +1098,20 @@ server <- function(input,output,session){ shinySaveButton("save_manufacturer_disconnection_summary", "Save manufacturer disconnection summary", "Choose directory for report files ...", filetype=list(xlsx="csv")) }) + output$save_manufacturer_disconnection_summary_with_separate_ufls_counts <- renderUI({ + shinySaveButton("save_manufacturer_disconnection_summary_with_separate_ufls_counts", + "Save manufacturer disconnection summary with separate ufls counts", + "Choose directory for report files ...", filetype=list(xlsx="csv")) + }) output$save_upscaled_disconnection_summary <- renderUI({ shinySaveButton("save_upscaled_disconnection_summary", "Save upscaled disconnection summary", "Choose directory for report files ...", filetype=list(xlsx="csv")) }) + output$save_upscaled_disconnection_summary_with_separate_ufls_counts <- renderUI({ + shinySaveButton("save_upscaled_disconnection_summary_with_separate_ufls_counts", + "Save upscaled disconnection summary with separate ufls counts", + "Choose directory for report files ...", filetype=list(xlsx="csv")) + }) if ("width" %in% names(v$sample_count_table)) { @@ -1481,6 +1498,18 @@ server <- function(input,output,session){ } }) + + observeEvent(input$save_manufacturer_disconnection_summary_with_separate_ufls_counts,{ + volumes <- c(home=getwd()) + shinyFileSave(input, "save_manufacturer_disconnection_summary_with_separate_ufls_counts", roots=volumes, + session=session) + fileinfo <- parseSavePath(volumes, input$save_manufacturer_disconnection_summary_with_separate_ufls_counts) + if (nrow(fileinfo) > 0) { + write.csv(v$disconnection_summary_with_separate_ufls_counts, as.character(fileinfo$datapath), row.names=FALSE) + } + }) + + observeEvent(input$save_upscaled_disconnection_summary,{ volumes <- c(home=getwd()) shinyFileSave(input, 
"save_upscaled_disconnection_summary", roots=volumes, session=session) @@ -1491,6 +1520,17 @@ server <- function(input,output,session){ }) + observeEvent(input$save_upscaled_disconnection_summary_with_separate_ufls_counts,{ + volumes <- c(home=getwd()) + shinyFileSave(input, "save_upscaled_disconnection_summary_with_separate_ufls_counts", roots=volumes, + session=session) + fileinfo <- parseSavePath(volumes, input$save_upscaled_disconnection_summary_with_separate_ufls_counts) + if (nrow(fileinfo) > 0) { + write.csv(v$upscaled_disconnections_with_separate_ufls_counts, as.character(fileinfo$datapath), row.names=FALSE) + } + }) + + get_current_settings <- function(){ settings <- vector(mode='list') diff --git a/upscale_disconnections/example_run_upscaling.R b/upscale_disconnections/example_run_upscaling.R index b980ea3..77a4ece 100644 --- a/upscale_disconnections/example_run_upscaling.R +++ b/upscale_disconnections/example_run_upscaling.R @@ -22,8 +22,8 @@ manufacturer_install_data <- mutate(manufacturer_install_data, manufacturer_install_data <- calc_installed_capacity_by_standard_and_manufacturer(manufacturer_install_data) -upsc_results <- get_upscaling_results_excluding_ufls(circuits_to_summarise, manufacturer_install_data, load_start_time, region_to_load, - 30) +upsc_results <- get_upscaling_results_excluding_ufls_affected_circuits(circuits_to_summarise, manufacturer_install_data, + load_start_time, region_to_load, 30) disconnection_summary <- upsc_results$disconnection_summary upscaled_disconnections <- upsc_results$upscaled_disconnections diff --git a/upscale_disconnections/summarise_disconnections.R b/upscale_disconnections/summarise_disconnections.R index 9808a8a..6c83a3a 100644 --- a/upscale_disconnections/summarise_disconnections.R +++ b/upscale_disconnections/summarise_disconnections.R @@ -13,7 +13,7 @@ get_upscaling_results <- function(circuit_summary, manufacturer_install_data, ev return(out) } -get_upscaling_results_excluding_ufls <- 
function(circuit_summary, manufacturer_install_data, event_date, region, +get_upscaling_results_excluding_ufls_affected_circuits <- function(circuit_summary, manufacturer_install_data, event_date, region, sample_threshold){ # Upscale the proportion of disconnecting circuits based on sample sizes once UFLS circuits are removed. out <- list() From bdffec2388da9b1502dfe05c682968dbbc3a1ca7 Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Thu, 21 Oct 2021 15:19:57 +1100 Subject: [PATCH 09/11] Update .gitignore --- .gitignore | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index ff6840e..c0f6e89 100644 --- a/.gitignore +++ b/.gitignore @@ -86,11 +86,11 @@ manufacters_missing_from_cer.csv manufacters_missing_from_solar_analytics.csv cer_cumulative_capacity_and_number_by_manufacturer_filter_off_grid_formatted.csv cer_cumulative_capacity_and_number_by_manufacturer_filter_off_grid.csv= -manufacters_missing_from_cer.csv -manufacters_missing_from_solar_analytics.csv +manufacturers_missing_from_cer.csv +manufacturers_missing_from_input_db.csv cer_cumulative_capacity_and_number_by_manufacturer.csv cer_cumulative_capacity_and_number_old.csv test_disconnection_summary.csv -test_manufacters_missing_from_cer.csv -test_manufacters_missing_from_solar_analytics.csv +test_manufacturers_missing_from_cer.csv +test_manufacturers_missing_from_input_db.csv test_upscaled_disconnections.csv From a5b7fdaaf466b540ec23a031044e2f755b474f76 Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Thu, 21 Oct 2021 16:00:18 +1100 Subject: [PATCH 10/11] Update example_run_upscaling.R --- upscale_disconnections/example_run_upscaling.R | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/upscale_disconnections/example_run_upscaling.R b/upscale_disconnections/example_run_upscaling.R index 77a4ece..e61a7d4 100644 --- 
a/upscale_disconnections/example_run_upscaling.R +++ b/upscale_disconnections/example_run_upscaling.R @@ -3,7 +3,7 @@ source("load_tool_environment.R") exclude_solar_edge <- FALSE region_to_load <- 'QLD' load_start_time <- '2021-05-25' -circuit_summary <- read.csv(file = "data/Luceo_circ_sum_active2309.csv", header = TRUE, stringsAsFactors = FALSE) +circuit_summary <- read.csv(file = "circuit_summary.csv", header = TRUE, stringsAsFactors = FALSE) manufacturer_install_data <- read.csv(file = "inbuilt_data/cer_cumulative_capacity_and_number_by_manufacturer.csv", header = TRUE, stringsAsFactors = FALSE) @@ -15,11 +15,6 @@ if (exclude_solar_edge){ manufacturer_install_data <- manufacturer_install_data } -# These 2 lines are different to what the shiny app does -circuits_to_summarise <- mutate(circuits_to_summarise, manufacturer=ifelse(is.na(manufacturer), 'Other', manufacturer)) -manufacturer_install_data <- mutate(manufacturer_install_data, - manufacturer=ifelse(is.na(manufacturer), 'Other', manufacturer)) - manufacturer_install_data <- calc_installed_capacity_by_standard_and_manufacturer(manufacturer_install_data) upsc_results <- get_upscaling_results_excluding_ufls_affected_circuits(circuits_to_summarise, manufacturer_install_data, @@ -30,7 +25,7 @@ upscaled_disconnections <- upsc_results$upscaled_disconnections manufacturers_missing_from_cer <- upsc_results$manufacturers_missing_from_cer manufacturers_missing_from_input_db <- upsc_results$manufacturers_missing_from_input_db -write.csv(disconnection_summary, "data/Luceo_active_disconnection_summary0610.csv", row.names = FALSE) -write.csv(upscaled_disconnections, "data/Luceo_active_upscaled_disconnections0610.csv", row.names = FALSE) -#write.csv(manufacturers_missing_from_cer, "test_manufacturers_missing_from_cer.csv", row.names=FALSE) -#write.csv(manufacturers_missing_from_input_db, "test_manufacturers_missing_from_input_db.csv", row.names=FALSE) \ No newline at end of file +write.csv(disconnection_summary, 
"test_disconnection_summary.csv", row.names = FALSE) +write.csv(upscaled_disconnections, "test_upscaled_disconnections.csv", row.names = FALSE) +write.csv(manufacturers_missing_from_cer, "test_manufacturers_missing_from_cer.csv", row.names=FALSE) +write.csv(manufacturers_missing_from_input_db, "test_manufacturers_missing_from_input_db.csv", row.names=FALSE) \ No newline at end of file From 59508ed302ec3bbc3a65f530301da38640156d9f Mon Sep 17 00:00:00 2001 From: phoebeheywood <53634732+phoebeheywood@users.noreply.github.com> Date: Thu, 28 Oct 2021 11:03:11 +1100 Subject: [PATCH 11/11] fix typos --- upscale_disconnections/post_processing_upscaling.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/upscale_disconnections/post_processing_upscaling.R b/upscale_disconnections/post_processing_upscaling.R index 7194c37..62a3c04 100644 --- a/upscale_disconnections/post_processing_upscaling.R +++ b/upscale_disconnections/post_processing_upscaling.R @@ -27,8 +27,8 @@ manufacturer_capacitys <- get_manufacturer_capacitys(manufacturer_install_data, region_to_load) disconnection_summary <- join_circuit_summary_and_cer_manufacturer_data(disconnection_summary, manufacturer_capacitys) -manufacters_missing_from_cer <- get_manufactures_in_input_db_but_not_cer(disconnection_summary) -manufacters_missing_from_input_db <- get_manufactures_in_cer_but_not_input_db(disconnection_summary) +manufacturers_missing_from_cer <- get_manufactures_in_input_db_but_not_cer(disconnection_summary) +manufacturers_missing_from_input_db <- get_manufactures_in_cer_but_not_input_db(disconnection_summary) disconnection_summary <- impose_sample_size_threshold(disconnection_summary, sample_threshold = 30) disconnection_summary <- calc_confidence_intervals_for_disconnections(disconnection_summary) disconnection_summary <- calc_upscale_kw_loss(disconnection_summary) @@ -36,5 +36,5 @@ upscaled_disconnections <- upscale_disconnections(disconnection_summary) write.csv(disconnection_summary, 
"test_disconnection_summary.csv", row.names = FALSE) write.csv(upscaled_disconnections, "test_upscaled_disconnections.csv", row.names = FALSE) -write.csv(manufacters_missing_from_cer, "test_manufacters_missing_from_cer.csv", row.names=FALSE) -write.csv(manufacters_missing_from_input_db, "test_manufacters_missing_from_input_db.csv", row.names=FALSE) \ No newline at end of file +write.csv(manufacturers_missing_from_cer, "test_manufacturers_missing_from_cer.csv", row.names=FALSE) +write.csv(manufacturers_missing_from_input_db, "test_manufacturers_missing_from_input_db.csv", row.names=FALSE) \ No newline at end of file