diff --git a/DESCRIPTION b/DESCRIPTION index 59fbf5f..ea66016 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,5 +28,7 @@ Suggests: jsonlite, mockery, withr, - httpcode + httpcode, + cli, + stringr Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 2e813dd..a7b83d0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,5 +6,6 @@ export(get_api_date_suffix) export(get_polis_api_data) export(iterative_api_call) export(process_api_response) +export(save_polis_data) export(update_polis_api_data) export(write_log_file_api) diff --git a/R/get_api_date_suffix.R b/R/get_api_date_suffix.R index 7321c94..289fac5 100644 --- a/R/get_api_date_suffix.R +++ b/R/get_api_date_suffix.R @@ -34,6 +34,24 @@ get_api_date_suffix <- function(data_type) { sub_activ = "SubActivity", lqas = "Lqas" ) + # Define date fields for each data type + # for initial data + date_fields_initial <- c( + cases = "ParalysisOnsetDate", + virus = "VirusDate", + population = "CreatedDate", + env = "CollectionDate", + geo = "CreatedDate", + geo_synonym = "UpdatedDate", + im = "PublishDate", + activity = "ActivityDateFrom", + lab_specimen_virus = "PublishDate", + lab_specimen = "LastUpdateDate", + sub_activ = "DateFrom", lqas = "Start" + ) + + # Define date fields for each data type + # for updated data date_fields <- c( cases = "LastUpdateDate", virus = "UpdatedDate", population = "UpdatedDate", @@ -53,6 +71,7 @@ get_api_date_suffix <- function(data_type) { # Return endpoint suffix and date field list( endpoint_suffix = endpoint_suffixes[[data_type]], + date_fields_initial = date_fields_initial[[data_type]], date_field = date_fields[[data_type]] ) } diff --git a/R/get_polis_api_data.R b/R/get_polis_api_data.R index c43d0e5..ed89d7b 100644 --- a/R/get_polis_api_data.R +++ b/R/get_polis_api_data.R @@ -26,14 +26,15 @@ #' (Human Specimen Viruses). Default is 'cases'. #' @param region Region code for data filtering. #' Represents the WHO region from which to retrieve the data. -#' Default is 'AFRO' (African Region). +#' Possible values are AFRO; AMRO; EMRO; EURO; SEARO; WPRO Use +#' 'Global' to retrieve global data. Default is 'AFRO'. #' @param select_vars Vector of variables to select from the API response. #' If NULL (default), all variables are selected. #' @param polis_api_key API key for authentication. #' Default is retrieved from the environment variable #' 'POLIS_API_KEY'. An explicit API key can be provided #' if required. -#' +#' @param updated_dates Logical indicating whether to use the 'LastUpdateDate' #' @return A data frame containing the requested data aggregated from all pages #' of the API response. Each row represents a record, and columns #' correspond to the variables in the dataset. @@ -49,13 +50,24 @@ get_polis_api_data <- function(min_date, data_type = "cases", region = "AFRO", select_vars = NULL, + updated_dates = TRUE, polis_api_key) { + # API Endpoint and URL Construction api_endpoint <- "https://extranet.who.int/polis/api/v2/" endpoint_suffix <- get_api_date_suffix(data_type)$endpoint_suffix # set up the dates - date_field <- get_api_date_suffix(data_type)$date_field + if (updated_dates) { + date_field <- get_api_date_suffix(data_type)$date_field + } else { + date_field <- get_api_date_suffix(data_type)$date_fields_initial + } + + # set up region field + if (tolower(region) == "global") { + region_field <- NULL + } # set up region field name region_field <- if (data_type == "virus") "RegionName" else "WHORegion" diff --git a/R/save_polis_data.R b/R/save_polis_data.R new file mode 100644 index 0000000..0c1c4ae --- /dev/null +++ b/R/save_polis_data.R @@ -0,0 +1,56 @@ +#' Save Polis Data to compressed RDS File +#' +#' This function saves a given POLIS data object to an RDS file using a +#' specific naming convention based on the current date. It also manages the +#' retention of only the 5 most recent datasets in the specified directory, +#' removing older datasets if necessary. +#' +#' @param polis_data The POLIS data object to be saved. +#' @param polis_path The directory path where the RDS file will be saved. This +#' function will check this directory for existing datasets and will maintain +#' only the 5 most recent datasets, deleting older ones. +#' @param max_datasets The max number of datasets to retain in the directory. +#' +#' @return Invisible NULL. This function is used for its side effect of +#' saving a file and potentially deleting older files, rather than for +#' returning a value. +#' +#' @examples +#' # Assume `polis_data` is your dataset and `./polis_datasets` is your +#' # target directory +#' # save_polis_data(polis_data, "./polis_datasets") +#' +#' @export +save_polis_data <- function(polis_data, polis_path, + filname, max_datasets = 5) { + + cli::cli_process_start("Saving POLIS data into a compressed RDS file.") + + # generate the file name based on the current date + suffix_name <- sprintf("_%s.rds", format(Sys.Date(), "%Y_%V")) + full_path <- file.path(polis_path, paste0(filname, suffix_name)) + + # save polis list + saveRDS(polis_data, full_path, compress = "xz") + + cli::cli_process_done( ) + + # Check existing datasets and keep only the 5 most recent + existing_files <- list.files(polis_path, full.names = TRUE) + + if (length(existing_files) > 5) { + # Sort files by date, assuming the naming convention holds the date info + file_dates <- sapply(existing_files, function(x) { + as.Date(stringr::str_extract(x, "\\d{4}_\\d{2}"), "%Y_%V") + }) + + oldest_files <- existing_files[order( + file_dates)][1:(length(existing_files)-5)] + + cli::cli_alert_success( + "Removing {length(oldest_files)} old file(s) to keep top {max_datasets}.") + + suppressMessages(file.remove(oldest_files)) + } + cli::cli_process_done( ) +} diff --git a/R/update_polis_api_data.R b/R/update_polis_api_data.R index 03e242c..6ebeb75 100644 --- a/R/update_polis_api_data.R +++ b/R/update_polis_api_data.R @@ -55,6 +55,7 @@ update_polis_api_data <- function(min_date, save_directly = FALSE, log_results = FALSE, polis_api_key = NULL) { + # Construct file names for data and log data_file_name <- paste0(file_path, "/", data_type, "_polis_data.rds") log_file_name <- paste0(file_path, "/", "polis_data_update_log.xlsx") diff --git a/man/get_polis_api_data.Rd b/man/get_polis_api_data.Rd index 3e599d8..935ce69 100644 --- a/man/get_polis_api_data.Rd +++ b/man/get_polis_api_data.Rd @@ -10,6 +10,7 @@ get_polis_api_data( data_type = "cases", region = "AFRO", select_vars = NULL, + updated_dates = TRUE, polis_api_key ) } @@ -32,11 +33,14 @@ Sampling), lab_specimen (Human Specimen), lab_specimen_virus \item{region}{Region code for data filtering. Represents the WHO region from which to retrieve the data. -Default is 'AFRO' (African Region).} +Possible values are AFRO; AMRO; EMRO; EURO; SEARO; WPRO Use +'Global' to retrieve global data. Default is 'AFRO'.} \item{select_vars}{Vector of variables to select from the API response. If NULL (default), all variables are selected.} +\item{updated_dates}{Logical indicating whether to use the 'LastUpdateDate'} + \item{polis_api_key}{API key for authentication. Default is retrieved from the environment variable 'POLIS_API_KEY'. An explicit API key can be provided diff --git a/man/save_polis_data.Rd b/man/save_polis_data.Rd new file mode 100644 index 0000000..103a050 --- /dev/null +++ b/man/save_polis_data.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/save_polis_data.R +\name{save_polis_data} +\alias{save_polis_data} +\title{Save Polis Data to compressed RDS File} +\usage{ +save_polis_data(polis_data, polis_path, filname, max_datasets = 5) +} +\arguments{ +\item{polis_data}{The POLIS data object to be saved.} + +\item{polis_path}{The directory path where the RDS file will be saved. This +function will check this directory for existing datasets and will maintain +only the 5 most recent datasets, deleting older ones.} + +\item{max_datasets}{The max number of datasets to retain in the directory.} +} +\value{ +Invisible NULL. This function is used for its side effect of +saving a file and potentially deleting older files, rather than for +returning a value. +} +\description{ +This function saves a given POLIS data object to an RDS file using a +specific naming convention based on the current date. It also manages the +retention of only the 5 most recent datasets in the specified directory, +removing older datasets if necessary. +} +\examples{ +# Assume `polis_data` is your dataset and `./polis_datasets` is your +# target directory +# save_polis_data(polis_data, "./polis_datasets") + +} diff --git a/tests/testthat/test-get_api_date_suffix.R b/tests/testthat/test-get_api_date_suffix.R index 5058b02..41914df 100644 --- a/tests/testthat/test-get_api_date_suffix.R +++ b/tests/testthat/test-get_api_date_suffix.R @@ -1,9 +1,13 @@ testthat::test_that("Get API Date Suffix Functionality", { # Test for valid data types expect_equal(get_api_date_suffix("cases"), - list(endpoint_suffix = "Case", date_field = "LastUpdateDate")) + list(endpoint_suffix = "Case", + date_fields_initial = "ParalysisOnsetDate", + date_field = "LastUpdateDate")) expect_equal(get_api_date_suffix("virus"), - list(endpoint_suffix = "Virus", date_field = "UpdatedDate")) + list(endpoint_suffix = "Virus", + date_fields_initial = "VirusDate", + date_field = "UpdatedDate")) # Add more tests for other valid data types # Test for an invalid data type