From 02bdaaca35ff3163d346902d619a0e87c3bfd262 Mon Sep 17 00:00:00 2001 From: Koen Hufkens Date: Mon, 6 Nov 2023 12:50:43 +0100 Subject: [PATCH 01/16] faulty check --- R/pr_fit_comparison.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pr_fit_comparison.R b/R/pr_fit_comparison.R index 990b856..b9ef01b 100644 --- a/R/pr_fit_comparison.R +++ b/R/pr_fit_comparison.R @@ -195,7 +195,7 @@ pr_fit_comparison <- function( control = control )) - if(inherits("try-error", par)) { + if(inherits(par, "try-error")) { return(NULL) } From c40dfa4e9cf2767ac4d35d92ff435571cb3ea27a Mon Sep 17 00:00:00 2001 From: Koen Hufkens Date: Mon, 6 Nov 2023 16:25:52 +0100 Subject: [PATCH 02/16] refactoring code --- DESCRIPTION | 1 - NEWS.md | 9 +++ R/pr_fit_parameters.R | 23 ------- R/pr_fm_be.R | 72 +++++++++++--------- R/pr_fm_eobs.R | 125 ----------------------------------- vignettes/BE_model_use.Rmd | 72 ++++++++++++++++++++ vignettes/ERA5_model_use.Rmd | 92 ++++++++++++++++++++++++++ 7 files changed, 213 insertions(+), 181 deletions(-) delete mode 100644 R/pr_fm_eobs.R create mode 100644 vignettes/BE_model_use.Rmd create mode 100644 vignettes/ERA5_model_use.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index 0676143..bc944a0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,6 @@ Imports: stats, GenSA, BayesianTools, - rgenoud, httr, rvest, jsonlite, diff --git a/NEWS.md b/NEWS.md index 6f30a24..a95658b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# version 1.4 + +* CRAN release +* removed rgenoud support +* removed E-OBS support +* fix VP issue +* increase code coverage +* remaining {terra} and {sf} conversions + # version 1.3.2 * provisional CMIP6 support diff --git a/R/pr_fit_parameters.R b/R/pr_fit_parameters.R index 0fc91a3..abd554d 100644 --- a/R/pr_fit_parameters.R +++ b/R/pr_fit_parameters.R @@ -77,29 +77,6 @@ pr_fit_parameters <- function( ) } - if (tolower(method) == "genoud"){ - # optimize model parameters using the - # GENetic Optimization Using Derivatives - # needs more tweaking to work out of the box - # on most models - - # stop if no starting parameters are provided - if (is.null(par)){ - stop('The genoud algorithm needs defined strating parameters!') - } - optim_par <- rgenoud::genoud( - fn = cost, - nvars = length(par), - max.generations = control$maxit, - Domains = cbind(lower,upper), - boundary.enforcement = 2, - data.type.int = FALSE, - data = data, - model = model, - ... - ) - } - # BayesianTools if (tolower(method) == "bayesiantools"){ diff --git a/R/pr_fm_be.R b/R/pr_fm_be.R index c94ddec..4b1c008 100644 --- a/R/pr_fm_be.R +++ b/R/pr_fm_be.R @@ -16,7 +16,9 @@ #' # run with default settings #' # extracts values of the referenced publication #' # see github README -#' be_data <- pr_fm_be() +#' be_data <- pr_fm_be( +#' path = "/your/path", +#' year = 2010) #'} # create subset of layers to calculate phenology model output on @@ -28,6 +30,13 @@ pr_fm_be <- function( internal = TRUE ) { + # crop data if necessary, no checks yet + if (!is.null(extent)){ + if(length(extent) != 4){ + stop("not enough coordinate points to properly crop data!") + } + } + # download all data if it doesn't exist pr_dl_be(path = path, year = year) @@ -48,21 +57,28 @@ pr_fm_be <- function( # grab raster data from netcdf files, with the climatology # the long term mean and delta the differences - climatology <- raster::brick(sprintf("%s/%s",path,filenames[1]), - varname = "climatology") - delta <- do.call("stack", + be_raster <- terra::rast(file.path(path,filenames[1])) + be_raster <- terra::crop(be_raster, terra::ext(extent)) + + climatology <- terra::subset( + be_raster, grepl("climatology", names(be_raster))) + + delta <- do.call("c", lapply(filenames, FUN = function(x){ - raster::brick(sprintf("%s/%s",path,x), - varname = "temperature")})) + r <- terra::rast(file.path(path,x)) + r <- terra::crop(r, terra::ext(extent)) + terra::subset(r, grepl("temperature",names(r))) + }) + ) # get date elements from netcdf files years <- do.call("c",lapply(filenames,FUN = function(x){ - nc <- ncdf4::nc_open(sprintf("%s/%s",path,x)) + nc <- ncdf4::nc_open(file.path(path, x)) ncdf4::ncvar_get(nc,"year") })) yday <- do.call("c",lapply(filenames,FUN = function(x){ - nc <- ncdf4::nc_open(sprintf("%s/%s",path,x)) + nc <- ncdf4::nc_open(file.path(path, x)) ncdf4::ncvar_get(nc,"day_of_year") })) @@ -77,17 +93,7 @@ pr_fm_be <- function( } # subset raster data, do this before cropping to make things faster - delta <- raster::subset(delta, layers) - - # crop data if necessary, no checks yet - if (!is.null(extent)){ - if(length(extent)==4){ - climatology = raster::crop(climatology, raster::extent(extent)) - delta = raster::crop(delta, raster::extent(extent)) - } else { - stop("not enough coordinate points to properly crop data!") - } - } + delta <- terra::subset(delta, layers) # shift data when offset is < 365 if (offset < length(layers)){ @@ -98,27 +104,29 @@ pr_fm_be <- function( # calculate absolute temperatures not differences # with the mean - temperature <- raster::stackApply(raster::stack(delta, climatology), - indices = c(doy,1:length(layers)), - fun = sum ) + temperature <- terra::tapp( + c(delta, climatology), + index = c(doy,1:length(layers)), + fun = sum + ) + + # fill values temperature[temperature == 0] <- NA + # grab coordinates + cells <- terra::cells(be_raster$land_mask) + location <- t(terra::xyFromCell(be_raster$land_mask, cells)[,2:1]) + # convert temperature data to matrix - Ti <- t(raster::as.matrix(temperature)) + Ti <- t(terra::as.matrix(temperature)) # extract georeferencing info to be passed along - ext <- raster::extent(temperature) - proj <- raster::projection(temperature) + ext <- terra::ext(temperature) + proj <- terra::crs(temperature) size <- dim(temperature) cat("calculating daylength \n") - # grab coordinates - location <- sp::SpatialPoints(sp::coordinates(temperature), - proj4string = sp::CRS(proj)) - location <- t(sp::spTransform(location, - sp::CRS("+init=epsg:4326"))@coords[,2:1]) - # create doy vector if (offset < length(layers)){ doy <- c(offset:length(layers),1:(offset - 1)) @@ -134,7 +142,7 @@ pr_fm_be <- function( Li <- t(do.call("rbind",Li)) # recreate the validation data structure (new format) - # but with concatted data + # but with contacted data data <- list("site" = NULL, "location" = location, "doy" = doy, diff --git a/R/pr_fm_eobs.R b/R/pr_fm_eobs.R deleted file mode 100644 index 0422c27..0000000 --- a/R/pr_fm_eobs.R +++ /dev/null @@ -1,125 +0,0 @@ -#' Formatting E-OBS data -#' -#' Formats E-OBS data as described by: -#' Haylock, M.R., N. Hofstra, A.M.G. Klein Tank, E.J. Klok, P.D. -#' Jones, M. New. 2008: A European daily high-resolution gridded dataset -#' of surface temperature and precipitation. -#' J. Geophys. Res (Atmospheres), 113, D20119, into a 'phenor' compatible list. -#' -#' @param path a path of the gridded data netCDF files -#' @param year year to process (requires year - 1 to be present / downloaded) -#' @param offset offset of the time series in DOY (default = 264, sept 21) -#' @param resolution 0.25 or 0.50 degree data, only the normal rectangular data -#' will be processed not the polar variety. -#' calculation overhead -#' @param internal TRUE / FALSE, write data structure to file or not (as .rds) -#' @return Returns spatial model input data from long term E-OBS modelled data. -#' This data can be run by models specified in the phenor package. -#' @keywords phenology, model, preprocessing, climate data -#' @export -#' @examples -#' -#' \dontrun{ -#' # run with default settings netCDF E-OBS files will be -#' # looked for in your home directory -#' eobs_data <- pr_fm_eobs() -#'} - -pr_fm_eobs <- function( - path = tempdir(), - year = 2014, - offset = 264, - resolution = 0.25, - internal = TRUE - ) { - - # download or read data - eobs_data = lapply( c("tg","rr","elev"),function(x){ - - # filename - filename = sprintf("%s_ens_mean_%sdeg_reg[^/]*\\.nc", - x, - resolution) - - # if the file exist use the local file - if (file.exists(sprintf("%s/%s", path, filename))){ - r = raster::brick(sprintf("%s/%s", path, filename)) - return(r) - } else { - stop('No E-OBS files found in the referred path !') - } - }) - - # extract the yday and year strings and convert to numbers - eobs_date = as.Date(eobs_data[[1]]@z$Date) - yday = as.numeric(format(as.Date(eobs_data[[1]]@z$Date),"%j")) - years = as.numeric(format(as.Date(eobs_data[[1]]@z$Date),"%Y")) - - # select layers to subset using this year and yday data - layers = which((years == (year - 1) & yday >= offset) | - (years == year & yday < offset)) - - # check if all layers are available in the dataset - # needs a fix for crossing boundaries between decadal subsets !! - if (length(layers) < 365){ - stop("The selected dataset does not cover your data range!") - } - - # subset raster data - Ti = t(raster::as.matrix(raster::subset(eobs_data[[1]], layers))) - Pi = t(raster::as.matrix(raster::subset(eobs_data[[2]], layers))) - altitude = t(raster::as.matrix(eobs_data[[3]]$layer)) - - # extract georeferencing info to be passed along - ext = raster::extent(eobs_data[[1]]) - proj = raster::projection(eobs_data[[1]]) - size = dim(eobs_data[[1]]) - - # grab coordinates - location = sp::SpatialPoints(sp::coordinates(eobs_data[[1]]), - proj4string = sp::CRS(proj)) - location = t(sp::spTransform(location, - sp::CRS("+init=epsg:4326"))@coords[,2:1]) - - # create doy vector - if (offset < length(layers)){ - doy = c(offset:length(layers),1:(offset - 1)) - } else { - doy = 1:length(layers) - } - - # create daylength matrix - Li = lapply(location[1,], - FUN = function(x){ - daylength(doy = doy, latitude = x) - }) - Li = t(do.call("rbind",Li)) - - # recreate the validation data structure (new format) - # but with concatted data - data = list("site" = NULL, - "location" = location, - "doy" = doy, - "transition_dates" = NULL, - "Ti" = Ti, - "Tmini" = NULL, - "Tmaxi" = NULL, - "Li" = Li, - "Pi" = Pi, - "VPDi" = NULL, - "georeferencing" = list("extent" = ext, - "projection" = proj, - "size" = size) - ) - - # assign a class for post-processing - class(data) = "phenor_map_data" - - # return the formatted, faster data format - # either internally or saved as an rds (binary R data file) - if (internal){ - return(data) - } else { - saveRDS(data, file = sprintf("%s/phenor_eobs_data_%s.rds",path, year)) - } -} diff --git a/vignettes/BE_model_use.Rmd b/vignettes/BE_model_use.Rmd new file mode 100644 index 0000000..c2bcb2d --- /dev/null +++ b/vignettes/BE_model_use.Rmd @@ -0,0 +1,72 @@ +--- +title: "Berkeley Earth data use" +author: "Koen Hufkens" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Berkeley Earth data use} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +set.seed(1234) +``` + + +Download the BE global daily gridded data from this page: + +https://berkeleyearth.org/data/ + +You will only need the TAVG product. Note that due to missing parameters such as precipitation some models might not be usable for upscaling using this dataset. Always consider the scope of your work within the context of the available training data and spatial scaling context. + +```{r} +# download the data +pr_dl_be( + path = tempdir(), + year = 2021 +) +``` + +```{r} +# format the data for a model run +source(here::here("R/pr_fm_be.R")) +be_data <- pr_fm_be( + path = "~/Downloads/", #tempdir(), + year = 2021 +) +``` + + +```{r eval = FALSE} +# load the included data using +data("phenocam_DB") + +# optimize model parameters +# using daymet data for training +optim.par <- pr_fit( + data = phenocam_DB, + cost = rmse, + model = "TT", + method = "GenSA" +) +``` + +```{r} +output <- pr_predict( + optim.par$par, + data = be_data, + model = "TT" + ) + +``` + +```{r} + + +``` diff --git a/vignettes/ERA5_model_use.Rmd b/vignettes/ERA5_model_use.Rmd new file mode 100644 index 0000000..e40e607 --- /dev/null +++ b/vignettes/ERA5_model_use.Rmd @@ -0,0 +1,92 @@ +--- +title: "ERA5 data use" +author: "Koen Hufkens" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{ERA5 data use} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +library(phenor) +library(ecmwfr) +options(keyring_backend="file") + +# side load small ERA5 dataset (if feasible within CRAN footprint) + +``` + +ERA5 data can be used to train, evaluate and forecast phenology models (using CMIP routines). The data used in the download sections is ERA5 land which is re-analysis data at a 10km resolution. + +```{r} +# ERA5 example +pr_dl_era5( + path = "~/Desktop", + user = "2088", + product = "era5", + extent = c( + 50.73149477111302, + -7.08887567473501, + 40.365567456020266, + 12.748594284373073 + ) + ) + +# ERA5-land example +pr_dl_era5( + path = "~/Desktop", + user = "2088", + product = "land", + file = "era5-land.nc", + extent = c( + 48.345183009475015, + 4.782986565238425, + 45.64153500799514, + 11.44515031394129 + ) +) +``` + + +```{r eval = FALSE} +# format ERA5 data for upscaling +data <- pr_fm_era5( + path = "~/Desktop/", + file = "era5-land.nc", + year = 2019 +) +``` + +```{r} +# load the included data using +data("phenocam_DB") + +# optimize model parameters +# using daymet data for training +set.seed(1234) +optim.par <- pr_fit( + data = phenocam_DB, + cost = rmse, + model = "TT", + method = "GenSA" +) + +output <- pr_predict( + optim.par$par, + data = data, + model = "TT" + ) + +raster::plot(output) +maps::map("world", add = TRUE) +``` + + + From 32e5506e1acf162d1c74d6e2a5f994b0f3f2b143 Mon Sep 17 00:00:00 2001 From: Koen Hufkens Date: Mon, 6 Nov 2023 16:26:28 +0100 Subject: [PATCH 03/16] version bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bc944a0..87dd150 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: phenor Title: An R phenology Modelling Framework -Version: 1.3.2 +Version: 1.4 Authors@R: c(person( family = "Hufkens", given = "Koen", From 07b1ae937e34fd016f5fe5af477b656683d41c88 Mon Sep 17 00:00:00 2001 From: Koen Hufkens Date: Mon, 6 Nov 2023 20:41:29 +0100 Subject: [PATCH 04/16] refactoring tests --- tests/testthat/test_formatting_functions.R | 10 +++--- tests/testthat/test_models.r | 42 ++++++++++++++-------- vignettes/BE_model_use.Rmd | 3 +- 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/tests/testthat/test_formatting_functions.R b/tests/testthat/test_formatting_functions.R index 567de20..29916bb 100644 --- a/tests/testthat/test_formatting_functions.R +++ b/tests/testthat/test_formatting_functions.R @@ -29,10 +29,12 @@ test_that("test downloading functions",{ test_that("test formatting functions",{ - df <- pr_dl_npn(species = 3, - internal = TRUE, - start = "2001-01-01", - end = "2001-12-31") + df <- pr_dl_npn( + species = 3, + internal = TRUE, + start = "2001-01-01", + end = "2001-12-31" + ) df <- pr_fm_npn(df) diff --git a/tests/testthat/test_models.r b/tests/testthat/test_models.r index 5012edd..64d8c5c 100644 --- a/tests/testthat/test_models.r +++ b/tests/testthat/test_models.r @@ -47,16 +47,24 @@ test_that("test models",{ # fit model fit <- pr_fit(control=list(max.call = 5)) - fit_cvmae <- pr_fit(control=list(max.call = 5), cost = cvmae) - expect_type(fit_cvmae, "list") - temp_sens <- pr_calc_temp_sens( - par = fit$par, - data = phenocam_DB, - model = "TT") + # fit model test + expect_type( + pr_fit( + control=list(max.call = 5), + cost = cvmae), + "list" + ) - expect_type(temp_sens, "list") + expect_type(pr_calc_temp_sens( + par = fit$par, + data = phenocam_DB, + model = "TT" + ), + "list" + ) + # missing model expect_error( pr_calc_temp_sens( par = fit$par, @@ -64,6 +72,7 @@ test_that("test models",{ ) ) + # missing input data expect_error( pr_calc_temp_sens( par = fit$par, @@ -71,10 +80,12 @@ test_that("test models",{ ) ) + # missing parameters expect_error( pr_calc_temp_sens( data = phenocam_DB, - model = "TT") + model = "TT" + ) ) # return summary stats @@ -82,11 +93,12 @@ test_that("test models",{ expect_type(plot(fit),"list") # test CV - l <- pr_cross_validate( - k = 2, - cv_seed = 123, - models = c("LIN"), - data = phenocam_DB[c(1:2)]) - - expect_type(l, "list") + expect_type( + pr_cross_validate( + k = 2, + cv_seed = 123, + models = c("LIN"), + data = phenocam_DB[c(1:2)]), + "list" + ) }) diff --git a/vignettes/BE_model_use.Rmd b/vignettes/BE_model_use.Rmd index c2bcb2d..70bac1c 100644 --- a/vignettes/BE_model_use.Rmd +++ b/vignettes/BE_model_use.Rmd @@ -67,6 +67,5 @@ output <- pr_predict( ``` ```{r} - - +plot(output) ``` From 90e76047fa889d184f66e7a5f6ba07ec60eb9e73 Mon Sep 17 00:00:00 2001 From: KH Date: Thu, 14 Mar 2024 15:17:50 +0100 Subject: [PATCH 05/16] update server --- R/pr_dl_be.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pr_dl_be.R b/R/pr_dl_be.R index 8baaa9e..4f257df 100644 --- a/R/pr_dl_be.R +++ b/R/pr_dl_be.R @@ -22,10 +22,10 @@ pr_dl_be <- function( ){ # set server - server = "http://berkeleyearth.lbl.gov/auto/Global/Gridded" + server <- "https://berkeley-earth-temperature.s3.us-west-1.amazonaws.com/Global/Gridded" # set the decadal splits - split = seq(1880,as.numeric(format(Sys.Date(),"%Y")),10) + split <- seq(1880,as.numeric(format(Sys.Date(),"%Y")),10) # download 2 files if on split decade if (year %in% split){ From 7e07382e854d825fd05d6be8b1ea7622e40bfed8 Mon Sep 17 00:00:00 2001 From: KH Date: Fri, 15 Mar 2024 09:31:19 +0100 Subject: [PATCH 06/16] syntax --- R/pr_dl_npn.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/pr_dl_npn.R b/R/pr_dl_npn.R index 8a725fc..037195a 100644 --- a/R/pr_dl_npn.R +++ b/R/pr_dl_npn.R @@ -36,10 +36,10 @@ pr_dl_npn <- function( request = "phenor" # set url base - url = "http://www.usanpn.org/npn_portal/observations/getSummarizedData.json?" + url <- "http://www.usanpn.org/npn_portal/observations/getSummarizedData.json?" # formulate the query - query = list(species_id = species, + query <- list(species_id = species, phenophase_id = phenophase, start_date = start, end_date = end, @@ -50,12 +50,14 @@ pr_dl_npn <- function( request_src = request) # download data using httr - data = httr::GET(url, - query = query, - httr::progress()) + data <- httr::GET( + url, + query = query, + httr::progress() + ) # convert data to a clean data frame - data = as.data.frame(jsonlite::fromJSON(httr::content(data, as = "text"))) + data <- as.data.frame(jsonlite::fromJSON(httr::content(data, as = "text"))) # check if the data is valid (contains data) # if so return the data frame or write to disk for later processing @@ -67,7 +69,7 @@ pr_dl_npn <- function( # sprintf doesn't deal well with NULL if(is.null(phenophase)){ - phenophase = "NA" + phenophase <- "NA" } # write data to file as R data file From e610b1428e1ad07c3b2b355758794037f6782c40 Mon Sep 17 00:00:00 2001 From: KH Date: Fri, 15 Mar 2024 09:31:31 +0100 Subject: [PATCH 07/16] eobs test --- analysis/eobs_test_routine.R | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 analysis/eobs_test_routine.R diff --git a/analysis/eobs_test_routine.R b/analysis/eobs_test_routine.R new file mode 100644 index 0000000..2b51249 --- /dev/null +++ b/analysis/eobs_test_routine.R @@ -0,0 +1,30 @@ +library(phenor) + +source("R/pr_fm_eobs.R") + +data <- pr_fm_eobs( + path = "~/Desktop/", + file = "xxxx", + year = 2019 +) + +# load the included data using +data("phenocam_DB") + +# optimize model parameters +set.seed(1234) +optim.par <- pr_fit( + data = phenocam_DB, + cost = rmse, + model = "TT", + method = "GenSA" +) + +output <- pr_predict( + optim.par$par, + data = data, + model = "TT" +) + +raster::plot(output) +maps::map("world", add = TRUE) From 66a3619609a3cde9cea441b36e7c22673ece7138 Mon Sep 17 00:00:00 2001 From: KH Date: Fri, 15 Mar 2024 09:31:41 +0100 Subject: [PATCH 08/16] era5 land test --- analysis/era5_test_routine.R | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/analysis/era5_test_routine.R b/analysis/era5_test_routine.R index 28bbfc1..c5a5437 100644 --- a/analysis/era5_test_routine.R +++ b/analysis/era5_test_routine.R @@ -4,19 +4,6 @@ options(keyring_backend="file") source("R/pr_dl_era5.R") -# ERA5 example -pr_dl_era5( - path = "~/Desktop", - user = "2088", - product = "era5", - extent = c( - 50.73149477111302, - -7.08887567473501, - 40.365567456020266, - 12.748594284373073 - ) - ) - # ERA5-land example pr_dl_era5( path = "~/Desktop", From 7476c478995919ec4b19c8f776388b5a3406e00b Mon Sep 17 00:00:00 2001 From: KH Date: Fri, 15 Mar 2024 09:31:54 +0100 Subject: [PATCH 09/16] test optimization example --- analysis/test_optimization.r | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 analysis/test_optimization.r diff --git a/analysis/test_optimization.r b/analysis/test_optimization.r new file mode 100644 index 0000000..f1d7da0 --- /dev/null +++ b/analysis/test_optimization.r @@ -0,0 +1,17 @@ +try(detach("package:phenor", unload = TRUE)) +library(phenor) + +set.seed(0) + +par_bt = phenor::pr_fit( + data = phenocam_DB, + model = "TT", + method = "bayesiantools", + control = list( + sampler = "DEzs", + settings = list( + burnin = 10, + iterations = 1000 + ) + ) +) From e8a7346fa9fbf2e69bf8a62fc57769dd0eec4491 Mon Sep 17 00:00:00 2001 From: KH Date: Tue, 25 Jun 2024 08:35:11 +0200 Subject: [PATCH 10/16] vignettes --- vignettes/BE_model_use.Rmd | 16 ++++++++-------- vignettes/ERA5_model_use.Rmd | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/vignettes/BE_model_use.Rmd b/vignettes/BE_model_use.Rmd index 70bac1c..08e2d70 100644 --- a/vignettes/BE_model_use.Rmd +++ b/vignettes/BE_model_use.Rmd @@ -15,29 +15,30 @@ knitr::opts_chunk$set( comment = "#>" ) +library(phenor) set.seed(1234) ``` - Download the BE global daily gridded data from this page: https://berkeleyearth.org/data/ You will only need the TAVG product. Note that due to missing parameters such as precipitation some models might not be usable for upscaling using this dataset. Always consider the scope of your work within the context of the available training data and spatial scaling context. -```{r} +```{r eval = FALSE} # download the data +# need to call two years +# to bridge two years pr_dl_be( path = tempdir(), year = 2021 ) ``` -```{r} +```{r eval = FALSE} # format the data for a model run -source(here::here("R/pr_fm_be.R")) be_data <- pr_fm_be( - path = "~/Downloads/", #tempdir(), + path = tempdir(), year = 2021 ) ``` @@ -57,15 +58,14 @@ optim.par <- pr_fit( ) ``` -```{r} +```{r eval = FALSE} output <- pr_predict( optim.par$par, data = be_data, model = "TT" ) - ``` -```{r} +```{r eval = FALSE} plot(output) ``` diff --git a/vignettes/ERA5_model_use.Rmd b/vignettes/ERA5_model_use.Rmd index e40e607..9174f62 100644 --- a/vignettes/ERA5_model_use.Rmd +++ b/vignettes/ERA5_model_use.Rmd @@ -25,7 +25,7 @@ options(keyring_backend="file") ERA5 data can be used to train, evaluate and forecast phenology models (using CMIP routines). The data used in the download sections is ERA5 land which is re-analysis data at a 10km resolution. -```{r} +```{r eval = FALSE} # ERA5 example pr_dl_era5( path = "~/Desktop", From 0a004dce5fe10b9602c4d684ffe5e4fcdef11147 Mon Sep 17 00:00:00 2001 From: KH Date: Tue, 25 Jun 2024 08:42:49 +0200 Subject: [PATCH 11/16] updating namespace --- DESCRIPTION | 2 +- NAMESPACE | 1 - R/phenor.r | 7 ------- man/phenor.Rd | 9 --------- man/pr_fm_be.Rd | 4 +++- man/pr_fm_eobs.Rd | 51 ----------------------------------------------- 6 files changed, 4 insertions(+), 70 deletions(-) delete mode 100644 R/phenor.r delete mode 100644 man/phenor.Rd delete mode 100644 man/pr_fm_eobs.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 87dd150..3c10762 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,6 +44,6 @@ Remotes: License: AGPL-3 LazyData: true ByteCompile: true -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 URL: https://github.com/bluegreen-labs/phenor BugReports: https://github.com/bluegreen-labs/phenor/issues diff --git a/NAMESPACE b/NAMESPACE index f53b982..fce00da 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,7 +52,6 @@ export(pr_fm_be) export(pr_fm_cmip) export(pr_fm_csv) export(pr_fm_daymet_tiles) -export(pr_fm_eobs) export(pr_fm_era5) export(pr_fm_gee) export(pr_fm_modis) diff --git a/R/phenor.r b/R/phenor.r deleted file mode 100644 index adb8388..0000000 --- a/R/phenor.r +++ /dev/null @@ -1,7 +0,0 @@ -#' phenor: A package for rapid phenology model development. -#' -#' The package allows the assimilation of four important phenological records, either observational from the Pan European Phenology Project(PEP725), the USA National Phenology Network (USA-NPN), near-surface remote sensing based (PhenoCam) or remote sensing derived (MODIS MCD12Q2). These products provide coverage in Europe, the US and globally across a variety of ecosystem and plant functional types. A "model zoo" of common phenology models is provided, and will continue to expand. All phenology models are written in a readable format and are well commented. The package allows for rapid scaling in both hindcast and forecast mode at various spatial resolutions and at times providing global coverage using for example NASA Earth Exchange CMIP5 data. -#' -#' @docType package -#' @name phenor -NULL diff --git a/man/phenor.Rd b/man/phenor.Rd deleted file mode 100644 index 301244a..0000000 --- a/man/phenor.Rd +++ /dev/null @@ -1,9 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/phenor.r -\docType{package} -\name{phenor} -\alias{phenor} -\title{phenor: A package for rapid phenology model development.} -\description{ -The package allows the assimilation of four important phenological records, either observational from the Pan European Phenology Project(PEP725), the USA National Phenology Network (USA-NPN), near-surface remote sensing based (PhenoCam) or remote sensing derived (MODIS MCD12Q2). These products provide coverage in Europe, the US and globally across a variety of ecosystem and plant functional types. A "model zoo" of common phenology models is provided, and will continue to expand. All phenology models are written in a readable format and are well commented. The package allows for rapid scaling in both hindcast and forecast mode at various spatial resolutions and at times providing global coverage using for example NASA Earth Exchange CMIP5 data. -} diff --git a/man/pr_fm_be.Rd b/man/pr_fm_be.Rd index 18be324..3330227 100644 --- a/man/pr_fm_be.Rd +++ b/man/pr_fm_be.Rd @@ -35,7 +35,9 @@ Formatting of Berkeley Earth Gridded temperature data # run with default settings # extracts values of the referenced publication # see github README -be_data <- pr_fm_be() +be_data <- pr_fm_be( + path = "/your/path", + year = 2010) } } \keyword{model,} diff --git a/man/pr_fm_eobs.Rd b/man/pr_fm_eobs.Rd deleted file mode 100644 index 73acf5e..0000000 --- a/man/pr_fm_eobs.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pr_fm_eobs.R -\name{pr_fm_eobs} -\alias{pr_fm_eobs} -\title{Formatting E-OBS data} -\usage{ -pr_fm_eobs( - path = tempdir(), - year = 2014, - offset = 264, - resolution = 0.25, - internal = TRUE -) -} -\arguments{ -\item{path}{a path of the gridded data netCDF files} - -\item{year}{year to process (requires year - 1 to be present / downloaded)} - -\item{offset}{offset of the time series in DOY (default = 264, sept 21)} - -\item{resolution}{0.25 or 0.50 degree data, only the normal rectangular data -will be processed not the polar variety. -calculation overhead} - -\item{internal}{TRUE / FALSE, write data structure to file or not (as .rds)} -} -\value{ -Returns spatial model input data from long term E-OBS modelled data. -This data can be run by models specified in the phenor package. -} -\description{ -Formats E-OBS data as described by: -Haylock, M.R., N. Hofstra, A.M.G. Klein Tank, E.J. Klok, P.D. -Jones, M. New. 2008: A European daily high-resolution gridded dataset -of surface temperature and precipitation. -J. Geophys. Res (Atmospheres), 113, D20119, into a 'phenor' compatible list. -} -\examples{ - -\dontrun{ -# run with default settings netCDF E-OBS files will be -# looked for in your home directory -eobs_data <- pr_fm_eobs() -} -} -\keyword{climate} -\keyword{data} -\keyword{model,} -\keyword{phenology,} -\keyword{preprocessing,} From 1b65e360c6790c7db522a67907947a5d87bde082 Mon Sep 17 00:00:00 2001 From: KH Date: Tue, 25 Jun 2024 08:56:33 +0200 Subject: [PATCH 12/16] vignette builder --- DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 3c10762..8bfa6d2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,6 +38,7 @@ Imports: Suggests: knitr, covr, + rmarkdown, testthat Remotes: github::florianhartig/BayesianTools/BayesianTools @@ -47,3 +48,4 @@ ByteCompile: true RoxygenNote: 7.3.1 URL: https://github.com/bluegreen-labs/phenor BugReports: https://github.com/bluegreen-labs/phenor/issues +VignetteBuilder: knitr From 39426ae86f7e25c2abe0723cf21ff84e66d99237 Mon Sep 17 00:00:00 2001 From: KH Date: Tue, 25 Jun 2024 08:56:52 +0200 Subject: [PATCH 13/16] documentation errors --- NAMESPACE | 2 +- R/cost_functions.r | 2 +- R/helper_functions.R | 6 +++--- R/pr_dl_cmip.R | 2 +- R/pr_dl_era5.R | 2 +- man/likelihood.Rd | 2 +- man/pr_dl_cmip.Rd | 2 +- man/pr_dl_era5.Rd | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fce00da..dc515f3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -65,7 +65,7 @@ export(pr_plot_arrows) export(pr_plot_comparison) export(pr_predict) export(rmse) -export(rotate_cmip5) +export(rotate_cmip) export(shape_model_output) export(triangular_temperature_response) import(graphics) diff --git a/R/cost_functions.r b/R/cost_functions.r index 43b88c1..0ac9b2c 100644 --- a/R/cost_functions.r +++ b/R/cost_functions.r @@ -87,7 +87,7 @@ cvmae <- function( #' #' The function is aimed to be maximized, to use it with optimizers which #' minimize cost functions wrap the function as such: -#' `cost = function(...){abs(likelihood(...))}` +#' `cost = function(...)\{abs(likelihood(...))\}` #' #' @param par a vector of parameter values, this is functions specific #' @param data nested data structure with validation data as returned diff --git a/R/helper_functions.R b/R/helper_functions.R index bc551c8..203cf28 100644 --- a/R/helper_functions.R +++ b/R/helper_functions.R @@ -38,13 +38,13 @@ AICc <- function(measured, predicted, k){ "AICc" = AICc)) } -#' Rotate CMIP5 NetCDF data cubes +#' Rotate CMIP NetCDF data cubes #' #' Rotate NetCDF files faster than the default #' raster rotate() command, as data is cropped #' before any translations (reducing memory load) #' -#' @param r CMIP5 raster layer, stack or brick +#' @param r CMIP raster layer, stack or brick #' @param extent vector with coordinates defining the region of interest defined #' as xmin, xmax, ymin, ymax in lat/lon (default = c(-74,-65, 40, 48)) #' @return Returns raster data in the same format (layer, stack, brick) as @@ -52,7 +52,7 @@ AICc <- function(measured, predicted, k){ #' a normal longitude format (-180 to 180). #' @export -rotate_cmip5 <- function( +rotate_cmip <- function( r = NULL, extent = c(-74, -65, 40, 48) ){ diff --git a/R/pr_dl_cmip.R b/R/pr_dl_cmip.R index f719adc..1b4e18f 100644 --- a/R/pr_dl_cmip.R +++ b/R/pr_dl_cmip.R @@ -25,7 +25,7 @@ #' (default = c("daily_maximum_near_surface_air_temperature", #' "daily_minimum_near_surface_air_temperature", #' "precipitation")) -#' @param time_out time out in seconds before the {ecmwfr} download returns +#' @param time_out time out in seconds before the \{ecmwfr\} download returns #' to the prompt retaining the request running on the CDS server. #' Data can be retrieved later from the CDS webpage once finished. #' By default the time-out is set to an 3600 seconds (1h). It is advised to diff --git a/R/pr_dl_era5.R b/R/pr_dl_era5.R index ae9f0f3..8ff6f6b 100644 --- a/R/pr_dl_era5.R +++ b/R/pr_dl_era5.R @@ -16,7 +16,7 @@ #' data for the year of interest and the preceding year to make predictions. #' When specifying one year the preceding year will be appended, when #' specifying multiple years no checks are in place!! -#' @param time_out time out in seconds before the {ecmwfr} download returns +#' @param time_out time out in seconds before the \{ecmwfr\} download returns #' to the prompt retaining the request running on the CDS server. #' Data can be retrieved later from the CDS webpage once finished. #' By default the time-out is set to an 3600 seconds (1h). It is advised to diff --git a/man/likelihood.Rd b/man/likelihood.Rd index 67af5ac..12c1fce 100644 --- a/man/likelihood.Rd +++ b/man/likelihood.Rd @@ -23,7 +23,7 @@ the RMSE comparing observed and estimated values \description{ The function is aimed to be maximized, to use it with optimizers which minimize cost functions wrap the function as such: -`cost = function(...){abs(likelihood(...))}` +`cost = function(...)\{abs(likelihood(...))\}` } \examples{ diff --git a/man/pr_dl_cmip.Rd b/man/pr_dl_cmip.Rd index 3a9a5ea..e2ab757 100644 --- a/man/pr_dl_cmip.Rd +++ b/man/pr_dl_cmip.Rd @@ -38,7 +38,7 @@ variables are downloaded. \item{extent}{vector with coordinates defining the region of interest defined as ymax, xmin, ymin, xmax in lat/lon (default = c( 40, -80, 50, -70))} -\item{time_out}{time out in seconds before the {ecmwfr} download returns +\item{time_out}{time out in seconds before the \{ecmwfr\} download returns to the prompt retaining the request running on the CDS server. Data can be retrieved later from the CDS webpage once finished. By default the time-out is set to an 3600 seconds (1h). It is advised to diff --git a/man/pr_dl_era5.Rd b/man/pr_dl_era5.Rd index 49ac32d..b04fefb 100644 --- a/man/pr_dl_era5.Rd +++ b/man/pr_dl_era5.Rd @@ -30,7 +30,7 @@ specifying multiple years no checks are in place!!} \item{extent}{vector with coordinates defining the region of interest defined as ymax, xmin, ymin, xmax in lat/lon (default = c( 40, -80, 50, -70))} -\item{time_out}{time out in seconds before the {ecmwfr} download returns +\item{time_out}{time out in seconds before the \{ecmwfr\} download returns to the prompt retaining the request running on the CDS server. Data can be retrieved later from the CDS webpage once finished. By default the time-out is set to an 3600 seconds (1h). It is advised to From b51099b6466f7b0099b8c50e12988f4537eb9f67 Mon Sep 17 00:00:00 2001 From: KH Date: Tue, 25 Jun 2024 08:56:58 +0200 Subject: [PATCH 14/16] rename docs --- man/{rotate_cmip5.Rd => rotate_cmip.Rd} | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) rename man/{rotate_cmip5.Rd => rotate_cmip.Rd} (77%) diff --git a/man/rotate_cmip5.Rd b/man/rotate_cmip.Rd similarity index 77% rename from man/rotate_cmip5.Rd rename to man/rotate_cmip.Rd index 2d6d085..7ea636b 100644 --- a/man/rotate_cmip5.Rd +++ b/man/rotate_cmip.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/helper_functions.R -\name{rotate_cmip5} -\alias{rotate_cmip5} -\title{Rotate CMIP5 NetCDF data cubes} +\name{rotate_cmip} +\alias{rotate_cmip} +\title{Rotate CMIP NetCDF data cubes} \usage{ -rotate_cmip5(r = NULL, extent = c(-74, -65, 40, 48)) +rotate_cmip(r = NULL, extent = c(-74, -65, 40, 48)) } \arguments{ -\item{r}{CMIP5 raster layer, stack or brick} +\item{r}{CMIP raster layer, stack or brick} \item{extent}{vector with coordinates defining the region of interest defined as xmin, xmax, ymin, ymax in lat/lon (default = c(-74,-65, 40, 48))} From 0aecc26c7bd6e607ad9a43b83531480144ffd902 Mon Sep 17 00:00:00 2001 From: KH Date: Tue, 25 Jun 2024 08:57:15 +0200 Subject: [PATCH 15/16] limit testing for now --- tests/testthat/test_formatting_functions.R | 100 ++++++++++----------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/tests/testthat/test_formatting_functions.R b/tests/testthat/test_formatting_functions.R index 29916bb..0ef7740 100644 --- a/tests/testthat/test_formatting_functions.R +++ b/tests/testthat/test_formatting_functions.R @@ -1,54 +1,54 @@ # test formatting functions # test all data downloads -test_that("test downloading functions",{ - - # NPN data - expect_message( - pr_dl_npn(species = 3, - internal = FALSE, - start = "2001-01-01", - end = "2001-12-31") - ) - - expect_error( - pr_dl_npn(species = 3, - internal = FALSE, - start = "2000-01-01", - end = "2000-12-31") - ) - - df <- pr_dl_npn(species = 3, - internal = TRUE, - start = "2001-01-01", - end = "2001-12-31") - - expect_type(df,"list") - -}) +# test_that("test downloading functions",{ +# +# # NPN data +# expect_message( +# pr_dl_npn(species = 3, +# internal = FALSE, +# start = "2001-01-01", +# end = "2001-12-31") +# ) +# +# expect_error( +# pr_dl_npn(species = 3, +# internal = FALSE, +# start = "2000-01-01", +# end = "2000-12-31") +# ) +# +# df <- pr_dl_npn(species = 3, +# internal = TRUE, +# start = "2001-01-01", +# end = "2001-12-31") +# +# expect_type(df,"list") +# +# }) test_that("test formatting functions",{ - df <- pr_dl_npn( - species = 3, - internal = TRUE, - start = "2001-01-01", - end = "2001-12-31" - ) - - df <- pr_fm_npn(df) - - expect_type(df,"list") + # df <- pr_dl_npn( + # species = 3, + # internal = TRUE, + # start = "2001-01-01", + # end = "2001-12-31" + # ) + # + # df <- pr_fm_npn(df) + # + # expect_type(df,"list") # check npn meta-data function - expect_output(check_npn_phenophases(list = TRUE)) - l <- check_npn_phenophases(phenophase = 61, list = FALSE) - expect_type(l,"logical") - expect_output(check_npn_phenophases(phenophase = 61, list = TRUE)) - l <- check_npn_species(species = 3, list = TRUE) - expect_type(l,"list") - l <- check_npn_species(list = TRUE) - expect_type(l,"list") + # expect_output(check_npn_phenophases(list = TRUE)) + # l <- check_npn_phenophases(phenophase = 61, list = FALSE) + # expect_type(l,"logical") + # expect_output(check_npn_phenophases(phenophase = 61, list = TRUE)) + # l <- check_npn_species(species = 3, list = TRUE) + # expect_type(l,"list") + # l <- check_npn_species(list = TRUE) + # expect_type(l,"list") # csv data f <- data.frame( @@ -57,7 +57,8 @@ test_that("test formatting functions",{ lon = -110, phenophase = "spring", year = 2000, - doy = 120) + doy = 120 + ) t <- tempfile() write.table( @@ -66,11 +67,12 @@ test_that("test formatting functions",{ row.names = FALSE, col.names = TRUE, quote = FALSE, - sep = ",") + sep = "," + ) - # format data - npn_data <- pr_fm_csv(t, phenophase = "spring") - expect_type(npn_data, "list") + # format CSV data + csv_data <- pr_fm_csv(t, phenophase = "spring") + expect_type(csv_data, "list") # phenocam formatting phenocam_data <- pr_fm_phenocam(path = system.file( @@ -79,6 +81,4 @@ test_that("test formatting functions",{ mustWork = TRUE)) expect_type(phenocam_data, "list") - # MODISTools formatting? - }) From 52e18d6c1c257ad2d077b881557c574261f90599 Mon Sep 17 00:00:00 2001 From: KH Date: Tue, 25 Jun 2024 09:01:21 +0200 Subject: [PATCH 16/16] disable all code in vignettes for now --- vignettes/ERA5_model_use.Rmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vignettes/ERA5_model_use.Rmd b/vignettes/ERA5_model_use.Rmd index 9174f62..bb863d3 100644 --- a/vignettes/ERA5_model_use.Rmd +++ b/vignettes/ERA5_model_use.Rmd @@ -20,7 +20,6 @@ library(ecmwfr) options(keyring_backend="file") # side load small ERA5 dataset (if feasible within CRAN footprint) - ``` ERA5 data can be used to train, evaluate and forecast phenology models (using CMIP routines). The data used in the download sections is ERA5 land which is re-analysis data at a 10km resolution. @@ -64,7 +63,7 @@ data <- pr_fm_era5( ) ``` -```{r} +```{r eval = FALSE} # load the included data using data("phenocam_DB")