Skip to content

Commit

Permalink
Merge pull request #67 from geco-bern/fix-CRU
Browse files Browse the repository at this point in the history
Fix CRU
  • Loading branch information
fabern authored Dec 6, 2024
2 parents e2c5ac3 + eb3e99d commit 6f40e07
Show file tree
Hide file tree
Showing 7 changed files with 681 additions and 152 deletions.
4 changes: 3 additions & 1 deletion R/ingest_bysite.R
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ ingest_bysite <- function(
mutate(
year_start = ifelse(year_start < year_start_wc, year_start, year_start_wc),
year_end = ifelse(year_end > year_end_wc, year_end, year_end_wc))
} else if (source == "ndep") {
# nothing done in this case
}
}
}
Expand Down Expand Up @@ -316,7 +318,7 @@ ingest_bysite <- function(
tidyr::pivot_longer(cols = starts_with("prec_"), names_to = "month", values_to = "prec", names_prefix = "prec_") %>%
mutate(month = as.integer(month)) %>%
rename(prec_fine = prec) %>%
mutate(prec_fine = prec_fine / days_in_month(month)) %>% # mm/month -> mm/d
mutate(prec_fine = prec_fine / lubridate::days_in_month(month)) %>% # mm/month -> mm/d
mutate(prec_fine = prec_fine / (60 * 60 * 24)) %>% # mm/d -> mm/sec
right_join(df_tmp %>%
dplyr::filter(lubridate::year(date) %in% year_start_wc:year_end_wc) %>%
Expand Down
256 changes: 143 additions & 113 deletions R/ingest_globalfields.R

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions data-raw/prepare_metainfo_fluxnet2015.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ The file "fluxnet_site_info_all.csv" was downloaded from downloaded from https:/

```{r}
## Get additional meta information for sites: Koeppen-Geiger Class
## The file "siteinfo_climate_koeppengeiger_flunxet2015.csv" was downloaded from downloaded from https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=1530 (placed in my ~/data/FLUXNET-2015_Tier1/meta/)
## The file "siteinfo_climate_koeppengeiger_flunxet2015.csv" was downloaded from https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=1530 (placed in my /data/scratch/bstocker/data/FLUXNET-2015_Tier1/meta/)
siteinfo <- read_csv("../inst/extdata/fluxnet_site_info_all.csv") %>%
dplyr::select(-sitename) %>%
dplyr::rename( sitename = fluxnetid ) %>%
Expand Down Expand Up @@ -104,7 +104,7 @@ siteinfo %>%

The original FLUXNET 2015 meta info file name doesn't contain clean information on start and end years for which data is available. Complement this information using names of the FLUXNET 2015 data files. This is more reliable data than in the meta info file. To run this step, data needs to be downloaded. Here, I'm using the daily data files and specify the path where they are located.
```{r}
dir_DD_fluxnet2015 <- "~/data/FLUXNET-2015_Tier1/20160128/point-scale_none_1d/original/unpacked/"
dir_DD_fluxnet2015 <- "/data/scratch/bstocker/data/FLUXNET-2015_Tier1/20160128/point-scale_none_1d/original/unpacked/"
## "Manually" get year start and year end from file names
# moredata <- as.data.frame( read.table( paste0( settings_input$path_cx1data, "/FLUXNET-2015_Tier1/doc/filelist_DD.txt") ) )
Expand Down
205 changes: 205 additions & 0 deletions tests/testthat/test_CRU_WFDEI_NDEP.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
# test CRU, WFDEI, NDEP data

test_that("test CRU data (monthly and downscaled daily)", {
  skip_on_cran()

  # The CRU archive is read from a machine-specific absolute path. Skip
  # (instead of erroring) on machines where that directory does not exist.
  # Previously used archive: "/data/archive/cru_NA_2021/data/"
  dir_cru <- "/data/archive/cru_harris_2024/data/"
  testthat::skip_if_not(
    dir.exists(dir_cru),
    message = paste("CRU data directory not available:", dir_cru)
  )

  ## get monthly CRU data
  mdf <- ingest_bysite(
    sitename = "CH-Lae",
    source = "cru",
    getvars = c("tmax", "tmin", "prec", "vpd"),
    dir = dir_cru,
    timescale = "m",
    year_start = 1901,
    year_end = 2018,
    lon = 8.365,
    lat = 47.4781,
    verbose = FALSE
  )

  ## get daily data (with temporal downscaling)
  ddf <- ingest_bysite(
    sitename = "CH-Lae",
    source = "cru",
    getvars = c("tmax", "tmin", "prec", "vpd"),
    dir = dir_cru,
    timescale = "d",
    year_start = 1901,
    year_end = 2018,
    lon = 8.365,
    lat = 47.4781,
    verbose = FALSE
  )

  ## get yearly data (not supported)
  # ydf <- ingest_bysite(
  #   sitename = "CH-Lae",
  #   source = "cru",
  #   getvars = c("tmax", "tmin", "prec", "vpd"),
  #   dir = "/data/archive/cru_harris_2024/data/",
  #   timescale = "y", requesting yearly cru data errors!
  #   year_start = 1901,
  #   year_end = 2018,
  #   lon = 8.365,
  #   lat = 47.4781,
  #   verbose = FALSE
  # )


  # Optional visual checks of the monthly data:
  # library(ggplot2)
  # ggplot(mdf, aes(x=date, y=prec)) + geom_line()
  # ggplot(mdf, aes(x=moy, y=prec, group = year)) + geom_line()
  # ggplot(mdf, aes(x=date, y=tmin)) + geom_line()
  # ggplot(mdf, aes(x=moy, y=tmin, group = year)) + geom_line()

  # Optional visual checks of the daily (downscaled) data:
  # library(ggplot2)
  # ggplot(ddf, aes(x=date, y=prec)) + geom_line()
  # ggplot(ddf, aes(x=lubridate::yday(date), y=prec, group = lubridate::year(date))) + geom_line()
  # ggplot(ddf, aes(x=date, y=tmin)) + geom_line()
  # ggplot(ddf, aes(x=lubridate::yday(date), y=tmin, group = lubridate::year(date))) + geom_line()
  #
  # pl1 <- ggplot(ddf, aes(x=lubridate::yday(date), y=tmin, group = lubridate::year(date))) +
  #   geom_line(aes(color = "daily\ninterpolated\nCRU\nobservations\n")) +
  #   geom_point(data = mdf, aes(color = "monthly\nCRU\nobservations")) +
  #   scale_color_manual(values = c('black', 'red'))
  # pl1 + aes(x = date)

  # Regression check on three rows of the monthly output.
  testthat::expect_equal(
    mdf[c(1, 100, 1416), ], # use dput() to derive below hardcoded reference
    tidyr::tibble(
      sitename = c("CH-Lae", "CH-Lae", "CH-Lae"),
      wetd = c(12.3695813093777, 15.8190953985062, 20.2960408158111),
      # `prec` is compared as a named vector (month abbreviations as names)
      prec = c(Jan = 1.5274476784002e-05, Apr = 2.91171954319271e-05, Dec = 4.01811756550539e-05),
      tmax = c(-0.0131488023838055, 13.764172279623, 4.65204431463909),
      tmin = c(-5.86579624579048, 3.4969638061452, 0.201791803788692),
      vpd = c(97.352996115735, 423.985071897943, 108.397108058279),
      moy = c(1, 4, 12),
      vapr = c(415.709569987869, 756.009354235459, 634.833568499851),
      month = c(1, 4, 12),
      year = c(1901, 1909, 2018),
      date = lubridate::ymd(c("1901-01-15", "1909-04-15", "2018-12-15"))
    )
  )

  # Regression check on four rows of the daily output.
  testthat::expect_equal(
    ddf[c(1, 100, 1416, 43070), ], # use dput() to derive below hardcoded reference
    tidyr::tibble(
      date = lubridate::ymd(c("1901-01-01", "1901-04-10", "1904-11-17", "2018-12-31")),
      prec = c(0, 0, 0, 0),
      tmax = c(1.36408937038989, 11.3535456681846, 4.26505071209226, 5.3088883537248),
      tmin = c(-3.7938395642009, 2.84610676114661, -1.17652509826678, 0.529165441280156),
      sitename = c("CH-Lae", "CH-Lae", "CH-Lae", "CH-Lae"),
      vpd = c(75.4734411654391, 445.173057078465, 136.571370463, 136.784738582639),
      vapr = c(523.440022615601, 601.876765774174, 558.154229833713, 626.47826413593)
    ),
    tolerance = 0.001 # we need a tolerance because of precip zeroes
  )

  # TODO: note that the output columns are different depending on
  #       timescale = "m" vs timescale = "d"

  # # TODO: note that low values around 0 are not always read out similarly,
  # #       depending on the request:
  # ingest_bysite(
  #   sitename = "CH-Lae",
  #   source = "cru",
  #   getvars = c("tmin", "prec"),
  #   dir = "/data/archive/cru_harris_2024/data/",
  #   timescale = "d",
  #   year_start = 2018,
  #   year_end = 2018,
  #   lon = 8.365,
  #   lat = 47.4781,
  #   verbose = FALSE
  # ) |> tail()
  # ingest_bysite(
  #   sitename = "CH-Lae",
  #   source = "cru",
  #   getvars = c("tmax", "tmin", "prec", "vpd"),#c("prec"),
  #   dir = "/data/archive/cru_harris_2024/data/",
  #   timescale = "d",
  #   year_start = 2018,
  #   year_end = 2018,
  #   lon = 8.365,
  #   lat = 47.4781,
  #   verbose = FALSE
  # ) |> tail()

})

test_that("test WATCH_WFDEI data (daily)", {
  skip_on_cran()

  # The WATCH-WFDEI archive is read from a machine-specific absolute path.
  # Skip (instead of erroring) on machines where that directory does not exist.
  dir_wfdei <- "/data/archive/wfdei_weedon_2014/data/"
  testthat::skip_if_not(
    dir.exists(dir_wfdei),
    message = paste("WATCH-WFDEI data directory not available:", dir_wfdei)
  )

  # Single-variable request, kept for manual exploration:
  # df_watch <- ingest_bysite(
  #   sitename = "FR-Pue",
  #   source = "watch_wfdei",
  #   getvars = c("temp"),
  #   dir = "/data/archive/wfdei_weedon_2014/data/",
  #   timescale = "d",
  #   year_start = 1976,
  #   year_end = 1982,
  #   lon = 3.5958,
  #   lat = 43.7414,
  #   verbose = TRUE
  #   #settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim")
  # )

  # library(ggplot2)
  # ggplot(df_watch, aes(x=lubridate::yday(date), y=temp, group = lubridate::year(date))) + geom_line()
  # ggplot(df_watch, aes(x=date, y=temp)) + geom_line()

  # WATCH_WFDEI-test
  df_watch2 <- ingest_bysite(
    sitename = "FR-Pue",
    source = "watch_wfdei",
    getvars = c("temp", "prec", "ppfd", "wind", "vpd"),
    dir = dir_wfdei,
    timescale = "d",
    year_start = 1976,
    year_end = 1982,
    lon = 3.5958,
    lat = 43.7414,
    verbose = TRUE
    #settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim")
  )

  # Regression check on four rows of the daily output.
  testthat::expect_equal(
    df_watch2[c(1, 100, 1416, 2546), ], # use dput() to derive below hardcoded reference
    tidyr::tibble(
      sitename = c("FR-Pue", "FR-Pue", "FR-Pue", "FR-Pue"),
      date = lubridate::ymd(c("1976-01-01", "1976-04-10", "1979-11-17", "1982-12-22")),
      ppfd = c(0.000121494193775998, 0.000463321711450072, 0.000158704938322483, 0.000136168702821456),
      rain = c(1.04287650166711e-05, 1.50037226251495e-05, 3.42658596342262e-06, 0),
      snow = c(1.16922735984436e-06, 0, 0, 0),
      prec = c(1.15979923765154e-05, 1.50037226251495e-05, 3.42658596342262e-06, 0),
      qair = c(0.00445236525960527, 0.00546576460676214, 0.00414670119491026, 0.00380845699253127),
      temp = c(5.26007027738387, 10.1696075275532, 4.79854696611329, 3.2901508237403),
      patm = c(98319.1269542875, 97914.4488131635, 97631.138619465, 97144.4980819063),
      vapr = c(701.689376749628, 857.328665401018, 649.064121009744, 593.270511888794),
      vpd = c(186.875760082974, 385.06692545368, 211.337540044264, 180.495864250968)
    )
  )

})

# The description previously duplicated the CRU test's name although this
# block exercises source = "ndep"; a unique name keeps failure reports
# unambiguous.
test_that("test NDEP data (yearly)", {
  skip_on_cran()

  # The N-deposition data is read from a machine-specific absolute path.
  # Skip (instead of erroring) on machines where that directory does not exist.
  dir_ndep <- "/data/scratch/bstocker/ndep_lamarque/"
  testthat::skip_if_not(
    dir.exists(dir_ndep),
    message = paste("NDEP data directory not available:", dir_ndep)
  )

  # Request yearly data for the first three FLUXNET2015 sites, 1990-2009.
  df_ndep <- ingest(
    ingestr::siteinfo_fluxnet2015 |>
      dplyr::slice(1:3) |>
      dplyr::select(sitename, lon, lat) |>
      dplyr::mutate(year_start = 1990, year_end = 2009),
    source = "ndep",
    timescale = "y",
    dir = dir_ndep,
    verbose = FALSE
  )
  # TODO: note that the output is differently structured in source = "ndep" (timescale = "y")
  #       vs source = "cru", timescale = "m"
  #       see: mdf
  #       see: df_ndep |> tidyr::unnest(data)

  # Regression check on three rows of the unnested, ungrouped output.
  testthat::expect_equal(
    dplyr::ungroup(tidyr::unnest(df_ndep, data))[c(1, 10, 60), ], # use dput() to derive below hardcoded reference
    tidyr::tibble(
      sitename = c("AR-SLu", "AR-SLu", "AT-Neu"),
      date = structure(c(7305, 10592, 14245), class = "Date"),
      noy = c(0.114221848547459, 0.118227459490299, 0.685620393264294),
      nhx = c(0.224458619952202, 0.187207788228989, 0.79076456451416)
    )
  )

})
23 changes: 14 additions & 9 deletions tests/testthat/test_flux_formatting.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ test_that("test HH data", {
skip_on_cran()

siteinfo <- ingestr::siteinfo_fluxnet2015 %>%
filter(sitename == "FR-Pue")
dplyr::filter(sitename == "FR-Pue")

settings_fluxnet <- list(
getswc = FALSE,
Expand All @@ -12,20 +12,22 @@ test_that("test HH data", {
remove_neg = FALSE,
dir_hh = paste0(path.package("ingestr"), "/extdata/")
)
df <- ingestr::ingest(

df <-testthat::expect_warning(ingestr::ingest(
siteinfo,
source = "fluxnet",
getvars = list(
gpp = "GPP_VUT_REF"
),
gpp = "GPP_NT_VUT_REF"
),
dir = paste0(path.package("ingestr"), "/extdata/"),
settings = settings_fluxnet,
timescale = "hh",
verbose = TRUE
)
))

expect_type(df, "list")
testthat::expect_equal(c("sitename","date","gpp"),
df |> tidyr::unnest(data) |> colnames())
})

test_that("test Daily data", {
Expand All @@ -42,7 +44,7 @@ test_that("test Daily data", {
dir_hh = paste0(path.package("ingestr"), "/extdata/")
)

df <- ingestr::ingest(
df <- testthat::expect_warning(ingestr::ingest(
siteinfo,
source = "fluxnet",
getvars = list(
Expand All @@ -53,7 +55,10 @@ test_that("test Daily data", {
settings = settings_fluxnet,
timescale = "d",
verbose = TRUE
)
))

expect_type(df, "list")
testthat::expect_equal(c("sitename","date","gpp", "gpp_unc"),
df |> tidyr::unnest(data) |> colnames())

})
12 changes: 6 additions & 6 deletions vignettes/example.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ df_worldclim <- ingest_bysite(
lon = 8.36439,
lat = 47.47833,
settings = settings_worldclim,
dir = "~/data/worldclim"
dir = "/data/archive/worldclim_fick_2017/data"
)
```

Expand Down Expand Up @@ -797,9 +797,9 @@ ddf_fluxnet <- ingest(
netrad = "NETRAD",
patm = "PA_F"
),
dir = "~/data/FLUXNET-2015_Tier1/20191024/DD/", # adjust this with your local path
dir = "/data/scratch/bstocker/FLUXNET-2015_Tier1/20191024/DD/", # adjust this with your local path
settings = list(
dir_hh = "~/data/FLUXNET-2015_Tier1/20191024/HH/", # adjust this with your local path
dir_hh = "/data/scratch/bstocker/FLUXNET-2015_Tier1/20191024/HH/", # adjust this with your local path
getswc = FALSE),
timescale = "d",
verbose = TRUE
Expand All @@ -813,9 +813,9 @@ ddf_tmin <- ingest(
siteinfo = siteinfo %>% slice(1:3),
source = "fluxnet",
getvars = list(tmin = "TMIN_F"),
dir = "~/data/FLUXNET-2015_Tier1/20191024/DD/", # adjust this with your local path
dir = "/data/scratch/bstocker/FLUXNET-2015_Tier1/20191024/DD/", # adjust this with your local path
settings = list(
dir_hh = "~/data/FLUXNET-2015_Tier1/20191024/HH/", # adjust this with your local path
dir_hh = "/data/scratch/bstocker/FLUXNET-2015_Tier1/20191024/HH/", # adjust this with your local path
getswc = FALSE),
timescale = "d",
verbose = TRUE
Expand All @@ -839,7 +839,7 @@ ddf_fluxnet_gpp <- ingest(
source = "fluxnet",
getvars = list(gpp = "GPP_NT_VUT_REF",
pp_unc = "GPP_NT_VUT_SE"),
dir = "~/data/FLUXNET-2015_Tier1/20191024/DD/", # adjust this with your local path
dir = "/data/scratch/bstocker/FLUXNET-2015_Tier1/20191024/DD/", # adjust this with your local path
settings = settings_fluxnet,
timescale= "d"
)
Expand Down
Loading

0 comments on commit 6f40e07

Please sign in to comment.