-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrscript_get_data_glass.R
60 lines (54 loc) · 1.99 KB
/
rscript_get_data_glass.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env Rscript
## Trailing command-line arguments of this script. Read further below as
##   args[1]: index of the chunk processed by this job
##   args[2]: total number of chunks
args <- commandArgs(trailingOnly = TRUE)
library(dplyr)
library(tidyr)
library(purrr)
library(lubridate)
library(magrittr)
library(tidync)
library(rbeni)
##------------------------------------------------------------------------
## Extract point data and construct separate nested time series data frame
## for each longitude slice
##------------------------------------------------------------------------
## Input: directory that is searched recursively for NetCDF files whose name
## contains `fileprefix` and ends in ".nc".
dir <- "~/data/glass/data_netcdf/"
fileprefix <- "GLASS07B01.V41."

## Translate the wildcard into a regular expression so that the dots in the
## prefix and in the extension are matched literally and ".nc" is anchored at
## the end of the file name (a hand-written ".*.nc" would also match e.g.
## "x.nc.aux.xml").
ncpattern <- utils::glob2rx(paste0("*", fileprefix, "*.nc"))
ncfiles <- list.files(dir, pattern = ncpattern, recursive = TRUE)

## file.path() yields character(0) when nothing matched, whereas
## paste0(dir, character(0)) would return `dir` itself as a bogus "file".
## The trailing slash is stripped first so the resulting paths keep a single
## separator.
nclist <- file.path(sub("/+$", "", dir), ncfiles)
if (length(nclist) == 0) {
  warning(
    "No NetCDF files matching '", fileprefix, "*.nc' found in ", dir,
    call. = FALSE
  )
}

## Output directory handed to nclist_to_df() below.
outdir <- "~/data/glass/data_tidy/"

## Variable and dimension names handed to nclist_to_df() below
## (presumably the names used inside the NetCDF files -- confirm against the
## files).
varnam <- "NR"
lonnam <- "lon"
latnam <- "lat"
timenam <- "time"
timedimnam <- "time"
## not necessary anymore after files have been combined into annual files
# fgetdate_glass <- function(filnam){
# filnam <- basename(filnam)
# year <- stringr::str_sub(filnam, 17, 20)
# doy <- stringr::str_sub(filnam, 21, 23)
# date <- lubridate::ymd(paste0(year, "-01-01")) + lubridate::days(as.numeric(doy)) - lubridate::days(1)
# return(date)
# }
##------------------------------------------------------------------------
## split it up into chunks (total number of chunks provided by argument 2)
##------------------------------------------------------------------------
## Both arguments are required and must be positive integers:
##   args[1]: index of the chunk processed by this job (1-based)
##   args[2]: total number of chunks, e.g. 1000; make sure this is consistent
##            with the number of parallel jobs (job array!) in the submission
##            script
if (length(args) < 2) {
  stop(
    "Usage: rscript_get_data_glass.R <chunk index> <number of chunks>",
    call. = FALSE
  )
}
ichunk <- as.integer(args[1])
nchunk <- as.integer(args[2])
if (is.na(ichunk) || is.na(nchunk) || ichunk < 1L || nchunk < 1L) {
  stop(
    "Chunk index and number of chunks must be positive integers, got '",
    args[1], "' and '", args[2], "'.",
    call. = FALSE
  )
}

## Number of longitude indices that are distributed over the chunks.
nlon <- 7200
nrows_chunk <- ceiling(nlon / nchunk)

## Consecutive longitude indices are grouped into chunks of `nrows_chunk`.
ilon_all <- seq_len(nlon)
irow_chunk <- split(ilon_all, ceiling(seq_along(ilon_all) / nrows_chunk))

## Because the chunk size is rounded up, there can be fewer chunks than
## `nchunk` (e.g. 7200 indices with nchunk = 1000 give 900 chunks of 8).
## Jobs with an index beyond the last chunk have nothing to do; end them
## cleanly instead of failing with "subscript out of bounds".
if (ichunk > length(irow_chunk)) {
  message(
    "Chunk index ", ichunk, " exceeds the number of chunks (",
    length(irow_chunk), "). Nothing to do."
  )
  quit(save = "no", status = 0)
}

print("getting data for longitude indices:")
print(irow_chunk[[ichunk]])
## Write one file per longitude slice of this job's chunk, each containing the
## full time series wrapped for each gridcell (latitude).
ilon_job <- irow_chunk[[as.integer(args[1])]]

nclist_to_df(
  nclist,
  outdir          = outdir,
  fileprefix      = fileprefix,
  varnam          = varnam,
  ilon            = ilon_job,
  lonnam          = lonnam,
  latnam          = latnam,
  timenam         = timenam,
  timedimnam      = timedimnam,
  ncores          = "all",
  single_basedate = FALSE
)