-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathglmloop.r
120 lines (98 loc) · 3.89 KB
/
glmloop.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# This script runs a Generalized Linear Model (GLM) for different sets of input variables,
# calculates autoregressive lag corrections for RGR (Relative Growth Rate) based on Holocene periods,
# and generates probability outputs for various variable combinations. The script produces plots comparing RGR
# and the difference between boom and bust probabilities. Model results and coefficients are saved in MATLAB files.
# SPDX-FileCopyrightText: 2023-2024 Helmholtz-Zentrum hereon GmbH
# SPDX-FileContributor: Kai W. Wirtz <[email protected]>
# SPDX-License-Identifier: GPL-3.0-or-later
# Input files:
# - 'target_ts_0.mat' (contains time series data and input variables)
# Output files:
# - GLM results: 'out/glmres_<variable_combination>.mat'
# - Plot comparing RGR with probability: 'out/plots/comp_GLM_3V.png'
# Variables used:
# - scdir: directory for output files
# - tmax: maximum time for analysis
# - irgr: index for RGR (Relative Growth Rate) in the input data
# - vari: indices for the input variables (e.g., TSI, climate stability)
# - nvar: number of input variables
# - rgr0: original RGR time series
# - lag: time lag for autoregressive adjustments
# - time: time vector for analysis
# - var: input variable matrix after squeezing time domain
# - stdc: standard deviation correction for RGR
# - varname, varshort, shortname: arrays storing full, short, and variable names
# - nc: number of variable combinations
# - m: matrix defining combinations of input variables
# - prob: probability matrix
# - cores: model coefficients
# - tprob: difference between boom and bust probabilities
library('R.matlab')
scdir <- 'out/'
tmax <- 8.8 # maximum time
# indices of variables in input matrix
irgr <- 2 # 4:area weighted +smoothed +detrended
# index to input variables
vari <- c(6, 9, irgr) # 6:- TSI 9: climate stability
nvar <- length(vari)
bobu <- c('booms', 'busts', 'bobu', 'single')
corn <- c('hit.bin', 'Pearson', 'sum', 'single')
# load data
fname <- paste0(scdir, 'target_ts_0', '.mat')
print(paste('reading...', fname))
d <- readMat(fname)
time <- d$dat[,1]
dt <- time[2] - time[1]
rgr0 <- d$dat[, irgr] # RGR
# autoregressive part by lagged RGR
# lag for early Holocene
lag <- floor(0.36 / dt) # 360a for t>7kaBP
ii <- which(time >= 7 & time < max(time) - 0.35)
d$dat[ii, irgr] <- rgr0[lag + ii]
# lag for early Holocene
ii <- which(time < 7)
lag <- floor(0.21 / dt) # 210a for t>7kaBP
d$dat[ii, irgr] <- rgr0[lag + ii]
d$dat[is.na(d$dat[, irgr]), irgr] <- 0
# squeeze on time domain of interest
ii <- which(time >= 3 & time <= tmax)
time <- time[ii]
var <- d$dat[ii, vari]
rgr0 <- rgr0[ii]
stdc <- 0.5 * sd(rgr0)
var[is.nan(var)] <- 0
var[is.na(var)] <- 0
datnames <- paste(unlist(d$legdat))
gsub("[\r\n]", "", datnames) # clear names from special chars
# create array of variable names in different versions
varname <- c()
varshort <- c()
shortname <- c()
for (cti in 1:nvar) {
fullname <- datnames[vari[cti]]
varname <- c(varname, fullname)
fullname <- gsub(" ", "", fullname)
str <- substring(fullname, 1, 4)
varshort <- c(varshort, str)
shortname <- c(shortname, paste0('V', as.character(cti)))
print(varname[cti])
}
# create all possible combinations of input variables
nc <- 3 # number of input combinations
m <- matrix(c(c(0, 0, 1), c(1, 1, 0), c(1, 1, 1)), ncol = nc, nrow = 3)
# loop over variable combinations
for (loi in 1:nc) {
vi <- which(m[, loi] == 1)
source('do_lgm.r')
print(cores)
tprob <- prob[1, ] - prob[2, ]
# save model coefficients and results to matlab file
fname <- paste0(scdir, 'glmres_', paste0(varshort[vi], collapse = '_'), '.mat')
writeMat(fname, timres = time, prob = prob, tprob = tprob, statcoeff = coresb, vind = vi)
}
# plot comparison RGR with probability
file <- paste0(scdir, 'plots/comp_GLM_3V', '.png')
png(file)
plot(time, rgr0 / sd(rgr0), 'l', col = "blue", main = paste('RGR vs. prob(boom)-prob(bust)'))
lines(time, tprob / sd(tprob), 'l', col = "red")
dev.off()