Skip to content

Commit

Permalink
add NearestCorrelationMatrix.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
dmetivie committed Sep 26, 2024
1 parent 4dfc6f2 commit 13d9cce
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 10 deletions.
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Geodesy = "0ef565a4-170c-5f04-8de2-149903a85f3d"
Impute = "f7bf1975-0170-51b9-8c5f-a992d46b9575"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NearestCorrelationMatrix = "59ddf330-608c-4938-8bc9-a4ee97bbbea6"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
RollingFunctions = "b0e4dd01-7b14-53d8-9b45-175a3e362653"
SmoothPeriodicStatsModels = "3d8e1505-165e-443f-a184-83a230b5a9e5"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
julia = "1"
SmoothPeriodicStatsModels = "1"
julia = "1"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
3 changes: 2 additions & 1 deletion examples/tuto_paper.jl
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,8 @@ We fit a Gaussian copula to each pair of stations for joint rainy days only.
"""
#!nb # !!! warning
#!nb # For some hidden states corresponding to dry weather, it might happen that for some pair of stations, there are not enough simultaneous rain occurrences in a rain category $Z = k$ to estimate a correlation coefficient.
#!nb # In that case a `missing` coefficient is returned by `cov_RR`.
#!nb # In that case a `missing` coefficient is returned by `cov_RR` or it returns the value `impute_missing` if specified (to avoid missing).
#!nb # The option `force_PosDef` (enabled by default) ensures having positive definite matrix. This is necessary to use gaussian copula.

Σ²RR = cov_RR(data_stations_z, K)

Expand Down
3 changes: 2 additions & 1 deletion src/StochasticWeatherGenerators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ using DataFrames, DataFramesMeta, Dates
using CSV, Printf # File Read/Load
using LinearAlgebra: tril, I
using Copulas # for correlated generation
using NearestCorrelationMatrix: nearest_cor!
using RollingFunctions # rollmean for climate indices
using SmoothPeriodicStatsModels: αₜ, σₜ, μₜ, ρₜ
using SmoothPeriodicStatsModels: AR1, model_for_loglikelihood_AR1, initialvalue_optimize!
Expand Down Expand Up @@ -40,7 +41,7 @@ include("temperature/correlations.jl")
include("climate_indices.jl")

# ## Rain
export rand_RR, fit_mle_RR, cov_RR, fit_mle_stations
export rand_RR, fit_mle_RR, cov_RR, cor_RR, fit_mle_stations
export joint_rain, Σ_Spearman2Pearson, Σ_Kendall2Pearson, corTail

# ## AR1
Expand Down
14 changes: 7 additions & 7 deletions src/rain/correlations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ function corTail(x, q=0.95)
return (c + c') / 2
end

#TODO: a version without K
#TODO: is NearestCorrelationMatrix.jl the best way to enforce positivity? Uncertainty in estimation could be interesting (coefficients we trust or not).
"""
cor_RR(dfs::AbstractArray{<:DataFrame}, K; cor_method=Σ_Spearman2Pearson, force_PosDef = true)
cor_RR(dfs::AbstractArray{<:DataFrame}[, K]; cor_method=Σ_Spearman2Pearson, force_PosDef = true)
Compute the (strictly positive) rain pair correlations `cor(Rs₁ > 0, Rs₂ > 0)` between each pair of stations `s₁, s₂` for each hidden state `Z = k`.
Input: a array `dfs` of `df::DataFrame` of length `S` (number of station) where each `df` have :DATE, :RR, :z (same :z for each df).
Expand All @@ -63,7 +63,7 @@ Output: `K` correlation matrix of size `S×S`
Options:
- `force_PosDef` will enforce Positive Definite matrix with `sqrt.(ΣS_k.^2)`.
- `force_PosDef` will enforce Positive Definite matrix with [NearestCorrelationMatrix.jl](https://github.com/adknudson/NearestCorrelationMatrix.jl).
- `cor_method`: typically `Σ_Spearman2Pearson` or `Σ_Kendall2Pearson`
- `impute_missing`: if `nothing`, `missing` will be outputted when two stations do not have at least two rain days in common. Otherwise the value `impute_missing` will be set.
Expand All @@ -86,7 +86,7 @@ function cor_RR(dfs::AbstractArray{<:DataFrame}, K; cor_method=Σ_Spearman2Pears
if all([all((!ismissing).(S)) for S in ΣS_k]) # are they no missing?
ΣS_k = convert.(Matrix{Float64}, ΣS_k)
if force_PosDef
ΣS_k = sqrt.(ΣS_k .^ 2)
ΣS_k = nearest_cor!.(ΣS_k)
end
else
for k in 1:K
Expand All @@ -100,7 +100,7 @@ function cor_RR(dfs::AbstractArray{<:DataFrame}, K; cor_method=Σ_Spearman2Pears
end

"""
cov_RR(dfs::AbstractArray{<:DataFrame}, K; cor_method=Σ_Spearman2Pearson, force_PosDef = true)
cov_RR(dfs::AbstractArray{<:DataFrame}[, K]; cor_method=Σ_Spearman2Pearson, force_PosDef = true)
Compute the (strictly positive) rain pair covariance `cov(Rs₁ > 0, Rs₂ > 0)` between each pair of stations `s₁, s₂` for each hidden state `Z = k`.
Input: a array `dfs` of `df::DataFrame` of length `S` (number of station) where each `df` have :DATE, :RR, :z (same :z for each df).
Expand All @@ -109,7 +109,7 @@ Output: `K` covariance matrix of size `S×S`
Options:
- `force_PosDef` will enforce Positive Definite matrix with `sqrt.(ΣS_k.^2)`.
- `force_PosDef` will enforce Positive Definite matrix with [NearestCorrelationMatrix.jl](https://github.com/adknudson/NearestCorrelationMatrix.jl).
- `cor_method`: typically `Σ_Spearman2Pearson` or `Σ_Kendall2Pearson`
- `impute_missing`: if `nothing`, `missing` will be outputted when two stations do not have at least two rain days in common. Otherwise the value `impute_missing` will be set.
Expand All @@ -135,7 +135,7 @@ function cor_RR(dfs::AbstractArray{<:DataFrame}; cor_method=Σ_Spearman2Pearson,
if all((!ismissing).(ΣS)) # are they no missing?
ΣS = convert(Matrix{Float64}, ΣS)
if force_PosDef
ΣS = sqrt(ΣS^2)
nearest_cor!(ΣS)
end
else
aremissing = findall(ismissing, ΣS)
Expand Down

0 comments on commit 13d9cce

Please sign in to comment.