From b234aed54cbc681060f6f4a4d3dc79a6cad9362e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Czy=C5=BC?= Date: Sat, 28 Oct 2023 21:59:49 +0200 Subject: [PATCH] Add splines and demo --- .gitignore | 3 ++ docs/api.md | 3 ++ environment.yml | 25 ++++++++++ examples/splines_demonstration.py | 77 +++++++++++++++++++++++++++++++ src/covvfit/__init__.py | 5 ++ src/covvfit/_splines.py | 44 ++++++++++++++++++ 6 files changed, 157 insertions(+) create mode 100644 docs/api.md create mode 100644 environment.yml create mode 100644 examples/splines_demonstration.py create mode 100644 src/covvfit/_splines.py diff --git a/.gitignore b/.gitignore index fa12c16..42e818f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Private files +private/ + # Notebooks by default should not be uploaded (use Quarto Markdown as an alternative) *.nb *.ipynb diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..17b603f --- /dev/null +++ b/docs/api.md @@ -0,0 +1,3 @@ +# API Reference + +::: covvfit.create_spline_matrix diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..db766d5 --- /dev/null +++ b/environment.yml @@ -0,0 +1,25 @@ +name: covvfit +channels: +- bioconda +- conda-forge +dependencies: +- arviz=0.16.1 +- matplotlib-base=3.8.0 +- numpy=1.25.2 +- pandas=2.1.2 +- pip=23.3.1 +- pymc=5.9.1 +- pymc-base=5.9.1 +- pytensor=2.17.3 +- pytensor-base=2.17.3 +- pytest=7.4.3 +- python=3.10.13 +- pyyaml=6.0.1 +- scipy=1.11.3 +- setuptools=68.2.2 +- snakemake=7.32.4 +- snakemake-minimal=7.32.4 +- xarray=2023.10.1 +- xarray-einstats=0.6.0 +- yaml=0.2.5 + diff --git a/examples/splines_demonstration.py b/examples/splines_demonstration.py new file mode 100644 index 0000000..57b1b5b --- /dev/null +++ b/examples/splines_demonstration.py @@ -0,0 +1,77 @@ +"""Bayesian regression using B-splines.""" +import matplotlib.pyplot as plt +import numpy as np +import pymc as pm + +import covvfit as cv + + +def create_model( + xs: np.ndarray, + ys: np.ndarray, + n_coefs: int = 5, + degree: int = 3, +) -> pm.Model: + # Create the B-spline basis functions + pts = cv.create_spline_matrix(xs, n_coefs=n_coefs) + + with pm.Model() as model: + # Weights are the coefficients of the B-spline basis functions + weights = pm.Normal("weights", 0, 10, size=(n_coefs, 1)) + + # The function is a linear combination of the basis functions + func = pm.Deterministic("func", (pts @ weights).ravel()) + + # We add normal noise with unknown standard deviation + sigma = pm.HalfCauchy("sigma", 1) + pm.Normal("observed", mu=func, sigma=sigma, observed=ys) + + return model + + +def main() -> None: + rng = np.random.default_rng(42) + + xs = np.linspace(0, 1, 151) + ys_perfect = 2 * xs + 3 * np.sin(5 * xs) + ys_obs = ys_perfect + 2 * rng.normal(size=xs.shape) + + model = create_model(xs=xs, ys=ys_obs, n_coefs=5) + + n_samples_per_chain = 500 + n_chains = 4 + thinning = 10 + with model: + idata = pm.sample(tune=500, draws=n_samples_per_chain, chains=n_chains) + thinned_idata = idata.sel(draw=slice(None, None, thinning)) + idata.extend(pm.sample_posterior_predictive(thinned_idata)) + + fig, ax = plt.subplots() + + # Plot individual posterior predictive samples + post_pred = idata.posterior_predictive["observed"] # pyright: ignore + + for chain in range(n_chains): + for sample in range(n_samples_per_chain // thinning): + ys = post_pred[chain, sample] + ax.plot(xs, ys, alpha=0.02, color="navy") + + # Plot mean of posterior predictive + mean_predictive = post_pred.mean(axis=(0, 1)) + ax.plot(xs, mean_predictive, color="navy", label="Posterior predictive mean") + + # Plot "perfect data" + ax.plot(xs, ys_perfect, color="maroon", label="Noiseless data") + # Plot noisy data + ax.scatter(xs, ys_obs, c="k", s=3, label="Observed data") + + ax.set_title("Bayesian regression using B-splines") + ax.set_xlabel("$x$") + ax.set_ylabel("$y$") + + ax.legend(frameon=False) + fig.savefig("splines_demonstration.pdf") + + +if __name__ == "__main__": + main() diff --git a/src/covvfit/__init__.py b/src/covvfit/__init__.py index 1cf6267..1a1a6c1 100644 --- a/src/covvfit/__init__.py +++ b/src/covvfit/__init__.py @@ -1 +1,6 @@ +from covvfit._splines import create_spline_matrix + VERSION = "0.1.0" + + +__all__ = ["create_spline_matrix", "VERSION"] diff --git a/src/covvfit/_splines.py b/src/covvfit/_splines.py new file mode 100644 index 0000000..412567f --- /dev/null +++ b/src/covvfit/_splines.py @@ -0,0 +1,44 @@ +"""Spline functions utilities.""" +from typing import Optional + +import numpy as np +from scipy.interpolate import BSpline + + +def create_spline_matrix( + xs: np.ndarray, + n_coefs: int = 5, + degree: int = 3, + knots: Optional[np.ndarray] = None, +) -> np.ndarray: + """Creates basis spline functions evaluated at points provided. + + Args: + xs: points to evaluate the spline functions on, shape (N,) + n_coefs: number of coefficients + degree: B-spline degree + knots: knot positions, should be of shape `n_coefs + degree + 1` + + Returns: + B-splines evaluated at the points provided. Shape (N, n_coefs) + + Note: + The number of knots is given by `n_coefs + degree + 1` + """ + n_knots = n_coefs + degree + 1 + + if knots is None: + knots = np.linspace(np.min(xs), np.max(xs), n_knots) + else: + knots = np.asarray(knots) + + assert isinstance(knots, np.ndarray) + assert knots.shape[0] == n_knots + + def coeff(i: int) -> np.ndarray: + """One-hot vector with 1 at position `i`""" + return np.eye(n_coefs)[i] + + return np.vstack( + [BSpline(t=knots, c=coeff(i), k=degree)(xs) for i in range(n_coefs)] + ).T