From 04675bc5815e940e8844535383161c4136d9ad39 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Thu, 12 Nov 2020 15:16:03 -0500 Subject: [PATCH 01/14] re-starting latent model impl --- python/celerite2/__init__.py | 2 +- python/celerite2/celerite2.py | 585 ------------------ python/celerite2/core.py | 309 +++++++-- python/celerite2/jax/__init__.py | 2 +- .../celerite2/jax/{celerite2.py => core.py} | 41 +- python/celerite2/latent.py | 105 ++++ python/celerite2/numpy.py | 120 ---- python/celerite2/terms.py | 185 ++++-- python/celerite2/theano/__init__.py | 2 +- .../theano/{celerite2.py => core.py} | 21 + .../jax/{test_celerite2.py => test_core.py} | 0 .../test/{test_celerite2.py => test_core.py} | 0 .../{test_celerite2.py => test_core.py} | 0 13 files changed, 534 insertions(+), 838 deletions(-) delete mode 100644 python/celerite2/celerite2.py rename python/celerite2/jax/{celerite2.py => core.py} (69%) create mode 100644 python/celerite2/latent.py delete mode 100644 python/celerite2/numpy.py rename python/celerite2/theano/{celerite2.py => core.py} (90%) rename python/test/jax/{test_celerite2.py => test_core.py} (100%) rename python/test/{test_celerite2.py => test_core.py} (100%) rename python/test/theano/{test_celerite2.py => test_core.py} (100%) diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py index f172089..983cdf1 100644 --- a/python/celerite2/__init__.py +++ b/python/celerite2/__init__.py @@ -3,7 +3,7 @@ __all__ = ["__version__", "terms", "GaussianProcess"] from . import terms from .celerite2_version import __version__ -from .numpy import GaussianProcess +from .core import GaussianProcess __uri__ = "https://celerite2.readthedocs.io" __author__ = "Daniel Foreman-Mackey" diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py deleted file mode 100644 index c85c078..0000000 --- a/python/celerite2/celerite2.py +++ /dev/null @@ -1,585 +0,0 @@ -# -*- coding: utf-8 -*- - -__all__ = ["GaussianProcess"] - -import warnings -from functools import wraps - -import numpy as np - -from . import driver -from .driver import LinAlgError - - -class ConstantMean: - def __init__(self, value=0.0): - self.value = value - - def __call__(self, x): - return self.value - - -def _handle_vector(func): - @wraps(func) - def wrapped(self, y, *args, **kwargs): - y = np.atleast_1d(y) - is_vector = False - if y.ndim == 1: - is_vector = True - y = y[:, None] - result = func(self, y, *args, **kwargs) - if is_vector: - return result[:, 0] - return result - - return wrapped - - -class GaussianProcess: - """The main interface to the celerite2 Gaussian Process (GP) solver - - Args: - kernel: An instance of a subclass of :class:`terms.Term`. - mean (optional): The mean function of the process. This can either - be a callable (it will be evaluated with a single argument, a - vector of ``x`` values) or a scalar. (default: ``0.0``) - **kwargs: Other arguments will be passed directly to - :func:`GaussianProcess.compute` if the argument ``t`` is specified. - """ - - def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): - self.kernel = kernel - self.mean = mean - - # Placeholders for storing data - self._t = None - self._mean_value = None - self._diag = None - self._size = None - self._log_det = -np.inf - self._norm = np.inf - - # Placeholders to celerite matrices - self._c = np.empty(0, dtype=np.float64) - self._U = np.empty((0, 0), dtype=np.float64) - self._V = np.empty((0, 0), dtype=np.float64) - self._d = np.empty(0, dtype=np.float64) - - if t is not None: - self.compute(t, **kwargs) - - @property - def mean(self): - return self._mean - - @mean.setter - def mean(self, mean): - if callable(mean): - self._mean = mean - else: - self._mean = ConstantMean(mean) - - @property - def mean_value(self): - if self._mean_value is None: - raise RuntimeError( - "'compute' must be executed before accessing mean_value" - ) - return self._mean_value - - def compute( - self, t, *, yerr=None, diag=None, check_sorted=True, quiet=False - ): - """Compute the Cholesky factorization of the GP covariance matrix - - Args: - t (shape[N]): The independent coordinates of the observations. - This must be sorted in increasing order. - yerr (shape[N], optional): If provided, the diagonal standard - deviation of the observation model. - diag (shape[N], optional): If provided, the diagonal variance of - the observation model. - check_sorted (bool, optional): If ``True``, a check is performed to - make sure that ``t`` is correctly sorted. A ``ValueError`` will - be thrown when this check fails. - quiet (bool, optional): If ``True``, when the matrix cannot be - factorized (because of numerics or otherwise) the solver's - ``LinAlgError`` will be silenced and the determiniant will be - set to zero. Otherwise, the exception will be propagated. - - Raises: - ValueError: When the inputs are not valid (shape, number, etc.). - LinAlgError: When the matrix is not numerically positive definite. - """ - # Check the input coordinates - t = np.ascontiguousarray(t, dtype=np.float64) - if check_sorted and np.any(np.diff(t) < 0.0): - raise ValueError("The input coordinates must be sorted") - if t.ndim != 1: - raise ValueError("The input coordinates must be one dimensional") - - # Save the diagonal - self._t = t - self._size = self._t.shape[0] - self._mean_value = self._mean(self._t) - self._diag = np.empty_like(self._t) - if yerr is None and diag is None: - self._diag[:] = 0.0 - - elif yerr is not None: - if diag is not None: - raise ValueError( - "only one of 'diag' and 'yerr' can be provided" - ) - self._diag[:] = np.atleast_1d(yerr) ** 2 - - else: - self._diag[:] = np.atleast_1d(diag) - - # Fill the celerite matrices - ( - self._c, - self._d, - self._U, - self._V, - ) = self.kernel.get_celerite_matrices( - self._t, self._diag, c=self._c, a=self._d, U=self._U, V=self._V - ) - - # Compute the Cholesky factorization - try: - self._d, self._W = driver.factor( - self._t, - self._c, - self._d, - self._U, - self._V, - self._d, - np.copy(self._V), - ) - except LinAlgError: - if not quiet: - raise - self._log_det = -np.inf - self._norm = np.inf - else: - self._log_det = np.sum(np.log(self._d)) - self._norm = -0.5 * ( - self._log_det + self._size * np.log(2 * np.pi) - ) - - def recompute(self, *, quiet=False): - """Re-compute the factorization given a previous call to compute - - Args: - quiet (bool, optional): If ``True``, when the matrix cannot be - factorized (because of numerics or otherwise) the solver's - ``LinAlgError`` will be silenced and the determiniant will be - set to zero. Otherwise, the exception will be propagated. - - Raises: - RuntimeError: If :func:`GaussianProcess.compute` is not called - first. - ValueError: When the inputs are not valid (shape, number, etc.). - """ - if self._t is None: - raise RuntimeError( - "you must call 'compute' directly at least once" - ) - return self.compute( - self._t, diag=self._diag, check_sorted=False, quiet=quiet - ) - - def _process_input(self, y, *, inplace=False, require_vector=False): - y = np.atleast_1d(y) - if self._t is None: - raise RuntimeError("you must call 'compute' first") - if self._t.shape[0] != y.shape[0]: - raise ValueError("dimension mismatch") - if require_vector and self._t.shape != y.shape: - raise ValueError("'y' must be one dimensional") - if inplace: - if ( - y.dtype != "float64" - or not y.flags.c_contiguous - or not y.flags.writeable - ): - warnings.warn( - "Inplace operations can only be made on C-contiguous, " - "writable, float64 arrays; a copy will be made" - ) - y = np.ascontiguousarray(y, dtype=np.float64) - else: - y = np.array(y, dtype=np.float64, copy=True, order="C") - return y - - @_handle_vector - def _apply_inverse(self, y): - z = driver.solve_lower(self._t, self._c, self._U, self._W, y, y) - z /= self._d[:, None] - z = driver.solve_upper(self._t, self._c, self._U, self._W, z, z) - return z - - def apply_inverse(self, y, *, inplace=False): - """Apply the inverse of the covariance matrix to a vector or matrix - - Solve ``K.x = y`` for ``x`` where ``K`` is the covariance matrix of - the GP. - - .. note:: The mean function is not applied in this method. - - Args: - y (shape[N] or shape[N, M]): The vector or matrix ``y`` described - above. - inplace (bool, optional): If ``True``, ``y`` will be overwritten - with the result ``x``. - - Raises: - RuntimeError: If :func:`GaussianProcess.compute` is not called - first. - ValueError: When the inputs are not valid (shape, number, etc.). - """ - y = self._process_input(y, inplace=inplace) - return self._apply_inverse(y) - - @_handle_vector - def _dot_tril(self, y): - z = y * np.sqrt(self._d)[:, None] - return driver.matmul_lower(self._t, self._c, self._U, self._W, z, z) - - def dot_tril(self, y, *, inplace=False): - """Dot the Cholesky factor of the GP system into a vector or matrix - - Compute ``x = L.y`` where ``K = L.L^T`` and ``K`` is the covariance - matrix of the GP. - - .. note:: The mean function is not applied in this method. - - Args: - y (shape[N] or shape[N, M]): The vector or matrix ``y`` described - above. - inplace (bool, optional): If ``True``, ``y`` will be overwritten - with the result ``x``. - - Raises: - RuntimeError: If :func:`GaussianProcess.compute` is not called - first. - ValueError: When the inputs are not valid (shape, number, etc.). - """ - y = self._process_input(y, inplace=inplace) - return self._dot_tril(y) - - def log_likelihood(self, y, *, inplace=False): - """Compute the marginalized likelihood of the GP model - - The factorized matrix from the previous call to - :func:`GaussianProcess.compute` is used so that method must be called - first. - - Args: - y (shape[N]): The observations at coordinates ``t`` as defined by - :func:`GaussianProcess.compute`. - inplace (bool, optional): If ``True``, ``y`` will be overwritten - in the process of the calculation. This will reduce the memory - footprint, but should be used with care since this will - overwrite the data. - - Raises: - RuntimeError: If :func:`GaussianProcess.compute` is not called - first. - ValueError: When the inputs are not valid (shape, number, etc.). - """ - y = self._process_input(y, inplace=inplace, require_vector=True) - if not np.isfinite(self._log_det): - return -np.inf - alpha = (y - self._mean_value)[:, None] - alpha = driver.solve_lower( - self._t, self._c, self._U, self._W, alpha, alpha - )[:, 0] - loglike = self._norm - 0.5 * np.sum(alpha ** 2 / self._d) - if not np.isfinite(loglike): - return -np.inf - return loglike - - def predict( - self, - y, - t=None, - *, - return_cov=False, - return_var=False, - include_mean=True, - kernel=None, - ): - """Compute the conditional distribution - - The factorized matrix from the previous call to - :func:`GaussianProcess.compute` is used so that method must be called - first. - - Args: - y (shape[N]): The observations at coordinates ``t`` as defined by - :func:`GaussianProcess.compute`. - t (shape[M], optional): The independent coordinates where the - prediction should be evaluated. If not provided, this will be - evaluated at the observations ``t`` from - :func:`GaussianProcess.compute`. - return_var (bool, optional): Return the variance of the conditional - distribution. - return_cov (bool, optional): Return the full covariance matrix of - the conditional distribution. - include_mean (bool, optional): Include the mean function in the - prediction. - kernel (optional): If provided, compute the conditional - distribution using a different kernel. This is generally used - to separate the contributions from different model components. - Note that the computational cost and scaling will be worse - when using this parameter. - - Raises: - RuntimeError: If :func:`GaussianProcess.compute` is not called - first. - ValueError: When the inputs are not valid (shape, number, etc.). - """ - y = self._process_input(y, inplace=True, require_vector=True) - return ConditionalDistribution( - self, y, t=t, include_mean=include_mean, kernel=kernel - ) - - def sample(self, *, size=None, include_mean=True): - """Generate random samples from the prior implied by the GP system - - The factorized matrix from the previous call to - :func:`GaussianProcess.compute` is used so that method must be called - first. - - Args: - size (int, optional): The number of samples to generate. If not - provided, only one sample will be produced. - include_mean (bool, optional): Include the mean function in the - prediction. - - Raises: - RuntimeError: If :func:`GaussianProcess.compute` is not called - first. - ValueError: When the inputs are not valid (shape, number, etc.). - """ - - if self._t is None: - raise RuntimeError("you must call 'compute' first") - if size is None: - n = np.random.randn(self._size) - else: - n = np.random.randn(self._size, size) - result = self.dot_tril(n, inplace=True).T - if include_mean: - result += self._mean_value - return result - - def sample_conditional( - self, - y, - t=None, - *, - size=None, - regularize=None, - include_mean=True, - kernel=None, - ): - """Sample from the conditional (predictive) distribution - - .. note:: this method scales as ``O(M^3)`` for large ``M``, where - ``M == len(t)``. - - Args: - y (shape[N]): The observations at coordinates ``x`` from - :func:`GausianProcess.compute`. - t (shape[M], optional): The independent coordinates where the - prediction should be made. If this is omitted the coordinates - will be assumed to be ``x`` from - :func:`GaussianProcess.compute` and an efficient method will - be used to compute the mean prediction. - size (int, optional): The number of samples to generate. If not - provided, only one sample will be produced. - regularize (float, optional): For poorly conditioned systems, you - can provide a small number here to regularize the predictive - covariance. This number will be added to the diagonal. - include_mean (bool, optional): Include the mean function in the - prediction. - kernel (optional): If provided, compute the conditional - distribution using a different kernel. This is generally used - to separate the contributions from different model components. - Note that the computational cost and scaling will be worse - when using this parameter. - """ - mu, cov = self.predict( - y, t, return_cov=True, include_mean=include_mean, kernel=kernel - ) - if regularize is not None: - cov[np.diag_indices_from(cov)] += regularize - return np.random.multivariate_normal(mu, cov, size=size) - - -class ConditionalDistribution: - def __init__( - self, - gp, - y, - t=None, - *, - include_mean=True, - kernel=None, - ): - self.gp = gp - self.y = y - self.t = t - self.include_mean = include_mean - self.kernel = kernel - - self._c2 = None - self._U1 = None - self._V1 = None - self._U2 = None - self._V2 = None - - self._KxsT = None - self._Kinv_KxsT = None - - if self.t is None: - self._xs = self.gp._t - else: - self._xs = np.ascontiguousarray(self.t, dtype=np.float64) - if self._xs.ndim != 1: - raise ValueError("'t' must be one-dimensional") - - @property - def KxsT(self): - if self._KxsT is None: - tau = self.gp._t[:, None] - self._xs[None, :] - if self.kernel is None: - self._KxsT = self.gp.kernel.get_value(tau) - else: - self._KxsT = self.kernel.get_value(tau) - return self._KxsT - - @property - def Kinv_KxsT(self): - if self._Kinv_KxsT is None: - self._Kinv_KxsT = self.gp.apply_inverse(self.KxsT, inplace=False) - return self._Kinv_KxsT - - def _do_dot(self, inp, target): - if self.kernel is None: - kernel = self.gp.kernel - U1 = self.gp._U - V1 = self.gp._V - else: - kernel = self.kernel - if self._U1 is None or self._V1 is None: - _, _, self._U1, self._V1 = kernel.get_celerite_matrices( - self.gp._t, - np.zeros_like(self.gp._t), - U=self._U1, - V=self._V1, - ) - U1 = self._U1 - V1 = self._V1 - - if self._c2 is None or self._U2 is None or self._V2 is None: - self._c2, _, self._U2, self._V2 = kernel.get_celerite_matrices( - self._xs, - np.zeros_like(self._xs), - c=self._c2, - U=self._U2, - V=self._V2, - ) - c = self._c2 - U2 = self._U2 - V2 = self._V2 - - is_vector = False - if inp.ndim == 1: - is_vector = True - inp = inp[:, None] - target = target[:, None] - - target = driver.general_matmul_lower( - self._xs, self.gp._t, c, U2, V1, inp, target - ) - target = driver.general_matmul_upper( - self._xs, self.gp._t, c, V2, U1, inp, target - ) - - if is_vector: - return target[:, 0] - return target - - @property - def mean(self): - alpha = self.gp._apply_inverse(self.y - self.gp._mean_value) - - if self.t is None and self.kernel is None: - mu = self.y - self.gp._diag * alpha - if not self.include_mean: - mu -= self.gp._mean_value - return mu - - mu = np.zeros_like(self._xs) - mu = self._do_dot(alpha, mu) - - if self.include_mean: - mu += self.gp._mean(self._xs) - return mu - - @property - def variance(self): - if self.kernel is None: - kernel = self.gp.kernel - else: - kernel = self.kernel - return kernel.get_value(0.0) - np.einsum( - "ij,ij->j", self.KxsT, self.Kinv_KxsT - ) - - @property - def covariance(self): - if self.kernel is None: - kernel = self.gp.kernel - else: - kernel = self.kernel - neg_cov = -kernel.get_value(self._xs[:, None] - self._xs[None, :]) - neg_cov = self._do_dot(self.Kinv_KxsT, neg_cov) - return -neg_cov - - def sample(self, *, size=None, regularize=None): - """Sample from the conditional (predictive) distribution - - .. note:: this method scales as ``O(M^3)`` for large ``M``, where - ``M == len(t)``. - - Args: - y (shape[N]): The observations at coordinates ``x`` from - :func:`GausianProcess.compute`. - t (shape[M], optional): The independent coordinates where the - prediction should be made. If this is omitted the coordinates - will be assumed to be ``x`` from - :func:`GaussianProcess.compute` and an efficient method will - be used to compute the mean prediction. - size (int, optional): The number of samples to generate. If not - provided, only one sample will be produced. - regularize (float, optional): For poorly conditioned systems, you - can provide a small number here to regularize the predictive - covariance. This number will be added to the diagonal. - include_mean (bool, optional): Include the mean function in the - prediction. - kernel (optional): If provided, compute the conditional - distribution using a different kernel. This is generally used - to separate the contributions from different model components. - Note that the computational cost and scaling will be worse - when using this parameter. - """ - mu = self.mean - cov = self.covariance - if regularize is not None: - cov[np.diag_indices_from(cov)] += regularize - return np.random.multivariate_normal(mu, cov, size=size) diff --git a/python/celerite2/core.py b/python/celerite2/core.py index f55f2e9..6dd88e8 100644 --- a/python/celerite2/core.py +++ b/python/celerite2/core.py @@ -4,9 +4,16 @@ "ConstantMean", "BaseConditionalDistribution", "BaseGaussianProcess", + "ConditionalDistribution", + "GaussianProcess", ] +import warnings + import numpy as np +from . import driver +from .driver import LinAlgError + class ConstantMean: def __init__(self, value=0.0): @@ -23,6 +30,7 @@ def __init__( y, t=None, *, + X=None, include_mean=True, kernel=None, ): @@ -30,32 +38,65 @@ def __init__( self.y = y self.t = t self.include_mean = include_mean - self.kernel = kernel - - self._c2 = None - self._U1 = None - self._V1 = None - self._U2 = None - self._V2 = None - - self._KxsT = None - self._Kinv_KxsT = None if self.t is None: + if X is not None: + raise ValueError("'X' can only be defined when 't' is") self._xs = self.gp._t + self._X = self.gp._X else: self._xs = self.gp._as_tensor(self.t) if self._xs.ndim != 1: raise ValueError("'t' must be one-dimensional") + if X is None: + if self.gp._X is not None: + raise TypeError( + "'X' is required if it is defined for the process" + ) + self._X = None + else: + self._X = self.gp._as_tensor(X, dtype=None) + if self._X.shape[0] != self._xs.shape[0]: + raise ValueError("Dimension mismatch") + + if kernel is None: + self.kernel = self.gp.kernel + self._U1 = self.gp._U + self._V1 = self.gp._V + else: + self.kernel = kernel + _, _, self._U1, self._V1 = self.kernel.get_celerite_matrices( + self.gp._t, self.gp._zeros_like(self.gp._t), X=self.gp._X + ) + + ( + self._c2, + self._a2, + self._U2, + self._V2, + ) = self.kernel.get_celerite_matrices( + self._xs, self.gp._zeros_like(self._xs), X=self._X + ) + + self._KxsT = None + self._Kinv_KxsT = None + @property def KxsT(self): if self._KxsT is None: - tau = self.gp._t[:, None] - self._xs[None, :] - if self.kernel is None: - self._KxsT = self.gp.kernel.get_value(tau) - else: - self._KxsT = self.kernel.get_value(tau) + K = self.gp._zeros((self.gp._t.shape[0], self._xs.shape[0])) + self._KxsT = self.gp._do_general_matmul( + self.gp._t, + self._xs, + self._c2, + self._U2, + self._V2, + self._U1, + self._V1, + self.gp._eye(self._xs.shape[0]), + K, + ) return self._KxsT @property @@ -64,48 +105,24 @@ def Kinv_KxsT(self): self._Kinv_KxsT = self.gp.apply_inverse(self.KxsT, inplace=False) return self._Kinv_KxsT - def _do_general_matmul(self, c, U1, V1, U2, V2, inp, target): - raise NotImplementedError("must be implemented by subclasses") - - def _diagdot(self, a, b): - raise NotImplementedError("must be implemented by subclasses") - def _do_dot(self, inp, target): - if self.kernel is None: - kernel = self.gp.kernel - U1 = self.gp._U - V1 = self.gp._V - else: - kernel = self.kernel - if self._U1 is None or self._V1 is None: - _, _, self._U1, self._V1 = kernel.get_celerite_matrices( - self.gp._t, - self.gp._zeros_like(self.gp._t), - U=self._U1, - V=self._V1, - ) - U1 = self._U1 - V1 = self._V1 - - if self._c2 is None or self._U2 is None or self._V2 is None: - self._c2, _, self._U2, self._V2 = kernel.get_celerite_matrices( - self._xs, - self.gp._zeros_like(self._xs), - c=self._c2, - U=self._U2, - V=self._V2, - ) - c = self._c2 - U2 = self._U2 - V2 = self._V2 - is_vector = False if inp.ndim == 1: is_vector = True inp = inp[:, None] target = target[:, None] - target = self._do_general_matmul(c, U1, V1, U2, V2, inp, target) + target = self.gp._do_general_matmul( + self._xs, + self.gp._t, + self._c2, + self._U1, + self._V1, + self._U2, + self._V2, + inp, + target, + ) if is_vector: return target[:, 0] @@ -132,19 +149,17 @@ def mean(self): @property def variance(self): - if self.kernel is None: - kernel = self.gp.kernel - else: - kernel = self.kernel - return kernel.get_value(0.0) - self._diagdot(self.KxsT, self.Kinv_KxsT) + return self._a2 - self.gp._diagdot(self.KxsT, self.Kinv_KxsT) @property def covariance(self): - if self.kernel is None: - kernel = self.gp.kernel - else: - kernel = self.kernel - neg_cov = -kernel.get_value(self._xs[:, None] - self._xs[None, :]) + neg_cov = -self.gp._get_dense_matrix( + self._xs, + self._c2, + self._a2, + self._U2, + self._V2, + ) neg_cov = self._do_dot(self.Kinv_KxsT, neg_cov) return -neg_cov @@ -198,6 +213,7 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): # Placeholders for storing data self._t = None + self._X = None self._mean_value = None self._diag = None self._size = None @@ -237,12 +253,15 @@ def _setup(self): def _copy_or_check(self, tensor, *, inplace=False): return tensor - def _as_tensor(self, tensor): + def _as_tensor(self, tensor, dtype=np.float64): raise NotImplementedError("must be implemented by subclasses") def _zeros_like(self, tensor): raise NotImplementedError("must be implemented by subclasses") + def _zeros(self, shape): + raise NotImplementedError("must be implemented by subclasses") + def _do_compute(self, quiet): raise NotImplementedError("must be implemented by subclasses") @@ -258,8 +277,27 @@ def _do_dot_tril(self, y): def _do_norm(self, y): raise NotImplementedError("must be implemented by subclasses") + def _get_dense_matrix(self, t, c, a, U, V): + raise NotImplementedError("must be implemented by subclasses") + + def _do_general_matmul(self, t1, t2, c, U1, V1, U2, V2, inp, target): + raise NotImplementedError("must be implemented by subclasses") + + def _diagdot(self, a, b): + raise NotImplementedError("must be implemented by subclasses") + + def _eye(self, n): + raise NotImplementedError("must be implemented by subclasses") + def compute( - self, t, *, yerr=None, diag=None, check_sorted=True, quiet=False + self, + t, + *, + yerr=None, + diag=None, + X=None, + check_sorted=True, + quiet=False, ): """Compute the Cholesky factorization of the GP covariance matrix @@ -288,8 +326,14 @@ def compute( if check_sorted: t = self._check_sorted(t) + if X is not None: + X = self._as_tensor(X, dtype=None) + if t.shape[0] != X.shape[0]: + raise ValueError("Dimension mismatch") + # Save the diagonal self._t = t + self._X = X self._size = self._t.shape[0] self._mean_value = self._mean(self._t) self._diag = self._zeros_like(self._t) @@ -310,7 +354,13 @@ def compute( self._U, self._V, ) = self.kernel.get_celerite_matrices( - self._t, self._diag, c=self._c, a=self._a, U=self._U, V=self._V + self._t, + self._diag, + c=self._c, + a=self._a, + U=self._U, + V=self._V, + X=self._X, ) self._do_compute(quiet) @@ -334,7 +384,11 @@ def recompute(self, *, quiet=False): "you must call 'compute' directly at least once" ) return self.compute( - self._t, diag=self._diag, check_sorted=False, quiet=quiet + self._t, + diag=self._diag, + X=self._X, + check_sorted=False, + quiet=quiet, ) def _process_input(self, y, *, require_vector=False, inplace=False): @@ -431,6 +485,7 @@ def predict( y, t=None, *, + X=None, return_cov=False, return_var=False, include_mean=True, @@ -466,17 +521,19 @@ def predict( first. ValueError: When the inputs are not valid (shape, number, etc.). """ - cond = self.condition(y, t=t, include_mean=include_mean, kernel=kernel) + cond = self.condition( + y, t=t, X=X, include_mean=include_mean, kernel=kernel + ) if return_var: return cond.mean, cond.variance if return_cov: return cond.mean, cond.covariance return cond.mean - def condition(self, y, t=None, *, include_mean=True, kernel=None): + def condition(self, y, t=None, *, X=None, include_mean=True, kernel=None): y = self._process_input(y, require_vector=True, inplace=True) return self.conditional_distribution( - self, y, t=t, include_mean=include_mean, kernel=kernel + self, y, t=t, X=X, include_mean=include_mean, kernel=kernel ) def sample(self, *, size=None, include_mean=True): @@ -498,3 +555,121 @@ def sample(self, *, size=None, include_mean=True): ValueError: When the inputs are not valid (shape, number, etc.). """ raise NotImplementedError("'sample' is not implemented by extensions") + + +class ConditionalDistribution(BaseConditionalDistribution): + def sample(self, *, size=None, regularize=None): + mu = self.mean + cov = self.covariance + if regularize is not None: + cov[np.diag_indices_from(cov)] += regularize + return np.random.multivariate_normal(mu, cov, size=size) + + +class GaussianProcess(BaseGaussianProcess): + conditional_distribution = ConditionalDistribution + + def _setup(self): + self._c = np.empty(0, dtype=np.float64) + self._a = np.empty(0, dtype=np.float64) + self._U = np.empty((0, 0), dtype=np.float64) + self._V = np.empty((0, 0), dtype=np.float64) + + def _copy_or_check(self, y, *, inplace=False): + if inplace: + if ( + y.dtype != "float64" + or not y.flags.c_contiguous + or not y.flags.writeable + ): + warnings.warn( + "Inplace operations can only be made on C-contiguous, " + "writable, float64 arrays; a copy will be made" + ) + y = np.ascontiguousarray(y, dtype=np.float64) + else: + y = np.array(y, dtype=np.float64, copy=True, order="C") + return y + + def _as_tensor(self, y, dtype=np.float64): + return np.ascontiguousarray(y, dtype=dtype) + + def _zeros_like(self, y): + return np.zeros_like(y) + + def _zeros(self, shape): + return np.zeros(shape) + + def _eye(self, n): + return np.eye(n) + + def _get_dense_matrix(self, t, c, a, U, V): + Y = np.eye(len(t)) + Z = np.diag(a) + Z = driver.matmul_lower(t, c, U, V, Y, Z) + return driver.matmul_upper(t, c, U, V, Y, Z) + + def _do_general_matmul(self, t1, t2, c, U1, V1, U2, V2, inp, target): + target = driver.general_matmul_lower(t1, t2, c, U2, V1, inp, target) + target = driver.general_matmul_upper(t1, t2, c, V2, U1, inp, target) + return target + + def _diagdot(self, a, b): + return np.einsum("ij,ij->j", a, b) + + def _do_compute(self, quiet): + # Compute the Cholesky factorization + try: + self._d, self._W = driver.factor( + self._t, + self._c, + self._a, + self._U, + self._V, + self._a, + np.copy(self._V), + ) + except LinAlgError: + if not quiet: + raise + self._log_det = -np.inf + self._norm = np.inf + else: + self._log_det = np.sum(np.log(self._d)) + self._norm = -0.5 * ( + self._log_det + self._size * np.log(2 * np.pi) + ) + + def _check_sorted(self, t): + if np.any(np.diff(t) < 0.0): + raise ValueError("The input coordinates must be sorted") + return t + + def _do_solve(self, y): + z = driver.solve_lower(self._t, self._c, self._U, self._W, y, y) + z /= self._d[:, None] + z = driver.solve_upper(self._t, self._c, self._U, self._W, z, z) + return z + + def _do_dot_tril(self, y): + z = y * np.sqrt(self._d)[:, None] + return driver.matmul_lower(self._t, self._c, self._U, self._W, z, z) + + def _do_norm(self, y): + alpha = y[:, None] + alpha = driver.solve_lower( + self._t, self._c, self._U, self._W, alpha, alpha + )[:, 0] + return np.sum(alpha ** 2 / self._d) + + def sample(self, *, size=None, include_mean=True): + if self._t is None: + raise RuntimeError("you must call 'compute' first") + if size is None: + n = np.random.randn(self._size) + else: + n = np.random.randn(self._size, size) + result = self.dot_tril(n, inplace=True).T + if include_mean: + result += self._mean_value + return result diff --git a/python/celerite2/jax/__init__.py b/python/celerite2/jax/__init__.py index 74818ef..2a34679 100644 --- a/python/celerite2/jax/__init__.py +++ b/python/celerite2/jax/__init__.py @@ -22,5 +22,5 @@ __all__ = ["terms", "GaussianProcess", "CeleriteNormal"] from . import terms # noqa isort:skip -from .celerite2 import GaussianProcess # noqa isort:skip +from .core import GaussianProcess # noqa isort:skip from .distribution import CeleriteNormal # noqa isort:skip diff --git a/python/celerite2/jax/celerite2.py b/python/celerite2/jax/core.py similarity index 69% rename from python/celerite2/jax/celerite2.py rename to python/celerite2/jax/core.py index 9ff304f..ac6e4a5 100644 --- a/python/celerite2/jax/celerite2.py +++ b/python/celerite2/jax/core.py @@ -1,35 +1,32 @@ # -*- coding: utf-8 -*- -__all__ = ["GaussianProcess", "ConditionalDistribution"] +__all__ = ["GaussianProcess"] from jax import numpy as np -from ..core import BaseConditionalDistribution, BaseGaussianProcess +from ..core import BaseGaussianProcess from . import distribution, ops -class ConditionalDistribution(BaseConditionalDistribution): - def _do_general_matmul(self, c, U1, V1, U2, V2, inp, target): - target += ops.general_matmul_lower( - self._xs, self.gp._t, c, U2, V1, inp - ) - target += ops.general_matmul_upper( - self._xs, self.gp._t, c, V2, U1, inp - ) - return target - - def _diagdot(self, a, b): - return np.einsum("ij,ij->j", a, b) - - class GaussianProcess(BaseGaussianProcess): - conditional_distribution = ConditionalDistribution - def _as_tensor(self, tensor): return np.asarray(tensor, dtype=np.float64) def _zeros_like(self, tensor): return np.zeros_like(tensor) + def _zeros(self, shape): + return np.zeros(shape) + + def _eye(self, n): + return np.eye(n) + + def _get_dense_matrix(self, t, c, a, U, V): + Y = np.eye(len(t)) + Z = np.diag(a) + Z += ops.matmul_lower(t, c, U, V, Y) + Z += ops.matmul_upper(t, c, U, V, Y) + return Z + def _do_compute(self, quiet): self._d, self._W = ops.factor( self._t, self._c, self._a, self._U, self._V @@ -57,5 +54,13 @@ def _do_norm(self, y): )[:, 0] return np.sum(alpha ** 2 / self._d) + def _do_general_matmul(self, t1, t2, c, U1, V1, U2, V2, inp, target): + target += ops.general_matmul_lower(t1, t2, c, U2, V1, inp) + target += ops.general_matmul_upper(t1, t2, c, V2, U1, inp) + return target + + def _diagdot(self, a, b): + return np.einsum("ij,ij->j", a, b) + def numpyro_dist(self): return distribution.CeleriteNormal(self) diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py new file mode 100644 index 0000000..6603c3d --- /dev/null +++ b/python/celerite2/latent.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- + +__all__ = ["LatentTerm", "prepare_rectangular_data", "KroneckerLatent"] +import numpy as np + +from .terms import Term + + +class LatentTerm(Term): + __requires_general_addition__ = True + + def __init__(self, term, left=None, right=None): + self.term = term + self.left = left + self.right = right + + def _get_latent(self, left_or_right, t, X): + if left_or_right is None: + return np.ones((1, 1, 1)) + if callable(left_or_right): + left_or_right = left_or_right(t, X) + else: + left_or_right = np.atleast_1d(left_or_right) + if left_or_right.ndim == 1: + return left_or_right[None, None, :] + if left_or_right.ndim == 2: + return left_or_right[None, :, :] + if left_or_right.ndim != 3: + raise ValueError("Invalid shape for latent") + return left_or_right + + def get_left_latent(self, t, X): + return self._get_latent(self.left, t, X) + + def get_right_latent(self, t, X): + if self.right is None and self.left is not None: + return self._get_latent(self.left, t, X) + return self._get_latent(self.right, t, X) + + def get_celerite_matrices( + self, + t, + diag, + *, + c=None, + a=None, + U=None, + V=None, + X=None, + ): + t = np.atleast_1d(t) + diag = np.atleast_1d(diag) + c0, a0, U0, V0 = self.term.get_celerite_matrices(t, diag, X=X) + + left = self.get_left_latent(t, X) + right = self.get_right_latent(t, X) + + N = len(t) + K = left.shape[2] + J = c0.shape[0] * K + c, a, U, V = self._resize_matrices(N, J, c, a, U, V) + + c[:] = np.tile(c0, K) + U[:] = np.ascontiguousarray(U0[:, :, None] * left).reshape((N, -1)) + V[:] = np.ascontiguousarray(V0[:, :, None] * right).reshape((N, -1)) + a[:] = diag + np.sum(U * V, axis=-1) + + return c, a, U, V + + +def prepare_rectangular_data(N, M, t, **kwargs): + results = dict( + t=np.tile(np.asarray(t)[:, None], (1, M)).flatten(), + X=np.tile(np.arange(M), N), + ) + + for k, v in kwargs.items(): + results[k] = np.ascontiguousarray( + np.broadcast_to(v, (N, M)), dtype=np.float64 + ).flatten() + + return results + + +class KroneckerLatent: + def __init__(self, *, R=None, L=None): + if R is not None: + if L is not None: + raise ValueError("Only one of 'R' and 'L' can be defined") + R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) + try: + self.L = np.linalg.cholesky(R) + except np.linalg.LinAlgError: + M = np.copy(R) + M[np.diag_indices_from(M)] += 1e-10 + self.L = np.linalg.cholesky(M) + elif L is not None: + self.L = np.ascontiguousarray(L, dtype=np.float64) + if self.L.ndim == 1: + self.L = np.reshape(self.L, (-1, 1)) + else: + raise ValueError("One of 'R' and 'L' must be defined") + + def __call__(self, t, inds): + return self.L[inds][:, None, :] diff --git a/python/celerite2/numpy.py b/python/celerite2/numpy.py deleted file mode 100644 index 99d6e05..0000000 --- a/python/celerite2/numpy.py +++ /dev/null @@ -1,120 +0,0 @@ -# -*- coding: utf-8 -*- - -__all__ = ["ConditionalDistribution", "GaussianProcess"] -import warnings - -import numpy as np - -from . import driver -from .core import BaseConditionalDistribution, BaseGaussianProcess -from .driver import LinAlgError - - -class ConditionalDistribution(BaseConditionalDistribution): - def _do_general_matmul(self, c, U1, V1, U2, V2, inp, target): - target = driver.general_matmul_lower( - self._xs, self.gp._t, c, U2, V1, inp, target - ) - target = driver.general_matmul_upper( - self._xs, self.gp._t, c, V2, U1, inp, target - ) - return target - - def _diagdot(self, a, b): - return np.einsum("ij,ij->j", a, b) - - def sample(self, *, size=None, regularize=None): - mu = self.mean - cov = self.covariance - if regularize is not None: - cov[np.diag_indices_from(cov)] += regularize - return np.random.multivariate_normal(mu, cov, size=size) - - -class GaussianProcess(BaseGaussianProcess): - conditional_distribution = ConditionalDistribution - - def _setup(self): - self._c = np.empty(0, dtype=np.float64) - self._a = np.empty(0, dtype=np.float64) - self._U = np.empty((0, 0), dtype=np.float64) - self._V = np.empty((0, 0), dtype=np.float64) - - def _copy_or_check(self, y, *, inplace=False): - if inplace: - if ( - y.dtype != "float64" - or not y.flags.c_contiguous - or not y.flags.writeable - ): - warnings.warn( - "Inplace operations can only be made on C-contiguous, " - "writable, float64 arrays; a copy will be made" - ) - y = np.ascontiguousarray(y, dtype=np.float64) - else: - y = np.array(y, dtype=np.float64, copy=True, order="C") - return y - - def _as_tensor(self, y): - return np.ascontiguousarray(y, dtype=np.float64) - - def _zeros_like(self, y): - return np.zeros_like(y) - - def _do_compute(self, quiet): - # Compute the Cholesky factorization - try: - self._d, self._W = driver.factor( - self._t, - self._c, - self._a, - self._U, - self._V, - self._a, - np.copy(self._V), - ) - except LinAlgError: - if not quiet: - raise - self._log_det = -np.inf - self._norm = np.inf - else: - self._log_det = np.sum(np.log(self._d)) - self._norm = -0.5 * ( - self._log_det + self._size * np.log(2 * np.pi) - ) - - def _check_sorted(self, t): - if np.any(np.diff(t) < 0.0): - raise ValueError("The input coordinates must be sorted") - return t - - def _do_solve(self, y): - z = driver.solve_lower(self._t, self._c, self._U, self._W, y, y) - z /= self._d[:, None] - z = driver.solve_upper(self._t, self._c, self._U, self._W, z, z) - return z - - def _do_dot_tril(self, y): - z = y * np.sqrt(self._d)[:, None] - return driver.matmul_lower(self._t, self._c, self._U, self._W, z, z) - - def _do_norm(self, y): - alpha = y[:, None] - alpha = driver.solve_lower( - self._t, self._c, self._U, self._W, alpha, alpha - )[:, 0] - return np.sum(alpha ** 2 / self._d) - - def sample(self, *, size=None, include_mean=True): - if self._t is None: - raise RuntimeError("you must call 'compute' first") - if size is None: - n = np.random.randn(self._size) - else: - n = np.random.randn(self._size, size) - result = self.dot_tril(n, inplace=True).T - if include_mean: - result += self._mean_value - return result diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index 365e349..af88844 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -3,6 +3,7 @@ __all__ = [ "Term", "TermSum", + "TermSumGeneral", "TermProduct", "TermDiff", "TermConvolution", @@ -30,8 +31,16 @@ class Term: """ + __requires_general_addition__ = False + def __add__(self, b): - return TermSum(self, b) + terms = tuple(self.terms) + (b,) + if ( + self.__requires_general_addition__ + or b.__requires_general_addition__ + ): + return TermSumGeneral(*terms) + return TermSum(*terms) def __mul__(self, b): return TermProduct(self, b) @@ -114,8 +123,27 @@ def to_dense(self, x, diag): K[np.diag_indices_from(K)] += diag return K + def _resize_matrices(self, N, J, c, a, U, V): + if c is None: + c = np.empty(J) + else: + c.resize(J, refcheck=False) + if a is None: + a = np.empty(N) + else: + a.resize(N, refcheck=False) + if U is None: + U = np.empty((N, J)) + else: + U.resize((N, J), refcheck=False) + if V is None: + V = np.empty((N, J)) + else: + V.resize((N, J), refcheck=False) + return c, a, U, V + def get_celerite_matrices( - self, x, diag, *, c=None, a=None, U=None, V=None + self, t, diag, *, c=None, a=None, U=None, V=None, X=None ): """Get the matrices needed to solve the celerite system @@ -126,7 +154,7 @@ def get_celerite_matrices( extensions. Args: - x (shape[N]): The independent coordinates of the data. + t (shape[N]): The independent coordinates of the data. diag (shape[N]): The diagonal variance of the system. a (shape[N], optional): The diagonal of the A matrix. U (shape[N, J], optional): The first low-rank matrix. @@ -137,57 +165,41 @@ def get_celerite_matrices( Raises: ValueError: When the inputs are not valid. """ - x = np.atleast_1d(x) + t = np.atleast_1d(t) diag = np.atleast_1d(diag) - if len(x.shape) != 1: + if len(t.shape) != 1: raise ValueError("'x' must be one-dimensional") - if x.shape != diag.shape: + if t.shape != diag.shape: raise ValueError("dimension mismatch") ar, cr, ac, bc, cc, dc = self.get_coefficients() - N = len(x) + N = len(t) Jr = len(ar) Jc = len(ac) J = Jr + 2 * Jc - - if c is None: - c = np.empty(J) - else: - c.resize(J, refcheck=False) - if a is None: - a = np.empty(N) - else: - a.resize(N, refcheck=False) - if U is None: - U = np.empty((N, J)) - else: - U.resize((N, J), refcheck=False) - if V is None: - V = np.empty((N, J)) - else: - V.resize((N, J), refcheck=False) + c, a, U, V = self._resize_matrices(N, J, c, a, U, V) c[:Jr] = cr c[Jr::2] = cc c[Jr + 1 :: 2] = cc a, U, V = driver.get_celerite_matrices( - ar, ac, bc, dc, x, diag, a, U, V + ar, ac, bc, dc, t, diag, a, U, V ) return c, a, U, V - def dot(self, x, diag, y): + def dot(self, t, diag, y, *, X=None): """Apply a matrix-vector or matrix-matrix product Args: - x (shape[N]): The independent coordinates of the data. + t (shape[N]): The independent coordinates of the data. diag (shape[N]): The diagonal variance of the system. y (shape[N] or shape[N, K]): The target of vector or matrix for this operation. """ - x = np.atleast_1d(x) + t = np.atleast_1d(t) y = np.atleast_1d(y) - if y.shape[0] != x.shape[0]: + if y.shape[0] != t.shape[0]: raise ValueError("Dimension mismatch") is_vector = False @@ -197,10 +209,10 @@ def dot(self, x, diag, y): if y.ndim != 2: raise ValueError("'y' can only be a vector or matrix") - c, a, U, V = self.get_celerite_matrices(x, diag) + c, a, U, V = self.get_celerite_matrices(t, diag, X=X) z = y * a[:, None] - z = driver.matmul_lower(x, c, U, V, y, z) - z = driver.matmul_upper(x, c, U, V, y, z) + z = driver.matmul_lower(t, c, U, V, y, z) + z = driver.matmul_upper(t, c, U, V, y, z) if is_vector: return z[:, 0] @@ -219,10 +231,10 @@ class TermSum(Term): """ def __init__(self, *terms): - if any(isinstance(term, TermConvolution) for term in terms): + if any(term.__requires_general_addition__ for term in terms): raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self._terms = terms @@ -235,6 +247,71 @@ def get_coefficients(self): return tuple(np.concatenate(c) for c in zip(*coeffs)) +class TermSumGeneral(Term): + """A general sum of multiple :class:`Term` objects + + This will be used if any of the terms require "general addition", + or if they don't provide a ``get_coefficients`` method. + + Args: + *terms: Any number of :class:`Term` subclasses to add together. + """ + + __requires_general_addition__ = True + + def __init__(self, *terms): + basic = [ + term for term in terms if not term.__requires_general_addition__ + ] + self._terms = [ + term for term in terms if term.__requires_general_addition__ + ] + if len(basic): + self._terms.insert(0, TermSum(*basic)) + if not len(self._terms): + raise ValueError( + "A general term sum cannot be instantiated without any terms" + ) + + @property + def terms(self): + return self._terms + + def get_value(self, tau): + K = self.terms[0].get_value(tau) + for term in self.terms[1:]: + K += term.get_value(tau) + return K + + def get_psd(self, omega): + p = self.terms[0].get_psd(omega) + for term in self.terms[1:]: + p += term.get_psd(omega) + return p + + def get_celerite_matrices( + self, t, diag, *, c=None, a=None, U=None, V=None, X=None + ): + diag = np.atleast_1d(diag) + matrices = [ + term.get_celerite_matrices(t, np.zeros_like(diag), X=X) + for term in self.terms + ] + N = matrices[0][1].shape[0] + J = sum(m[0].shape[0] for m in matrices) + c, a, U, V = self._resize_matrices(N, J, c, a, U, V) + a[:] = diag + j = 0 + for c0, a0, U0, V0 in matrices: + dj = len(c0) + c[j : j + dj] = c0 + a[:] += a0 + U[:, j : j + dj] = U0 + V[:, j : j + dj] = V0 + j += dj + return c, a, U, V + + class TermProduct(Term): """A product of two :class:`Term` objects @@ -248,12 +325,13 @@ class TermProduct(Term): """ def __init__(self, term1, term2): - int1 = isinstance(term1, TermConvolution) - int2 = isinstance(term2, TermConvolution) - if int1 or int2: + if ( + term1.__requires_general_addition__ + or term2.__requires_general_addition__ + ): raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term1 = term1 self.term2 = term2 @@ -309,10 +387,10 @@ class TermDiff(Term): """ def __init__(self, term): - if isinstance(term, TermConvolution): + if term.__requires_general_addition__: raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term = term @@ -343,12 +421,27 @@ class TermConvolution(Term): exposure time). """ + __requires_general_addition__ = True + def __init__(self, term, delta): + if term.__requires_general_addition__: + raise TypeError( + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" + ) self.term = term self.delta = float(delta) def get_celerite_matrices( - self, x, diag, *, c=None, a=None, U=None, V=None + self, + t, + diag, + *, + c=None, + a=None, + U=None, + V=None, + X=None, ): dt = self.delta ar, cr, a, b, cc, d = self.term.get_coefficients() @@ -379,7 +472,9 @@ def get_celerite_matrices( new_diag = diag + delta_diag - return super().get_celerite_matrices(x, new_diag, c=c, a=a, U=U, V=V) + return super().get_celerite_matrices( + t, new_diag, c=c, a=a, U=U, V=V, X=X + ) def get_coefficients(self): ar, cr, a, b, c, d = self.term.get_coefficients() diff --git a/python/celerite2/theano/__init__.py b/python/celerite2/theano/__init__.py index 19b104a..47c6ad1 100644 --- a/python/celerite2/theano/__init__.py +++ b/python/celerite2/theano/__init__.py @@ -3,5 +3,5 @@ __all__ = ["terms", "GaussianProcess", "CeleriteNormal"] from . import terms -from .celerite2 import GaussianProcess +from .core import GaussianProcess from .distribution import CeleriteNormal diff --git a/python/celerite2/theano/celerite2.py b/python/celerite2/theano/core.py similarity index 90% rename from python/celerite2/theano/celerite2.py rename to python/celerite2/theano/core.py index f909680..1bbed40 100644 --- a/python/celerite2/theano/celerite2.py +++ b/python/celerite2/theano/core.py @@ -70,6 +70,27 @@ def _as_tensor(self, tensor): def _zeros_like(self, tensor): return tt.zeros_like(tensor) + def _zeros(self, shape): + return tt.zeros(shape) + + def _eye(self, n): + return tt.eye(n) + + def _get_dense_matrix(self, t, c, a, U, V): + Y = tt.eye(t.shape[0]) + Z = tt.diag(a) + Z += ops.matmul_lower(t, c, U, V, Y)[0] + Z += ops.matmul_upper(t, c, U, V, Y)[0] + return Z + + def _do_general_matmul(self, t1, t2, c, U1, V1, U2, V2, inp, target): + target += ops.general_matmul_lower(t1, t2, c, U2, V1, inp)[0] + target += ops.general_matmul_upper(t1, t2, c, V2, U1, inp)[0] + return target + + def _diagdot(self, a, b): + return tt.batched_dot(a.T, b.T) + def _do_compute(self, quiet): if quiet: self._d, self._W, _ = ops.factor_quiet( diff --git a/python/test/jax/test_celerite2.py b/python/test/jax/test_core.py similarity index 100% rename from python/test/jax/test_celerite2.py rename to python/test/jax/test_core.py diff --git a/python/test/test_celerite2.py b/python/test/test_core.py similarity index 100% rename from python/test/test_celerite2.py rename to python/test/test_core.py diff --git a/python/test/theano/test_celerite2.py b/python/test/theano/test_core.py similarity index 100% rename from python/test/theano/test_celerite2.py rename to python/test/theano/test_core.py From 7c00e0227f351af631d2ba97d7ffda72e4d27121 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Thu, 12 Nov 2020 20:25:33 -0500 Subject: [PATCH 02/14] getting theano and jax working --- python/celerite2/__init__.py | 19 ++- python/celerite2/core.py | 2 + python/celerite2/jax/terms.py | 235 +++++++++++++++++-------------- python/celerite2/latent.py | 5 + python/celerite2/terms.py | 134 +++++------------- python/celerite2/testing.py | 15 +- python/celerite2/theano/core.py | 25 +++- python/celerite2/theano/terms.py | 181 ++++++++++++------------ python/test/test_terms.py | 3 +- python/test/theano/test_core.py | 2 +- 10 files changed, 312 insertions(+), 309 deletions(-) diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py index 983cdf1..3751528 100644 --- a/python/celerite2/__init__.py +++ b/python/celerite2/__init__.py @@ -11,7 +11,7 @@ __license__ = "MIT" __description__ = "Fast and scalable Gaussian Processes in 1D" __bibtex__ = __citation__ = r""" -@article{celerite1, +@article{celerite2:foremanmackey17, author = {{Foreman-Mackey}, D. and {Agol}, E. and {Ambikasaran}, S. and {Angus}, R.}, title = "{Fast and Scalable Gaussian Process Modeling with Applications to @@ -25,7 +25,7 @@ adsurl = {http://adsabs.harvard.edu/abs/2017AJ....154..220F}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } -@article{celerite2, +@article{celerite2:foremanmackey18, author = {{Foreman-Mackey}, D.}, title = "{Scalable Backpropagation for Gaussian Processes using Celerite}", journal = {Research Notes of the American Astronomical Society}, @@ -38,4 +38,19 @@ adsurl = {http://adsabs.harvard.edu/abs/2018RNAAS...2a..31F}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } +@article{celerite2:gordon20, + author = {{Gordon}, Tyler A. and {Agol}, Eric and {Foreman-Mackey}, Daniel}, + title = "{A Fast, Two-dimensional Gaussian Process Method Based on + Celerite: Applications to Transiting Exoplanet Discovery and + Characterization}", + journal = {\aj}, + year = 2020, + month = nov, + volume = 160, + number = 5, + pages = {240}, + doi = {10.3847/1538-3881/abbc16}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020AJ....160..240G}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} """ diff --git a/python/celerite2/core.py b/python/celerite2/core.py index 6dd88e8..93fdb1f 100644 --- a/python/celerite2/core.py +++ b/python/celerite2/core.py @@ -308,6 +308,8 @@ def compute( deviation of the observation model. diag (shape[N], optional): If provided, the diagonal variance of the observation model. + X (shape[N], optional): If provided, coordinates of the data points + in the latent space. check_sorted (bool, optional): If ``True``, a check is performed to make sure that ``t`` is correctly sorted. A ``ValueError`` will be thrown when this check fails. diff --git a/python/celerite2/jax/terms.py b/python/celerite2/jax/terms.py index 25132b6..5279c19 100644 --- a/python/celerite2/jax/terms.py +++ b/python/celerite2/jax/terms.py @@ -3,6 +3,7 @@ __all__ = [ "Term", "TermSum", + "TermSumGeneral", "TermProduct", "TermDiff", "TermConvolution", @@ -23,8 +24,17 @@ class Term: + __doc__ = base_terms.Term.__doc__ + __requires_general_addition__ = False + def __add__(self, b): - return TermSum(self, b) + terms = tuple(self.terms) + (b,) + if ( + self.__requires_general_addition__ + or b.__requires_general_addition__ + ): + return TermSumGeneral(*terms) + return TermSum(*terms) def __mul__(self, b): return TermProduct(self, b) @@ -36,23 +46,27 @@ def terms(self): def get_coefficients(self): raise NotImplementedError("subclasses must implement this method") - def get_value(self, tau): - tau = np.abs(np.atleast_1d(tau)) - ar, cr, ac, bc, cc, dc = self.get_coefficients() - k = np.zeros_like(tau) - tau = tau[..., None] - - if len(ar): - k += np.sum(ar * np.exp(-cr * tau), axis=-1) - - if len(ac): - arg = dc * tau - k += np.sum( - np.exp(-cc * tau) * (ac * np.cos(arg) + bc * np.sin(arg)), - axis=-1, - ) - - return k + def get_value(self, t, t2=None, *, diag=None, X=None, X2=None): + t = np.atleast_1d(t) + if diag is None: + diag = np.zeros_like(t) + c1, a1, U1, V1 = self.get_celerite_matrices(t, diag, X=X) + if t2 is None: + Y = np.eye(t.shape[0]) + K = np.diag(a1) + K += ops.matmul_lower(t, c1, U1, V1, Y) + K += ops.matmul_upper(t, c1, U1, V1, Y) + return K + + t2 = np.atleast_1d(t2) + c2, a2, U2, V2 = self.get_celerite_matrices( + t2, np.zeros_like(t2), X=X2 + ) + Y = np.eye(t2.shape[0]) + K = np.zeros((t.shape[0], t2.shape[0])) + K += ops.general_matmul_lower(t, t2, c1, U1, V2, Y) + K += ops.general_matmul_upper(t, t2, c1, V1, U2, Y) + return K def get_psd(self, omega): w2 = np.atleast_1d(omega) ** 2 @@ -73,26 +87,22 @@ def get_psd(self, omega): return np.sqrt(2 / np.pi) * psd - def to_dense(self, x, diag): - K = self.get_value(x[:, None] - x[None, :]) - return K + np.diag(diag) - - def get_celerite_matrices(self, x, diag, **kwargs): - x = np.atleast_1d(x) + def get_celerite_matrices(self, t, diag, **kwargs): + t = np.atleast_1d(t) diag = np.atleast_1d(diag) - if len(x.shape) != 1: - raise ValueError("'x' must be one-dimensional") - if x.shape != diag.shape: + if t.ndim != 1: + raise ValueError("'t' must be one-dimensional") + if t.shape != diag.shape: raise ValueError("dimension mismatch") ar, cr, ac, bc, cc, dc = self.get_coefficients() a = diag + np.sum(ar) + np.sum(ac) - arg = dc[None, :] * x[:, None] + arg = dc[None, :] * t[:, None] cos = np.cos(arg) sin = np.sin(arg) - z = np.zeros_like(x) + z = np.zeros_like(t) U = np.concatenate( ( @@ -112,7 +122,7 @@ def get_celerite_matrices(self, x, diag, **kwargs): return c, a, U, V - def dot(self, x, diag, y): + def dot(self, t, diag, y, *, X=None): y = np.atleast_1d(y) is_vector = False @@ -122,10 +132,10 @@ def dot(self, x, diag, y): if y.ndim != 2: raise ValueError("'y' can only be a vector or matrix") - c, a, U, V = self.get_celerite_matrices(x, diag) + c, a, U, V = self.get_celerite_matrices(t, diag, X=X) z = y * a[:, None] - z += ops.matmul_lower(x, c, U, V, y) - z += ops.matmul_upper(x, c, U, V, y) + z += ops.matmul_lower(t, c, U, V, y) + z += ops.matmul_upper(t, c, U, V, y) if is_vector: return z[:, 0] @@ -133,11 +143,13 @@ def dot(self, x, diag, y): class TermSum(Term): + __doc__ = base_terms.TermSum.__doc__ + def __init__(self, *terms): - if any(isinstance(term, TermConvolution) for term in terms): + if any(term.__requires_general_addition__ for term in terms): raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self._terms = terms @@ -150,14 +162,66 @@ def get_coefficients(self): return tuple(np.concatenate(c) for c in zip(*coeffs)) +class TermSumGeneral(Term): + __doc__ = base_terms.TermSumGeneral.__doc__ + __requires_general_addition__ = True + + def __init__(self, *terms): + basic = [ + term for term in terms if not term.__requires_general_addition__ + ] + self._terms = [ + term for term in terms if term.__requires_general_addition__ + ] + if len(basic): + self._terms.insert(0, TermSum(*basic)) + if not len(self._terms): + raise ValueError( + "A general term sum cannot be instantiated without any terms" + ) + + @property + def terms(self): + return self._terms + + def get_value(self, *args, **kwargs): + K = self.terms[0].get_value(*args, **kwargs) + for term in self.terms[1:]: + K += term.get_value(*args, **kwargs) + return K + + def get_psd(self, omega): + p = self.terms[0].get_psd(omega) + for term in self.terms[1:]: + p += term.get_psd(omega) + return p + + def get_celerite_matrices( + self, t, diag, *, c=None, a=None, U=None, V=None, X=None + ): + diag = np.atleast_1d(diag) + matrices = [ + term.get_celerite_matrices(t, np.zeros_like(diag), X=X) + for term in self.terms + ] + c = np.concatenate([m[0] for m in matrices]) + a = diag + np.sum([m[1] for m in matrices], axis=0) + U = np.concatenate([m[2] for m in matrices], axis=1) + V = np.concatenate([m[3] for m in matrices], axis=1) + return c, a, U, V + + class TermProduct(Term): + __doc__ = base_terms.TermProduct.__doc__ + def __init__(self, term1, term2): - int1 = isinstance(term1, TermConvolution) - int2 = isinstance(term2, TermConvolution) - if int1 or int2: + if ( + term1.__requires_general_addition__ + or term2.__requires_general_addition__ + ): raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term1 = term1 self.term2 = term2 @@ -206,11 +270,13 @@ def get_coefficients(self): class TermDiff(Term): + __doc__ = base_terms.TermDiff.__doc__ + def __init__(self, term): - if isinstance(term, TermConvolution): + if term.__requires_general_addition__: raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term = term @@ -229,7 +295,15 @@ def get_coefficients(self): class TermConvolution(Term): + __doc__ = base_terms.TermConvolution.__doc__ + __requires_general_addition__ = True + def __init__(self, term, delta): + if term.__requires_general_addition__: + raise TypeError( + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" + ) self.term = term self.delta = np.float64(delta) @@ -301,70 +375,10 @@ def get_psd(self, omega): sinc = np.sin(arg) / arg return psd0 * sinc ** 2 - def get_value(self, tau0): - dt = self.delta - ar, cr, a, b, c, d = self.term.get_coefficients() - - # Format the lags correctly - tau0 = np.abs(np.atleast_1d(tau0)) - tau = tau0[..., None] - - # Precompute some factors - dpt = dt + tau - dmt = dt - tau - - # Real parts: - # tau > Delta - crd = cr * dt - cosh = np.cosh(crd) - norm = 2 * ar / crd ** 2 - K_large = np.sum(norm * (cosh - 1) * np.exp(-cr * tau), axis=-1) - - # tau < Delta - crdmt = cr * dmt - K_small = K_large + np.sum(norm * (crdmt - np.sinh(crdmt)), axis=-1) - - # Complex part - cd = c * dt - dd = d * dt - c2 = c ** 2 - d2 = d ** 2 - c2pd2 = c2 + d2 - C1 = a * (c2 - d2) + 2 * b * c * d - C2 = b * (c2 - d2) - 2 * a * c * d - norm = 1.0 / (dt * c2pd2) ** 2 - k0 = np.exp(-c * tau) - cdt = np.cos(d * tau) - sdt = np.sin(d * tau) - - # For tau > Delta - cos_term = 2 * (np.cosh(cd) * np.cos(dd) - 1) - sin_term = 2 * (np.sinh(cd) * np.sin(dd)) - factor = k0 * norm - K_large += np.sum( - (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1 - ) - K_large += np.sum( - (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1 - ) - - # tau < Delta - edmt = np.exp(-c * dmt) - edpt = np.exp(-c * dpt) - cos_term = ( - edmt * np.cos(d * dmt) + edpt * np.cos(d * dpt) - 2 * k0 * cdt - ) - sin_term = ( - edmt * np.sin(d * dmt) + edpt * np.sin(d * dpt) - 2 * k0 * sdt - ) - K_small += np.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1) - K_small += np.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1) - - mask = tau0 >= dt - return K_large * mask + K_small * (~mask) - class RealTerm(Term): + __doc__ = base_terms.RealTerm.__doc__ + def __init__(self, *, a, c): self.a = np.float64(a) self.c = np.float64(c) @@ -375,6 +389,8 @@ def get_coefficients(self): class ComplexTerm(Term): + __doc__ = base_terms.ComplexTerm.__doc__ + def __init__(self, *, a, b, c, d): self.a = np.float64(a) self.b = np.float64(b) @@ -401,6 +417,9 @@ def SHOTerm(*args, **kwargs): return under +SHOTerm.__doc__ = base_terms.SHOTerm.__doc__ + + class OverdampedSHOTerm(Term): __parameter_spec__ = base_terms.SHOTerm.__parameter_spec__ @@ -450,6 +469,8 @@ def get_coefficients(self): class Matern32Term(Term): + __doc__ = base_terms.Matern32Term.__doc__ + def __init__(self, *, sigma, rho, eps=0.01): self.sigma = np.float64(sigma) self.rho = np.float64(rho) @@ -470,6 +491,8 @@ def get_coefficients(self): class RotationTerm(TermSum): + __doc__ = base_terms.RotationTerm.__doc__ + def __init__(self, *, sigma, period, Q0, dQ, f): self.sigma = np.float64(sigma) self.period = np.float64(period) diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py index 6603c3d..b2632c0 100644 --- a/python/celerite2/latent.py +++ b/python/celerite2/latent.py @@ -14,6 +14,11 @@ def __init__(self, term, left=None, right=None): self.left = left self.right = right + def get_psd(self, omega): + raise NotImplementedError( + "The PSD is not implemented for latent models" + ) + def _get_latent(self, left_or_right, t, X): if left_or_right is None: return np.ones((1, 1, 1)) diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index af88844..fbd1ba7 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -64,28 +64,41 @@ def get_coefficients(self): """ raise NotImplementedError("subclasses must implement this method") - def get_value(self, tau): + def get_value(self, t, t2=None, *, diag=None, X=None, X2=None): """Compute the value of the kernel as a function of lag + If ``t2`` is provided, the result will be rectangular. + Args: - tau (shape[...]): The lags where the kernel should be evaluated. + t (shape[N]): The independent coordinates of the lefthand data. + t2 (shape[M], optional): The independent coordinates righthand + data. + diag (shape[N], optional): The diagonal variance of the system. + X (shape[N], optional): The latent coordinates of the lefthand + data. + X2 (shape[N], optional): The latent coordinates of the righthand + data. """ - tau = np.abs(np.atleast_1d(tau)) - ar, cr, ac, bc, cc, dc = self.get_coefficients() - k = np.zeros_like(tau) - tau = tau[..., None] - - if len(ar): - k += np.sum(ar * np.exp(-cr * tau), axis=-1) - - if len(ac): - arg = dc * tau - k += np.sum( - np.exp(-cc * tau) * (ac * np.cos(arg) + bc * np.sin(arg)), - axis=-1, - ) - - return k + t = np.atleast_1d(t) + if diag is None: + diag = np.zeros_like(t) + c1, a1, U1, V1 = self.get_celerite_matrices(t, diag, X=X) + if t2 is None: + Y = np.eye(t.shape[0]) + K = driver.matmul_lower(t, c1, U1, V1, Y, np.diag(a1)) + K = driver.matmul_upper(t, c1, U1, V1, Y, K) + return K + + t2 = np.atleast_1d(t2) + c2, a2, U2, V2 = self.get_celerite_matrices( + t2, np.zeros_like(t2), X=X2 + ) + Y = np.eye(t2.shape[0]) + K = driver.general_matmul_lower( + t, t2, c1, U1, V2, Y, np.zeros((t.shape[0], t2.shape[0])) + ) + K = driver.general_matmul_upper(t, t2, c1, V1, U2, Y, K) + return K def get_psd(self, omega): """Compute the value of the power spectral density for this process @@ -112,17 +125,6 @@ def get_psd(self, omega): return np.sqrt(2 / np.pi) * psd - def to_dense(self, x, diag): - """Evaluate the dense covariance matrix for this term - - Args: - x (shape[N]): The independent coordinates of the data. - diag (shape[N]): The diagonal variance of the system. - """ - K = self.get_value(x[:, None] - x[None, :]) - K[np.diag_indices_from(K)] += diag - return K - def _resize_matrices(self, N, J, c, a, U, V): if c is None: c = np.empty(J) @@ -156,11 +158,11 @@ def get_celerite_matrices( Args: t (shape[N]): The independent coordinates of the data. diag (shape[N]): The diagonal variance of the system. + c (shape[J], optional): The transport coefficients. a (shape[N], optional): The diagonal of the A matrix. U (shape[N, J], optional): The first low-rank matrix. V (shape[N, J], optional): The second low-rank matrix. - P (shape[N-1, J], optional): The regularization matrix used for - numerical stability. + X (shape[N], optional): The latent coordinates of the data. Raises: ValueError: When the inputs are not valid. @@ -196,6 +198,7 @@ def dot(self, t, diag, y, *, X=None): diag (shape[N]): The diagonal variance of the system. y (shape[N] or shape[N, K]): The target of vector or matrix for this operation. + X (shape[N], optional): The latent coordinates of the data. """ t = np.atleast_1d(t) y = np.atleast_1d(y) @@ -277,10 +280,10 @@ def __init__(self, *terms): def terms(self): return self._terms - def get_value(self, tau): - K = self.terms[0].get_value(tau) + def get_value(self, *args, **kwargs): + K = self.terms[0].get_value(*args, **kwargs) for term in self.terms[1:]: - K += term.get_value(tau) + K += term.get_value(*args, **kwargs) return K def get_psd(self, omega): @@ -513,69 +516,6 @@ def get_psd(self, omega): sinc[m] = np.sin(arg[m]) / arg[m] return psd0 * sinc ** 2 - def get_value(self, tau0): - dt = self.delta - ar, cr, a, b, c, d = self.term.get_coefficients() - - # Format the lags correctly - tau0 = np.abs(np.atleast_1d(tau0)) - tau = tau0[..., None] - - # Precompute some factors - dpt = dt + tau - dmt = dt - tau - - # Real parts: - # tau > Delta - crd = cr * dt - cosh = np.cosh(crd) - norm = 2 * ar / crd ** 2 - K_large = np.sum(norm * (cosh - 1) * np.exp(-cr * tau), axis=-1) - - # tau < Delta - crdmt = cr * dmt - K_small = K_large + np.sum(norm * (crdmt - np.sinh(crdmt)), axis=-1) - - # Complex part - cd = c * dt - dd = d * dt - c2 = c ** 2 - d2 = d ** 2 - c2pd2 = c2 + d2 - C1 = a * (c2 - d2) + 2 * b * c * d - C2 = b * (c2 - d2) - 2 * a * c * d - norm = 1.0 / (dt * c2pd2) ** 2 - k0 = np.exp(-c * tau) - cdt = np.cos(d * tau) - sdt = np.sin(d * tau) - - # For tau > Delta - cos_term = 2 * (np.cosh(cd) * np.cos(dd) - 1) - sin_term = 2 * (np.sinh(cd) * np.sin(dd)) - factor = k0 * norm - K_large += np.sum( - (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1 - ) - K_large += np.sum( - (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1 - ) - - # tau < Delta - edmt = np.exp(-c * dmt) - edpt = np.exp(-c * dpt) - cos_term = ( - edmt * np.cos(d * dmt) + edpt * np.cos(d * dpt) - 2 * k0 * cdt - ) - sin_term = ( - edmt * np.sin(d * dmt) + edpt * np.sin(d * dpt) - 2 * k0 * sdt - ) - K_small += np.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1) - K_small += np.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1) - - mask = tau0 >= dt - K_small[mask] = K_large[mask] - return K_small - class RealTerm(Term): r"""The simplest celerite term diff --git a/python/celerite2/testing.py b/python/celerite2/testing.py index 2c5afc1..17a9b65 100644 --- a/python/celerite2/testing.py +++ b/python/celerite2/testing.py @@ -30,7 +30,7 @@ def get_matrices( c, a, U, V = kernel.get_celerite_matrices(x, diag) if include_dense: - K = kernel.get_value(x[:, None] - x[None, :]) + K = kernel.get_value(x, x) K[np.diag_indices_from(K)] += diag if not conditional: @@ -42,7 +42,7 @@ def get_matrices( _, _, U2, V2 = kernel.get_celerite_matrices(t, np.zeros_like(t)) if include_dense: - K_star = kernel.get_value(t[:, None] - x[None, :]) + K_star = kernel.get_value(t, x) return x, c, a, U, V, K, Y, t, U2, V2, K_star return x, c, a, U, V, Y, t, U2, V2 @@ -128,17 +128,16 @@ def check_tensor_term(eval_func, term, pyterm, atol=1e-8): _compare_tensor( eval_func, - term.to_dense(x, diag), - pyterm.to_dense(x, diag), - "to_dense", + term.get_value(x), + pyterm.get_value(x), + "get_value", atol=atol, ) - tau = x[:, None] - x[None, :] _compare_tensor( eval_func, - term.get_value(tau), - pyterm.get_value(tau), + term.get_value(x, t), + pyterm.get_value(x, t), "get_value", atol=atol, ) diff --git a/python/celerite2/theano/core.py b/python/celerite2/theano/core.py index 1bbed40..2fc87fa 100644 --- a/python/celerite2/theano/core.py +++ b/python/celerite2/theano/core.py @@ -14,9 +14,13 @@ pm = None CITATIONS = ( - ("celerite2:foremanmackey17", "celerite2:foremanmackey18"), + ( + "celerite2:foremanmackey17", + "celerite2:foremanmackey18", + "celerite2:gordon20", + ), r""" -@article{exoplanet:foremanmackey17, +@article{celerite2:foremanmackey17, author = {{Foreman-Mackey}, D. and {Agol}, E. and {Ambikasaran}, S. and {Angus}, R.}, title = "{Fast and Scalable Gaussian Process Modeling with Applications to @@ -30,7 +34,7 @@ adsurl = {http://adsabs.harvard.edu/abs/2017AJ....154..220F}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } -@article{exoplanet:foremanmackey18, +@article{celerite2:foremanmackey18, author = {{Foreman-Mackey}, D.}, title = "{Scalable Backpropagation for Gaussian Processes using Celerite}", journal = {Research Notes of the American Astronomical Society}, @@ -43,6 +47,21 @@ adsurl = {http://adsabs.harvard.edu/abs/2018RNAAS...2a..31F}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } +@article{celerite2:gordon20, + author = {{Gordon}, Tyler A. and {Agol}, Eric and {Foreman-Mackey}, Daniel}, + title = "{A Fast, Two-dimensional Gaussian Process Method Based on + Celerite: Applications to Transiting Exoplanet Discovery and + Characterization}", + journal = {\aj}, + year = 2020, + month = nov, + volume = 160, + number = 5, + pages = {240}, + doi = {10.3847/1538-3881/abbc16}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020AJ....160..240G}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} """, ) diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py index eb3a6fa..126a7ae 100644 --- a/python/celerite2/theano/terms.py +++ b/python/celerite2/theano/terms.py @@ -3,6 +3,7 @@ __all__ = [ "Term", "TermSum", + "TermSumGeneral", "TermProduct", "TermDiff", "TermConvolution", @@ -23,6 +24,7 @@ class Term(base_terms.Term): __doc__ = base_terms.Term.__doc__ + __requires_general_addition__ = False def __init__(self, *, dtype="float64"): self.dtype = dtype @@ -43,14 +45,26 @@ def terms(self): def get_coefficients(self): raise NotImplementedError("subclasses must implement this method") - def get_value(self, tau): - ar, cr, ac, bc, cc, dc = self.coefficients - tau = tt.abs_(tau) - tau = tt.shape_padright(tau) - K = tt.sum(ar * tt.exp(-cr * tau), axis=-1) - factor = tt.exp(-cc * tau) - K += tt.sum(ac * factor * tt.cos(dc * tau), axis=-1) - K += tt.sum(bc * factor * tt.sin(dc * tau), axis=-1) + def get_value(self, t, t2=None, *, diag=None, X=None, X2=None): + t = tt.as_tensor_variable(t) + if diag is None: + diag = tt.zeros_like(t) + c1, a1, U1, V1 = self.get_celerite_matrices(t, diag, X=X) + if t2 is None: + Y = tt.eye(t.shape[0]) + K = tt.diag(a1) + K += ops.matmul_lower(t, c1, U1, V1, Y)[0] + K += ops.matmul_upper(t, c1, U1, V1, Y)[0] + return K + + t2 = tt.as_tensor_variable(t2) + c2, a2, U2, V2 = self.get_celerite_matrices( + t2, tt.zeros_like(t2), X=X2 + ) + Y = tt.eye(t2.shape[0]) + K = tt.zeros((t.shape[0], t2.shape[0])) + K += ops.general_matmul_lower(t, t2, c1, U1, V2, Y)[0] + K += ops.general_matmul_upper(t, t2, c1, V1, U2, Y)[0] return K def get_psd(self, omega): @@ -66,11 +80,6 @@ def get_psd(self, omega): ) return np.sqrt(2.0 / np.pi) * power - def to_dense(self, x, diag): - K = self.get_value(x[:, None] - x[None, :]) - K += tt.diag(diag) - return K - def get_celerite_matrices(self, x, diag, **kwargs): x = tt.as_tensor_variable(x) diag = tt.as_tensor_variable(diag) @@ -100,7 +109,7 @@ def get_celerite_matrices(self, x, diag, **kwargs): return c, a, U, V - def dot(self, x, diag, y): + def dot(self, x, diag, y, *, X=None): x = tt.as_tensor_variable(x) y = tt.as_tensor_variable(y) @@ -111,7 +120,7 @@ def dot(self, x, diag, y): if y.ndim != 2: raise ValueError("'y' can only be a vector or matrix") - c, a, U, V = self.get_celerite_matrices(x, diag) + c, a, U, V = self.get_celerite_matrices(x, diag, X=X) z = y * a[:, None] z += ops.matmul_lower(x, c, U, V, y)[0] z += ops.matmul_upper(x, c, U, V, y)[0] @@ -125,11 +134,10 @@ class TermSum(Term): __doc__ = base_terms.TermSum.__doc__ def __init__(self, *terms, **kwargs): - if any(isinstance(term, TermConvolution) for term in terms): + if any(term.__requires_general_addition__ for term in terms): raise TypeError( - "You cannot perform operations on an " - "TermConvolution, it must be the outer term in " - "the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self._terms = terms super().__init__(**kwargs) @@ -143,17 +151,66 @@ def get_coefficients(self): return tuple(tt.concatenate(a, axis=0) for a in zip(*coeffs)) +class TermSumGeneral(Term): + __doc__ = base_terms.TermSumGeneral.__doc__ + __requires_general_addition__ = True + + def __init__(self, *terms): + basic = [ + term for term in terms if not term.__requires_general_addition__ + ] + self._terms = [ + term for term in terms if term.__requires_general_addition__ + ] + if len(basic): + self._terms.insert(0, TermSum(*basic)) + if not len(self._terms): + raise ValueError( + "A general term sum cannot be instantiated without any terms" + ) + + @property + def terms(self): + return self._terms + + def get_value(self, *args, **kwargs): + K = self.terms[0].get_value(*args, **kwargs) + for term in self.terms[1:]: + K += term.get_value(*args, **kwargs) + return K + + def get_psd(self, omega): + p = self.terms[0].get_psd(omega) + for term in self.terms[1:]: + p += term.get_psd(omega) + return p + + def get_celerite_matrices( + self, t, diag, *, c=None, a=None, U=None, V=None, X=None + ): + diag = tt.as_tensor_variable(diag) + matrices = [ + term.get_celerite_matrices(t, tt.zeros_like(diag), X=X) + for term in self.terms + ] + c = tt.concatenate([m[0] for m in matrices]) + a = diag + tt.sum([m[1] for m in matrices], axis=0) + U = tt.concatenate([m[2] for m in matrices], axis=1) + V = tt.concatenate([m[3] for m in matrices], axis=1) + return c, a, U, V + + class TermProduct(Term): __doc__ = base_terms.TermProduct.__doc__ def __init__(self, term1, term2, **kwargs): - int1 = isinstance(term1, TermConvolution) - int2 = isinstance(term2, TermConvolution) - if int1 or int2: + if ( + term1.__requires_general_addition__ + or term2.__requires_general_addition__ + ): raise TypeError( - "You cannot perform operations on an " - "TermConvolution, it must be the outer term in " - "the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term1 = term1 self.term2 = term2 @@ -228,11 +285,10 @@ class TermDiff(Term): __doc__ = base_terms.TermDiff.__doc__ def __init__(self, term, **kwargs): - if isinstance(term, TermConvolution): + if term.__requires_general_addition__: raise TypeError( - "You cannot perform operations on an " - "TermConvolution, it must be the outer term in " - "the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term = term super().__init__(**kwargs) @@ -253,8 +309,14 @@ def get_coefficients(self): class TermConvolution(Term): __doc__ = base_terms.TermConvolution.__doc__ + __requires_general_addition__ = True def __init__(self, term, delta, **kwargs): + if term.__requires_general_addition__: + raise TypeError( + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" + ) self.term = term self.delta = tt.as_tensor_variable(delta).astype("float64") super().__init__(**kwargs) @@ -324,67 +386,6 @@ def get_psd(self, omega): sinc = tt.switch(tt.neq(arg, 0), tt.sin(arg) / arg, tt.ones_like(arg)) return psd0 * sinc ** 2 - def get_value(self, tau0): - dt = self.delta - ar, cr, a, b, c, d = self.term.coefficients - - # Format the lags correctly - tau0 = tt.abs_(tau0) - tau = tau0[..., None] - - # Precompute some factors - dpt = dt + tau - dmt = dt - tau - - # Real parts: - # tau > Delta - crd = cr * dt - cosh = tt.cosh(crd) - norm = 2 * ar / crd ** 2 - K_large = tt.sum(norm * (cosh - 1) * tt.exp(-cr * tau), axis=-1) - - # tau < Delta - crdmt = cr * dmt - K_small = K_large + tt.sum(norm * (crdmt - tt.sinh(crdmt)), axis=-1) - - # Complex part - cd = c * dt - dd = d * dt - c2 = c ** 2 - d2 = d ** 2 - c2pd2 = c2 + d2 - C1 = a * (c2 - d2) + 2 * b * c * d - C2 = b * (c2 - d2) - 2 * a * c * d - norm = 1.0 / (dt * c2pd2) ** 2 - k0 = tt.exp(-c * tau) - cdt = tt.cos(d * tau) - sdt = tt.sin(d * tau) - - # For tau > Delta - cos_term = 2 * (tt.cosh(cd) * tt.cos(dd) - 1) - sin_term = 2 * (tt.sinh(cd) * tt.sin(dd)) - factor = k0 * norm - K_large += tt.sum( - (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1 - ) - K_large += tt.sum( - (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1 - ) - - # tau < Delta - edmt = tt.exp(-c * dmt) - edpt = tt.exp(-c * dpt) - cos_term = ( - edmt * tt.cos(d * dmt) + edpt * tt.cos(d * dpt) - 2 * k0 * cdt - ) - sin_term = ( - edmt * tt.sin(d * dmt) + edpt * tt.sin(d * dpt) - 2 * k0 * sdt - ) - K_small += tt.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1) - K_small += tt.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1) - - return tt.switch(tt.le(tau0, dt), K_small, K_large) - class RealTerm(Term): __doc__ = base_terms.RealTerm.__doc__ diff --git a/python/test/test_terms.py b/python/test/test_terms.py index 4ec2772..f1084ca 100644 --- a/python/test/test_terms.py +++ b/python/test/test_terms.py @@ -94,10 +94,9 @@ def test_consistency(oterm): np.random.seed(40582) x = np.sort(np.random.uniform(0, 10, 50)) diag = np.random.uniform(0.1, 0.3, len(x)) - assert np.allclose(oterm.get_value(x), term.get_value(x)) tau = x[:, None] - x[None, :] - K = term.get_value(tau) + K = term.get_value(x, x) assert np.allclose(oterm.get_value(tau), K) # And the power spectrum diff --git a/python/test/theano/test_core.py b/python/test/theano/test_core.py index c7082f8..6bbbacc 100644 --- a/python/test/theano/test_core.py +++ b/python/test/theano/test_core.py @@ -6,7 +6,7 @@ from celerite2 import terms as pyterms from celerite2.testing import check_gp_models from celerite2.theano import GaussianProcess, terms -from celerite2.theano.celerite2 import CITATIONS +from celerite2.theano.core import CITATIONS term_mark = pytest.mark.parametrize( "name,args", From 824e4b4302557271546f72ff027f75d1a51bc615 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 17 Nov 2020 21:38:27 -0500 Subject: [PATCH 03/14] adding initial test for latent term and derivative latent --- python/celerite2/__init__.py | 4 +- python/celerite2/latent.py | 148 ++++++++++++++++++++++++++--------- python/celerite2/terms.py | 22 ++++-- python/test/test_latent.py | 111 ++++++++++++++++++++++++++ 4 files changed, 243 insertions(+), 42 deletions(-) create mode 100644 python/test/test_latent.py diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py index 3751528..5670f70 100644 --- a/python/celerite2/__init__.py +++ b/python/celerite2/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -__all__ = ["__version__", "terms", "GaussianProcess"] -from . import terms +__all__ = ["__version__", "terms", "latent", "GaussianProcess"] +from . import latent, terms from .celerite2_version import __version__ from .core import GaussianProcess diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py index b2632c0..6de2f53 100644 --- a/python/celerite2/latent.py +++ b/python/celerite2/latent.py @@ -1,18 +1,36 @@ # -*- coding: utf-8 -*- -__all__ = ["LatentTerm", "prepare_rectangular_data", "KroneckerLatent"] +__all__ = [ + "prepare_rectangular_data", + "LatentTerm", + "KroneckerLatentTerm", +] import numpy as np -from .terms import Term +from . import terms -class LatentTerm(Term): +def prepare_rectangular_data(N, M, t, **kwargs): + results = dict( + t=np.tile(np.asarray(t)[:, None], (1, M)).flatten(), + X=np.tile(np.arange(M), N), + ) + + for k, v in kwargs.items(): + results[k] = np.ascontiguousarray( + np.broadcast_to(v, (N, M)), dtype=np.float64 + ).flatten() + + return results + + +class LatentTerm(terms.Term): __requires_general_addition__ = True - def __init__(self, term, left=None, right=None): + def __init__(self, term, left_latent=None, right_latent=None): self.term = term - self.left = left - self.right = right + self.left_latent = left_latent + self.right_latent = right_latent def get_psd(self, omega): raise NotImplementedError( @@ -35,12 +53,12 @@ def _get_latent(self, left_or_right, t, X): return left_or_right def get_left_latent(self, t, X): - return self._get_latent(self.left, t, X) + return self._get_latent(self.left_latent, t, X) def get_right_latent(self, t, X): - if self.right is None and self.left is not None: - return self._get_latent(self.left, t, X) - return self._get_latent(self.right, t, X) + if self.right_latent is None and self.left_latent is not None: + return self._get_latent(self.left_latent, t, X) + return self._get_latent(self.right_latent, t, X) def get_celerite_matrices( self, @@ -65,7 +83,7 @@ def get_celerite_matrices( J = c0.shape[0] * K c, a, U, V = self._resize_matrices(N, J, c, a, U, V) - c[:] = np.tile(c0, K) + c[:] = np.repeat(c0, K) U[:] = np.ascontiguousarray(U0[:, :, None] * left).reshape((N, -1)) V[:] = np.ascontiguousarray(V0[:, :, None] * right).reshape((N, -1)) a[:] = diag + np.sum(U * V, axis=-1) @@ -73,38 +91,98 @@ def get_celerite_matrices( return c, a, U, V -def prepare_rectangular_data(N, M, t, **kwargs): - results = dict( - t=np.tile(np.asarray(t)[:, None], (1, M)).flatten(), - X=np.tile(np.arange(M), N), - ) - - for k, v in kwargs.items(): - results[k] = np.ascontiguousarray( - np.broadcast_to(v, (N, M)), dtype=np.float64 - ).flatten() - - return results - - -class KroneckerLatent: - def __init__(self, *, R=None, L=None): +class KroneckerLatentTerm(LatentTerm): + def __init__(self, term, *, R=None, L=None): + self.R = None + self.L = None if R is not None: if L is not None: raise ValueError("Only one of 'R' and 'L' can be defined") - R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) - try: - self.L = np.linalg.cholesky(R) - except np.linalg.LinAlgError: - M = np.copy(R) - M[np.diag_indices_from(M)] += 1e-10 - self.L = np.linalg.cholesky(M) + self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) + super().__init__( + term, + left_latent=self._left_latent, + right_latent=self._right_latent, + ) + elif L is not None: self.L = np.ascontiguousarray(L, dtype=np.float64) if self.L.ndim == 1: self.L = np.reshape(self.L, (-1, 1)) + super().__init__( + term, + left_latent=self._lowrank_latent, + ) + else: raise ValueError("One of 'R' and 'L' must be defined") - def __call__(self, t, inds): + def _left_latent(self, t, inds): + return self.R[inds][:, None, :] + + def _right_latent(self, t, inds): + N = len(t) + latent = np.zeros((N, 1, self.R.shape[0])) + latent[(np.arange(N), 0, inds)] = 1.0 + return latent + + def _lowrank_latent(self, t, inds): return self.L[inds][:, None, :] + + +class _DerivativeHelperTerm(terms.Term): + def __init__(self, term): + self.term = term + + def get_coefficients(self): + coeffs = self.term.get_coefficients() + a, b, c, d = coeffs[2:] + final_coeffs = [ + -coeffs[0] * coeffs[1], + coeffs[1], + b * d - c * a, + -(a * d + b * c), + c, + d, + ] + return final_coeffs + + +class DerivativeLatentTerm(LatentTerm): + def __init__(self, term): + if term.__requires_general_addition__: + raise TypeError( + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" + ) + + ar, cr, ac, bc, cc, dc = term.get_coefficients() + + Jr = len(cr) + Jc = 2 * len(cc) + J = Jr + Jc + + self.left = np.zeros((2, 4 * J, 1)) + self.right = np.zeros((2, 4 * J, 1)) + + self.left[0, :Jr] = 1.0 + self.left[0, 2 * Jr : 3 * Jr] = -1.0 + self.left[1, Jr : 2 * Jr] = 1.0 + self.left[1, 3 * Jr : 4 * Jr] = 1.0 + + self.right[0, : 2 * Jr] = 1.0 + self.right[1, 2 * Jr : 4 * Jr] = 1.0 + + dterm = _DerivativeHelperTerm(term) + d2term = terms.TermDiff(term) + super().__init__( + terms.TermSum(term, dterm, dterm, d2term), + left_latent=self._left_latent, + right_latent=self._right_latent, + ) + + def _left_latent(self, t, inds): + return self.left[inds] + + def _right_latent(self, t, inds): + return self.right[inds] diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index fbd1ba7..c3e14e3 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -182,12 +182,24 @@ def get_celerite_matrices( J = Jr + 2 * Jc c, a, U, V = self._resize_matrices(N, J, c, a, U, V) + arg = dc[None, :] * t[:, None] + cos = np.cos(arg) + sin = np.sin(arg) + c[:Jr] = cr - c[Jr::2] = cc - c[Jr + 1 :: 2] = cc - a, U, V = driver.get_celerite_matrices( - ar, ac, bc, dc, t, diag, a, U, V - ) + c[Jr : Jr + Jc] = cc + c[Jr + Jc :] = cc + + a[:] = diag + np.sum(ar) + np.sum(ac) + + U[:, :Jr] = ar[None, :] + U[:, Jr : Jr + Jc] = ac[None, :] * cos + bc[None, :] * sin + U[:, Jr + Jc :] = ac[None, :] * sin - bc[None, :] * cos + + V[:, :Jr] = 1.0 + V[:, Jr : Jr + Jc] = cos + V[:, Jr + Jc :] = sin + return c, a, U, V def dot(self, t, diag, y, *, X=None): diff --git a/python/test/test_latent.py b/python/test/test_latent.py new file mode 100644 index 0000000..3eb9b1c --- /dev/null +++ b/python/test/test_latent.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +import numpy as np +import pytest + +from celerite2 import latent, terms + + +@pytest.fixture +def data(): + N = 10 + M = 3 + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + y = np.random.randn(N * M, 3) + data = latent.prepare_rectangular_data(N, M, t=x) + + return N, M, x, data["t"], data["X"], y + + +def test_value(data): + N, M, t0, t, X, y = data + + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + 1e-5 + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term += terms.SHOTerm(sigma=0.23456, rho=3.4, Q=2.3) + kernel = latent.KroneckerLatentTerm(term, R=R) + + ar, cr, ac, bc, cc, dc = term.get_coefficients() + c0, a0, U0, V0 = term.get_celerite_matrices(t0, np.zeros_like(t0)) + c, a, U, V = kernel.get_celerite_matrices(t, np.zeros_like(t), X=X) + + assert c.shape == (len(c0) * M,) + assert a.shape == (N * M,) + assert U.shape == (N * M, len(c0) * M) + assert V.shape == (N * M, len(c0) * M) + + assert np.allclose(c, np.repeat(c0, M)) + assert np.allclose(a, (a0[:, None] * np.diag(R)[None, :]).flatten()) + assert np.allclose(U, np.kron(U0, R)) + assert np.allclose(V, np.kron(V0, np.eye(M))) + + K = kernel.get_value(t, X=X) + K0 = term.get_value(t0) + + assert np.allclose(K, np.kron(K0, R)) + + +# def test_low_rank_value(data): +# N, M, x, diag, y, t = data + +# alpha = np.random.randn(M) +# term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) +# term = kron.KronTerm(term0, L=alpha) + +# a, U, V, P = term.get_celerite_matrices(x, diag) +# assert a.shape == (N * M,) +# assert U.shape == (N * M, 2) +# assert V.shape == (N * M, 2) +# assert P.shape == (N * M - 1, 2) + +# check_value(term, x, diag, y, t) + +# full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) +# assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) + + +# def test_sum_value(data): +# N, M, x, diag, y, t = data + +# alpha = np.random.randn(M) +# R = np.random.randn(M, M) +# R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) +# R[np.triu_indices_from(R, 1)] = 0.0 +# R = np.dot(R, R.T) + +# term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) +# term = kron.KronTerm(term0, R=R) + kron.KronTerm(term0, L=alpha) + +# a, U, V, P = term.get_celerite_matrices(x, diag) +# assert a.shape == (N * M,) +# assert U.shape == (N * M, 2 * M + 2) +# assert V.shape == (N * M, 2 * M + 2) +# assert P.shape == (N * M - 1, 2 * M + 2) + +# check_value(term, x, diag, y, t) + + +# def test_missing_values(data): +# N, M, x, diag, y, t = data +# mask = np.random.rand(N, M) > 0.1 +# assert np.all(mask.sum(axis=1) > 0) + +# R = np.random.randn(M, M) +# R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) +# R[np.triu_indices_from(R, 1)] = 0.0 +# R = np.dot(R, R.T) + +# term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) +# term = kron.KronTerm(term0, R=R) + +# a, U, V, P = term.get_celerite_matrices(x, diag, mask=mask) +# assert a.shape == (mask.sum(),) +# assert U.shape == (mask.sum(), 2 * M) +# assert V.shape == (mask.sum(), 2 * M) +# assert P.shape == (mask.sum() - 1, 2 * M) + +# check_value(term, x, diag, y, t, mask=mask) From 54ee88cf790d25f8fa0faffb6e5e5821dc62a4fc Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 17 Nov 2020 21:45:47 -0500 Subject: [PATCH 04/14] derivative latent for complex terms --- python/celerite2/latent.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py index 6de2f53..7b85a8d 100644 --- a/python/celerite2/latent.py +++ b/python/celerite2/latent.py @@ -140,7 +140,7 @@ def get_coefficients(self): final_coeffs = [ -coeffs[0] * coeffs[1], coeffs[1], - b * d - c * a, + b * d - a * c, -(a * d + b * c), c, d, @@ -159,19 +159,29 @@ def __init__(self, term): ar, cr, ac, bc, cc, dc = term.get_coefficients() Jr = len(cr) - Jc = 2 * len(cc) - J = Jr + Jc + Jc = len(cc) + J = Jr + 2 * Jc self.left = np.zeros((2, 4 * J, 1)) self.right = np.zeros((2, 4 * J, 1)) - self.left[0, :Jr] = 1.0 - self.left[0, 2 * Jr : 3 * Jr] = -1.0 - self.left[1, Jr : 2 * Jr] = 1.0 - self.left[1, 3 * Jr : 4 * Jr] = 1.0 - - self.right[0, : 2 * Jr] = 1.0 - self.right[1, 2 * Jr : 4 * Jr] = 1.0 + if Jr: + self.left[0, :Jr] = 1.0 + self.left[0, 2 * Jr : 3 * Jr] = -1.0 + self.left[1, Jr : 2 * Jr] = 1.0 + self.left[1, 3 * Jr : 4 * Jr] = 1.0 + self.right[0, : 2 * Jr] = 1.0 + self.right[1, 2 * Jr : 4 * Jr] = 1.0 + + if Jc: + J0 = 4 * Jr + for J0 in [4 * Jr, 4 * Jr + 4 * Jc]: + self.left[0, J0 : J0 + Jc] = 1.0 + self.left[0, J0 + 2 * Jc : J0 + 3 * Jc] = -1.0 + self.left[1, J0 + Jc : J0 + 2 * Jc] = 1.0 + self.left[1, J0 + 3 * Jc : J0 + 4 * Jc] = 1.0 + self.right[0, J0 : J0 + 2 * Jc] = 1.0 + self.right[1, J0 + 2 * Jc : J0 + 4 * Jc] = 1.0 dterm = _DerivativeHelperTerm(term) d2term = terms.TermDiff(term) From ad65d6a1fe615292a80122eadab1b26bee4e4fa0 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Thu, 3 Dec 2020 10:56:30 -0500 Subject: [PATCH 05/14] remove derivative term --- python/celerite2/latent.py | 95 +++++++++----------------------------- python/celerite2/terms.py | 4 ++ 2 files changed, 26 insertions(+), 73 deletions(-) diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py index 7b85a8d..7993f31 100644 --- a/python/celerite2/latent.py +++ b/python/celerite2/latent.py @@ -27,11 +27,18 @@ def prepare_rectangular_data(N, M, t, **kwargs): class LatentTerm(terms.Term): __requires_general_addition__ = True - def __init__(self, term, left_latent=None, right_latent=None): + def __init__( + self, term, *, dimension, left_latent=None, right_latent=None + ): + self._dimension = int(dimension) self.term = term self.left_latent = left_latent self.right_latent = right_latent + @property + def dimension(self): + return self._dimension + def get_psd(self, omega): raise NotImplementedError( "The PSD is not implemented for latent models" @@ -73,19 +80,27 @@ def get_celerite_matrices( ): t = np.atleast_1d(t) diag = np.atleast_1d(diag) - c0, a0, U0, V0 = self.term.get_celerite_matrices(t, diag, X=X) left = self.get_left_latent(t, X) right = self.get_right_latent(t, X) + if left.shape != right.shape: + raise ValueError( + "The dimensions of the left and right latent models are " + "incompatible" + ) - N = len(t) + c0, a0, U0, V0 = self.term.get_celerite_matrices(t, diag, X=X) + U0 = U0[:, :, None] + V0 = V0[:, :, None] + + N = t.shape[0] K = left.shape[2] J = c0.shape[0] * K c, a, U, V = self._resize_matrices(N, J, c, a, U, V) c[:] = np.repeat(c0, K) - U[:] = np.ascontiguousarray(U0[:, :, None] * left).reshape((N, -1)) - V[:] = np.ascontiguousarray(V0[:, :, None] * right).reshape((N, -1)) + U[:] = np.ascontiguousarray(U0 * left).reshape((N, -1)) + V[:] = np.ascontiguousarray(V0 * right).reshape((N, -1)) a[:] = diag + np.sum(U * V, axis=-1) return c, a, U, V @@ -101,6 +116,7 @@ def __init__(self, term, *, R=None, L=None): self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) super().__init__( term, + dimension=self.R.shape[0], left_latent=self._left_latent, right_latent=self._right_latent, ) @@ -111,6 +127,7 @@ def __init__(self, term, *, R=None, L=None): self.L = np.reshape(self.L, (-1, 1)) super().__init__( term, + dimension=self.L.shape[0], left_latent=self._lowrank_latent, ) @@ -128,71 +145,3 @@ def _right_latent(self, t, inds): def _lowrank_latent(self, t, inds): return self.L[inds][:, None, :] - - -class _DerivativeHelperTerm(terms.Term): - def __init__(self, term): - self.term = term - - def get_coefficients(self): - coeffs = self.term.get_coefficients() - a, b, c, d = coeffs[2:] - final_coeffs = [ - -coeffs[0] * coeffs[1], - coeffs[1], - b * d - a * c, - -(a * d + b * c), - c, - d, - ] - return final_coeffs - - -class DerivativeLatentTerm(LatentTerm): - def __init__(self, term): - if term.__requires_general_addition__: - raise TypeError( - "You cannot perform operations on a term that requires general" - " addition, it must be the outer term in the kernel" - ) - - ar, cr, ac, bc, cc, dc = term.get_coefficients() - - Jr = len(cr) - Jc = len(cc) - J = Jr + 2 * Jc - - self.left = np.zeros((2, 4 * J, 1)) - self.right = np.zeros((2, 4 * J, 1)) - - if Jr: - self.left[0, :Jr] = 1.0 - self.left[0, 2 * Jr : 3 * Jr] = -1.0 - self.left[1, Jr : 2 * Jr] = 1.0 - self.left[1, 3 * Jr : 4 * Jr] = 1.0 - self.right[0, : 2 * Jr] = 1.0 - self.right[1, 2 * Jr : 4 * Jr] = 1.0 - - if Jc: - J0 = 4 * Jr - for J0 in [4 * Jr, 4 * Jr + 4 * Jc]: - self.left[0, J0 : J0 + Jc] = 1.0 - self.left[0, J0 + 2 * Jc : J0 + 3 * Jc] = -1.0 - self.left[1, J0 + Jc : J0 + 2 * Jc] = 1.0 - self.left[1, J0 + 3 * Jc : J0 + 4 * Jc] = 1.0 - self.right[0, J0 : J0 + 2 * Jc] = 1.0 - self.right[1, J0 + 2 * Jc : J0 + 4 * Jc] = 1.0 - - dterm = _DerivativeHelperTerm(term) - d2term = terms.TermDiff(term) - super().__init__( - terms.TermSum(term, dterm, dterm, d2term), - left_latent=self._left_latent, - right_latent=self._right_latent, - ) - - def _left_latent(self, t, inds): - return self.left[inds] - - def _right_latent(self, t, inds): - return self.right[inds] diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index c3e14e3..64e7c0b 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -45,6 +45,10 @@ def __add__(self, b): def __mul__(self, b): return TermProduct(self, b) + @property + def dimension(self): + return 0 + @property def terms(self): return [self] From 60df5ad4dfa90c70385f98d898c43570302c95e8 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 8 Dec 2020 11:14:07 -0500 Subject: [PATCH 06/14] install ipykernel for tutorials --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index d876eb0..c8b30de 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ "pymc3", "tqdm", "numpyro", + "ipykernel", ], } EXTRA_REQUIRE["dev"] = ( From 7b2ee4af64d34bf31960f62d50dce59e1058a300 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 8 Dec 2020 12:44:23 -0500 Subject: [PATCH 07/14] tutorial installation --- .github/workflows/tutorials.yml | 4 +- python/celerite2/latent.py | 18 ++++---- python/test/test_latent.py | 75 +++++++++------------------------ 3 files changed, 32 insertions(+), 65 deletions(-) diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml index 2f16822..37f4ff7 100644 --- a/.github/workflows/tutorials.yml +++ b/.github/workflows/tutorials.yml @@ -27,7 +27,9 @@ jobs: - name: Install dependencies run: | python -m pip install -U pip - python -m pip install --use-feature=2020-resolver ".[tutorials]" + python -m pip install --use-feature=2020-resolver jupytext ipykernel theano jupyter + python -m ipykernel install --name python + # python -m pip install --use-feature=2020-resolver ".[tutorials]" - name: Get theano compiledir id: compiledir diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py index 7993f31..f75fe3e 100644 --- a/python/celerite2/latent.py +++ b/python/celerite2/latent.py @@ -107,16 +107,16 @@ def get_celerite_matrices( class KroneckerLatentTerm(LatentTerm): - def __init__(self, term, *, R=None, L=None): - self.R = None + def __init__(self, term, *, K=None, L=None): + self.K = None self.L = None - if R is not None: + if K is not None: if L is not None: - raise ValueError("Only one of 'R' and 'L' can be defined") - self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) + raise ValueError("Only one of 'K' and 'L' can be defined") + self.K = np.ascontiguousarray(np.atleast_2d(K), dtype=np.float64) super().__init__( term, - dimension=self.R.shape[0], + dimension=self.K.shape[0], left_latent=self._left_latent, right_latent=self._right_latent, ) @@ -132,14 +132,14 @@ def __init__(self, term, *, R=None, L=None): ) else: - raise ValueError("One of 'R' and 'L' must be defined") + raise ValueError("One of 'K' and 'L' must be defined") def _left_latent(self, t, inds): - return self.R[inds][:, None, :] + return self.K[inds][:, None, :] def _right_latent(self, t, inds): N = len(t) - latent = np.zeros((N, 1, self.R.shape[0])) + latent = np.zeros((N, 1, self.K.shape[0])) latent[(np.arange(N), 0, inds)] = 1.0 return latent diff --git a/python/test/test_latent.py b/python/test/test_latent.py index 3eb9b1c..7433740 100644 --- a/python/test/test_latent.py +++ b/python/test/test_latent.py @@ -27,7 +27,7 @@ def test_value(data): term = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) term += terms.SHOTerm(sigma=0.23456, rho=3.4, Q=2.3) - kernel = latent.KroneckerLatentTerm(term, R=R) + kernel = latent.KroneckerLatentTerm(term, K=R) ar, cr, ac, bc, cc, dc = term.get_coefficients() c0, a0, U0, V0 = term.get_celerite_matrices(t0, np.zeros_like(t0)) @@ -49,63 +49,28 @@ def test_value(data): assert np.allclose(K, np.kron(K0, R)) -# def test_low_rank_value(data): -# N, M, x, diag, y, t = data - -# alpha = np.random.randn(M) -# term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) -# term = kron.KronTerm(term0, L=alpha) - -# a, U, V, P = term.get_celerite_matrices(x, diag) -# assert a.shape == (N * M,) -# assert U.shape == (N * M, 2) -# assert V.shape == (N * M, 2) -# assert P.shape == (N * M - 1, 2) - -# check_value(term, x, diag, y, t) - -# full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) -# assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) - - -# def test_sum_value(data): -# N, M, x, diag, y, t = data - -# alpha = np.random.randn(M) -# R = np.random.randn(M, M) -# R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) -# R[np.triu_indices_from(R, 1)] = 0.0 -# R = np.dot(R, R.T) - -# term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) -# term = kron.KronTerm(term0, R=R) + kron.KronTerm(term0, L=alpha) - -# a, U, V, P = term.get_celerite_matrices(x, diag) -# assert a.shape == (N * M,) -# assert U.shape == (N * M, 2 * M + 2) -# assert V.shape == (N * M, 2 * M + 2) -# assert P.shape == (N * M - 1, 2 * M + 2) - -# check_value(term, x, diag, y, t) +def test_low_rank_value(data): + N, M, t0, t, X, y = data + alpha = np.random.randn(M) + R = np.outer(alpha, alpha) + term = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term += terms.SHOTerm(sigma=0.23456, rho=3.4, Q=2.3) + kernel = latent.KroneckerLatentTerm(term, L=alpha) -# def test_missing_values(data): -# N, M, x, diag, y, t = data -# mask = np.random.rand(N, M) > 0.1 -# assert np.all(mask.sum(axis=1) > 0) + ar, cr, ac, bc, cc, dc = term.get_coefficients() + c0, a0, U0, V0 = term.get_celerite_matrices(t0, np.zeros_like(t0)) + c, a, U, V = kernel.get_celerite_matrices(t, np.zeros_like(t), X=X) -# R = np.random.randn(M, M) -# R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) -# R[np.triu_indices_from(R, 1)] = 0.0 -# R = np.dot(R, R.T) + assert c.shape == (len(c0),) + assert a.shape == (N * M,) + assert U.shape == (N * M, len(c0)) + assert V.shape == (N * M, len(c0)) -# term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) -# term = kron.KronTerm(term0, R=R) + assert np.allclose(c, c0) + assert np.allclose(a, (a0[:, None] * np.diag(R)[None, :]).flatten()) -# a, U, V, P = term.get_celerite_matrices(x, diag, mask=mask) -# assert a.shape == (mask.sum(),) -# assert U.shape == (mask.sum(), 2 * M) -# assert V.shape == (mask.sum(), 2 * M) -# assert P.shape == (mask.sum() - 1, 2 * M) + K = kernel.get_value(t, X=X) + K0 = term.get_value(t0) -# check_value(term, x, diag, y, t, mask=mask) + assert np.allclose(K, np.kron(K0, R)) From 8c59418c44a5061dd17364d84a9cdd7db39fddea Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 8 Dec 2020 14:25:45 -0500 Subject: [PATCH 08/14] using conda to install python for tutorials --- .github/workflows/tutorials.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml index 37f4ff7..20f0ef3 100644 --- a/.github/workflows/tutorials.yml +++ b/.github/workflows/tutorials.yml @@ -20,11 +20,13 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v2 + uses: conda-incubator/setup-miniconda@v2 with: python-version: 3.8 + auto-update-conda: true - name: Install dependencies + shell: bash -l {0} run: | python -m pip install -U pip python -m pip install --use-feature=2020-resolver jupytext ipykernel theano jupyter @@ -32,6 +34,7 @@ jobs: # python -m pip install --use-feature=2020-resolver ".[tutorials]" - name: Get theano compiledir + shell: bash -l {0} id: compiledir run: | python -c "import theano; print('::set-output name=compiledir::' + theano.config.compiledir.split('/')[-1])" @@ -46,6 +49,7 @@ jobs: tutorials- - name: Execute the notebooks + shell: bash -l {0} run: | jupytext --to ipynb --execute docs/tutorials/*.py From 8f8cb9022441da4cc643604f5a4ddc064b3c8075 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 8 Dec 2020 14:37:09 -0500 Subject: [PATCH 09/14] install wontcha --- .github/workflows/tutorials.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml index 20f0ef3..b83f86e 100644 --- a/.github/workflows/tutorials.yml +++ b/.github/workflows/tutorials.yml @@ -29,8 +29,7 @@ jobs: shell: bash -l {0} run: | python -m pip install -U pip - python -m pip install --use-feature=2020-resolver jupytext ipykernel theano jupyter - python -m ipykernel install --name python + python -m pip install jupytext ipykernel theano jupyter # python -m pip install --use-feature=2020-resolver ".[tutorials]" - name: Get theano compiledir From e06cb95b63a673c9fee08fefa66fec4e536f6ea9 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 8 Dec 2020 14:40:14 -0500 Subject: [PATCH 10/14] properly install --- .github/workflows/tutorials.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml index b83f86e..258ad09 100644 --- a/.github/workflows/tutorials.yml +++ b/.github/workflows/tutorials.yml @@ -29,8 +29,7 @@ jobs: shell: bash -l {0} run: | python -m pip install -U pip - python -m pip install jupytext ipykernel theano jupyter - # python -m pip install --use-feature=2020-resolver ".[tutorials]" + python -m pip install ".[tutorials]" - name: Get theano compiledir shell: bash -l {0} From 9ea409b843eefe5e30792c316df3280fb3863fc6 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 8 Dec 2020 16:01:34 -0500 Subject: [PATCH 11/14] installation --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c8b30de..ff7671c 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ "tutorials": [ "jupytext", "jupyter", - "nbconvert", + # "nbconvert", "matplotlib", "scipy", "emcee", From 0459533d4623b1acb430fd9d2b0059c346877615 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 8 Dec 2020 16:33:46 -0500 Subject: [PATCH 12/14] wtf --- .github/workflows/tutorials.yml | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml index 258ad09..d25277d 100644 --- a/.github/workflows/tutorials.yml +++ b/.github/workflows/tutorials.yml @@ -29,6 +29,7 @@ jobs: shell: bash -l {0} run: | python -m pip install -U pip + python -m pip install jupytext ipykernel theano jupyter python -m pip install ".[tutorials]" - name: Get theano compiledir diff --git a/setup.py b/setup.py index ff7671c..c8b30de 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ "tutorials": [ "jupytext", "jupyter", - # "nbconvert", + "nbconvert", "matplotlib", "scipy", "emcee", From 020a2a9d4eda76c0c88bbb95e3b7a331169735f0 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 8 Dec 2020 17:17:11 -0500 Subject: [PATCH 13/14] jupytext version --- .github/workflows/tutorials.yml | 1 - setup.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml index d25277d..258ad09 100644 --- a/.github/workflows/tutorials.yml +++ b/.github/workflows/tutorials.yml @@ -29,7 +29,6 @@ jobs: shell: bash -l {0} run: | python -m pip install -U pip - python -m pip install jupytext ipykernel theano jupyter python -m pip install ".[tutorials]" - name: Get theano compiledir diff --git a/setup.py b/setup.py index c8b30de..4d94f56 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "ipython", ], "tutorials": [ - "jupytext", + "jupytext<1.7.0", "jupyter", "nbconvert", "matplotlib", From 4d1e05c32c544c271268d9e6576b6fdfd2b6e1aa Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 9 Dec 2020 07:33:59 -0500 Subject: [PATCH 14/14] adding convolution term value back in --- python/celerite2/terms.py | 72 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index 64e7c0b..530a6e7 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -495,6 +495,78 @@ def get_celerite_matrices( t, new_diag, c=c, a=a, U=U, V=V, X=X ) + def get_value(self, t, t2=None, *, diag=None, X=None, X2=None): + t = np.atleast_1d(t) + if t2 is None: + tau0 = t[:, None] - t[None, :] + else: + tau0 = t[:, None] - np.atleast_1d(t2)[None, :] + + dt = self.delta + ar, cr, a, b, c, d = self.term.get_coefficients() + + # Format the lags correctly + tau0 = np.abs(np.atleast_1d(tau0)) + tau = tau0[..., None] + + # Precompute some factors + dpt = dt + tau + dmt = dt - tau + + # Real parts: + # tau > Delta + crd = cr * dt + cosh = np.cosh(crd) + norm = 2 * ar / crd ** 2 + K_large = np.sum(norm * (cosh - 1) * np.exp(-cr * tau), axis=-1) + + # tau < Delta + crdmt = cr * dmt + K_small = K_large + np.sum(norm * (crdmt - np.sinh(crdmt)), axis=-1) + + # Complex part + cd = c * dt + dd = d * dt + c2 = c ** 2 + d2 = d ** 2 + c2pd2 = c2 + d2 + C1 = a * (c2 - d2) + 2 * b * c * d + C2 = b * (c2 - d2) - 2 * a * c * d + norm = 1.0 / (dt * c2pd2) ** 2 + k0 = np.exp(-c * tau) + cdt = np.cos(d * tau) + sdt = np.sin(d * tau) + + # For tau > Delta + cos_term = 2 * (np.cosh(cd) * np.cos(dd) - 1) + sin_term = 2 * (np.sinh(cd) * np.sin(dd)) + factor = k0 * norm + K_large += np.sum( + (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1 + ) + K_large += np.sum( + (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1 + ) + + # tau < Delta + edmt = np.exp(-c * dmt) + edpt = np.exp(-c * dpt) + cos_term = ( + edmt * np.cos(d * dmt) + edpt * np.cos(d * dpt) - 2 * k0 * cdt + ) + sin_term = ( + edmt * np.sin(d * dmt) + edpt * np.sin(d * dpt) - 2 * k0 * sdt + ) + K_small += np.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1) + K_small += np.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1) + + mask = tau0 >= dt + K = K_large * mask + K_small * (~mask) + + if diag is not None: + K[np.diag_indices_from(K)] += diag + return K + def get_coefficients(self): ar, cr, a, b, c, d = self.term.get_coefficients()