From 04675bc5815e940e8844535383161c4136d9ad39 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Thu, 12 Nov 2020 15:16:03 -0500
Subject: [PATCH 01/14] re-starting latent model impl

---
 python/celerite2/__init__.py                  |   2 +-
 python/celerite2/celerite2.py                 | 585 ------------------
 python/celerite2/core.py                      | 309 +++++++--
 python/celerite2/jax/__init__.py              |   2 +-
 .../celerite2/jax/{celerite2.py => core.py}   |  41 +-
 python/celerite2/latent.py                    | 105 ++++
 python/celerite2/numpy.py                     | 120 ----
 python/celerite2/terms.py                     | 185 ++++--
 python/celerite2/theano/__init__.py           |   2 +-
 .../theano/{celerite2.py => core.py}          |  21 +
 .../jax/{test_celerite2.py => test_core.py}   |   0
 .../test/{test_celerite2.py => test_core.py}  |   0
 .../{test_celerite2.py => test_core.py}       |   0
 13 files changed, 534 insertions(+), 838 deletions(-)
 delete mode 100644 python/celerite2/celerite2.py
 rename python/celerite2/jax/{celerite2.py => core.py} (69%)
 create mode 100644 python/celerite2/latent.py
 delete mode 100644 python/celerite2/numpy.py
 rename python/celerite2/theano/{celerite2.py => core.py} (90%)
 rename python/test/jax/{test_celerite2.py => test_core.py} (100%)
 rename python/test/{test_celerite2.py => test_core.py} (100%)
 rename python/test/theano/{test_celerite2.py => test_core.py} (100%)

diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py
index f172089..983cdf1 100644
--- a/python/celerite2/__init__.py
+++ b/python/celerite2/__init__.py
@@ -3,7 +3,7 @@
 __all__ = ["__version__", "terms", "GaussianProcess"]
 from . import terms
 from .celerite2_version import __version__
-from .numpy import GaussianProcess
+from .core import GaussianProcess
 
 __uri__ = "https://celerite2.readthedocs.io"
 __author__ = "Daniel Foreman-Mackey"
diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py
deleted file mode 100644
index c85c078..0000000
--- a/python/celerite2/celerite2.py
+++ /dev/null
@@ -1,585 +0,0 @@
-# -*- coding: utf-8 -*-
-
-__all__ = ["GaussianProcess"]
-
-import warnings
-from functools import wraps
-
-import numpy as np
-
-from . import driver
-from .driver import LinAlgError
-
-
-class ConstantMean:
-    def __init__(self, value=0.0):
-        self.value = value
-
-    def __call__(self, x):
-        return self.value
-
-
-def _handle_vector(func):
-    @wraps(func)
-    def wrapped(self, y, *args, **kwargs):
-        y = np.atleast_1d(y)
-        is_vector = False
-        if y.ndim == 1:
-            is_vector = True
-            y = y[:, None]
-        result = func(self, y, *args, **kwargs)
-        if is_vector:
-            return result[:, 0]
-        return result
-
-    return wrapped
-
-
-class GaussianProcess:
-    """The main interface to the celerite2 Gaussian Process (GP) solver
-
-    Args:
-        kernel: An instance of a subclass of :class:`terms.Term`.
-        mean (optional): The mean function of the process. This can either
-            be a callable (it will be evaluated with a single argument, a
-            vector of ``x`` values) or a scalar. (default: ``0.0``)
-        **kwargs: Other arguments will be passed directly to
-            :func:`GaussianProcess.compute` if the argument ``t`` is specified.
-    """
-
-    def __init__(self, kernel, t=None, *, mean=0.0, **kwargs):
-        self.kernel = kernel
-        self.mean = mean
-
-        # Placeholders for storing data
-        self._t = None
-        self._mean_value = None
-        self._diag = None
-        self._size = None
-        self._log_det = -np.inf
-        self._norm = np.inf
-
-        # Placeholders to celerite matrices
-        self._c = np.empty(0, dtype=np.float64)
-        self._U = np.empty((0, 0), dtype=np.float64)
-        self._V = np.empty((0, 0), dtype=np.float64)
-        self._d = np.empty(0, dtype=np.float64)
-
-        if t is not None:
-            self.compute(t, **kwargs)
-
-    @property
-    def mean(self):
-        return self._mean
-
-    @mean.setter
-    def mean(self, mean):
-        if callable(mean):
-            self._mean = mean
-        else:
-            self._mean = ConstantMean(mean)
-
-    @property
-    def mean_value(self):
-        if self._mean_value is None:
-            raise RuntimeError(
-                "'compute' must be executed before accessing mean_value"
-            )
-        return self._mean_value
-
-    def compute(
-        self, t, *, yerr=None, diag=None, check_sorted=True, quiet=False
-    ):
-        """Compute the Cholesky factorization of the GP covariance matrix
-
-        Args:
-            t (shape[N]): The independent coordinates of the observations.
-                This must be sorted in increasing order.
-            yerr (shape[N], optional): If provided, the diagonal standard
-                deviation of the observation model.
-            diag (shape[N], optional): If provided, the diagonal variance of
-                the observation model.
-            check_sorted (bool, optional): If ``True``, a check is performed to
-                make sure that ``t`` is correctly sorted. A ``ValueError`` will
-                be thrown when this check fails.
-            quiet (bool, optional): If ``True``, when the matrix cannot be
-                factorized (because of numerics or otherwise) the solver's
-                ``LinAlgError`` will be silenced and the determiniant will be
-                set to zero. Otherwise, the exception will be propagated.
-
-        Raises:
-            ValueError: When the inputs are not valid (shape, number, etc.).
-            LinAlgError: When the matrix is not numerically positive definite.
-        """
-        # Check the input coordinates
-        t = np.ascontiguousarray(t, dtype=np.float64)
-        if check_sorted and np.any(np.diff(t) < 0.0):
-            raise ValueError("The input coordinates must be sorted")
-        if t.ndim != 1:
-            raise ValueError("The input coordinates must be one dimensional")
-
-        # Save the diagonal
-        self._t = t
-        self._size = self._t.shape[0]
-        self._mean_value = self._mean(self._t)
-        self._diag = np.empty_like(self._t)
-        if yerr is None and diag is None:
-            self._diag[:] = 0.0
-
-        elif yerr is not None:
-            if diag is not None:
-                raise ValueError(
-                    "only one of 'diag' and 'yerr' can be provided"
-                )
-            self._diag[:] = np.atleast_1d(yerr) ** 2
-
-        else:
-            self._diag[:] = np.atleast_1d(diag)
-
-        # Fill the celerite matrices
-        (
-            self._c,
-            self._d,
-            self._U,
-            self._V,
-        ) = self.kernel.get_celerite_matrices(
-            self._t, self._diag, c=self._c, a=self._d, U=self._U, V=self._V
-        )
-
-        # Compute the Cholesky factorization
-        try:
-            self._d, self._W = driver.factor(
-                self._t,
-                self._c,
-                self._d,
-                self._U,
-                self._V,
-                self._d,
-                np.copy(self._V),
-            )
-        except LinAlgError:
-            if not quiet:
-                raise
-            self._log_det = -np.inf
-            self._norm = np.inf
-        else:
-            self._log_det = np.sum(np.log(self._d))
-            self._norm = -0.5 * (
-                self._log_det + self._size * np.log(2 * np.pi)
-            )
-
-    def recompute(self, *, quiet=False):
-        """Re-compute the factorization given a previous call to compute
-
-        Args:
-            quiet (bool, optional): If ``True``, when the matrix cannot be
-                factorized (because of numerics or otherwise) the solver's
-                ``LinAlgError`` will be silenced and the determiniant will be
-                set to zero. Otherwise, the exception will be propagated.
-
-        Raises:
-            RuntimeError: If :func:`GaussianProcess.compute` is not called
-                first.
-            ValueError: When the inputs are not valid (shape, number, etc.).
-        """
-        if self._t is None:
-            raise RuntimeError(
-                "you must call 'compute' directly  at least once"
-            )
-        return self.compute(
-            self._t, diag=self._diag, check_sorted=False, quiet=quiet
-        )
-
-    def _process_input(self, y, *, inplace=False, require_vector=False):
-        y = np.atleast_1d(y)
-        if self._t is None:
-            raise RuntimeError("you must call 'compute' first")
-        if self._t.shape[0] != y.shape[0]:
-            raise ValueError("dimension mismatch")
-        if require_vector and self._t.shape != y.shape:
-            raise ValueError("'y' must be one dimensional")
-        if inplace:
-            if (
-                y.dtype != "float64"
-                or not y.flags.c_contiguous
-                or not y.flags.writeable
-            ):
-                warnings.warn(
-                    "Inplace operations can only be made on C-contiguous, "
-                    "writable, float64 arrays; a copy will be made"
-                )
-            y = np.ascontiguousarray(y, dtype=np.float64)
-        else:
-            y = np.array(y, dtype=np.float64, copy=True, order="C")
-        return y
-
-    @_handle_vector
-    def _apply_inverse(self, y):
-        z = driver.solve_lower(self._t, self._c, self._U, self._W, y, y)
-        z /= self._d[:, None]
-        z = driver.solve_upper(self._t, self._c, self._U, self._W, z, z)
-        return z
-
-    def apply_inverse(self, y, *, inplace=False):
-        """Apply the inverse of the covariance matrix to a vector or matrix
-
-        Solve ``K.x = y`` for ``x`` where ``K`` is the covariance matrix of
-        the GP.
-
-        .. note:: The mean function is not applied in this method.
-
-        Args:
-            y (shape[N] or shape[N, M]): The vector or matrix ``y`` described
-                above.
-            inplace (bool, optional): If ``True``, ``y`` will be overwritten
-                with the result ``x``.
-
-        Raises:
-            RuntimeError: If :func:`GaussianProcess.compute` is not called
-                first.
-            ValueError: When the inputs are not valid (shape, number, etc.).
-        """
-        y = self._process_input(y, inplace=inplace)
-        return self._apply_inverse(y)
-
-    @_handle_vector
-    def _dot_tril(self, y):
-        z = y * np.sqrt(self._d)[:, None]
-        return driver.matmul_lower(self._t, self._c, self._U, self._W, z, z)
-
-    def dot_tril(self, y, *, inplace=False):
-        """Dot the Cholesky factor of the GP system into a vector or matrix
-
-        Compute ``x = L.y`` where ``K = L.L^T`` and ``K`` is the covariance
-        matrix of the GP.
-
-        .. note:: The mean function is not applied in this method.
-
-        Args:
-            y (shape[N] or shape[N, M]): The vector or matrix ``y`` described
-                above.
-            inplace (bool, optional): If ``True``, ``y`` will be overwritten
-                with the result ``x``.
-
-        Raises:
-            RuntimeError: If :func:`GaussianProcess.compute` is not called
-                first.
-            ValueError: When the inputs are not valid (shape, number, etc.).
-        """
-        y = self._process_input(y, inplace=inplace)
-        return self._dot_tril(y)
-
-    def log_likelihood(self, y, *, inplace=False):
-        """Compute the marginalized likelihood of the GP model
-
-        The factorized matrix from the previous call to
-        :func:`GaussianProcess.compute` is used so that method must be called
-        first.
-
-        Args:
-            y (shape[N]): The observations at coordinates ``t`` as defined by
-                :func:`GaussianProcess.compute`.
-            inplace (bool, optional): If ``True``, ``y`` will be overwritten
-                in the process of the calculation. This will reduce the memory
-                footprint, but should be used with care since this will
-                overwrite the data.
-
-        Raises:
-            RuntimeError: If :func:`GaussianProcess.compute` is not called
-                first.
-            ValueError: When the inputs are not valid (shape, number, etc.).
-        """
-        y = self._process_input(y, inplace=inplace, require_vector=True)
-        if not np.isfinite(self._log_det):
-            return -np.inf
-        alpha = (y - self._mean_value)[:, None]
-        alpha = driver.solve_lower(
-            self._t, self._c, self._U, self._W, alpha, alpha
-        )[:, 0]
-        loglike = self._norm - 0.5 * np.sum(alpha ** 2 / self._d)
-        if not np.isfinite(loglike):
-            return -np.inf
-        return loglike
-
-    def predict(
-        self,
-        y,
-        t=None,
-        *,
-        return_cov=False,
-        return_var=False,
-        include_mean=True,
-        kernel=None,
-    ):
-        """Compute the conditional distribution
-
-        The factorized matrix from the previous call to
-        :func:`GaussianProcess.compute` is used so that method must be called
-        first.
-
-        Args:
-            y (shape[N]): The observations at coordinates ``t`` as defined by
-                :func:`GaussianProcess.compute`.
-            t (shape[M], optional): The independent coordinates where the
-                prediction should be evaluated. If not provided, this will be
-                evaluated at the observations ``t`` from
-                :func:`GaussianProcess.compute`.
-            return_var (bool, optional): Return the variance of the conditional
-                distribution.
-            return_cov (bool, optional): Return the full covariance matrix of
-                the conditional distribution.
-            include_mean (bool, optional): Include the mean function in the
-                prediction.
-            kernel (optional): If provided, compute the conditional
-                distribution using a different kernel. This is generally used
-                to separate the contributions from different model components.
-                Note that the computational cost and scaling will be worse
-                when using this parameter.
-
-        Raises:
-            RuntimeError: If :func:`GaussianProcess.compute` is not called
-                first.
-            ValueError: When the inputs are not valid (shape, number, etc.).
-        """
-        y = self._process_input(y, inplace=True, require_vector=True)
-        return ConditionalDistribution(
-            self, y, t=t, include_mean=include_mean, kernel=kernel
-        )
-
-    def sample(self, *, size=None, include_mean=True):
-        """Generate random samples from the prior implied by the GP system
-
-        The factorized matrix from the previous call to
-        :func:`GaussianProcess.compute` is used so that method must be called
-        first.
-
-        Args:
-            size (int, optional): The number of samples to generate. If not
-                provided, only one sample will be produced.
-            include_mean (bool, optional): Include the mean function in the
-                prediction.
-
-        Raises:
-            RuntimeError: If :func:`GaussianProcess.compute` is not called
-                first.
-            ValueError: When the inputs are not valid (shape, number, etc.).
-        """
-
-        if self._t is None:
-            raise RuntimeError("you must call 'compute' first")
-        if size is None:
-            n = np.random.randn(self._size)
-        else:
-            n = np.random.randn(self._size, size)
-        result = self.dot_tril(n, inplace=True).T
-        if include_mean:
-            result += self._mean_value
-        return result
-
-    def sample_conditional(
-        self,
-        y,
-        t=None,
-        *,
-        size=None,
-        regularize=None,
-        include_mean=True,
-        kernel=None,
-    ):
-        """Sample from the conditional (predictive) distribution
-
-        .. note:: this method scales as ``O(M^3)`` for large ``M``, where
-            ``M == len(t)``.
-
-        Args:
-            y (shape[N]): The observations at coordinates ``x`` from
-                :func:`GausianProcess.compute`.
-            t (shape[M], optional): The independent coordinates where the
-                prediction should be made. If this is omitted the coordinates
-                will be assumed to be ``x`` from
-                :func:`GaussianProcess.compute` and an efficient method will
-                be used to compute the mean prediction.
-            size (int, optional): The number of samples to generate. If not
-                provided, only one sample will be produced.
-            regularize (float, optional): For poorly conditioned systems, you
-                can provide a small number here to regularize the predictive
-                covariance. This number will be added to the diagonal.
-            include_mean (bool, optional): Include the mean function in the
-                prediction.
-            kernel (optional): If provided, compute the conditional
-                distribution using a different kernel. This is generally used
-                to separate the contributions from different model components.
-                Note that the computational cost and scaling will be worse
-                when using this parameter.
-        """
-        mu, cov = self.predict(
-            y, t, return_cov=True, include_mean=include_mean, kernel=kernel
-        )
-        if regularize is not None:
-            cov[np.diag_indices_from(cov)] += regularize
-        return np.random.multivariate_normal(mu, cov, size=size)
-
-
-class ConditionalDistribution:
-    def __init__(
-        self,
-        gp,
-        y,
-        t=None,
-        *,
-        include_mean=True,
-        kernel=None,
-    ):
-        self.gp = gp
-        self.y = y
-        self.t = t
-        self.include_mean = include_mean
-        self.kernel = kernel
-
-        self._c2 = None
-        self._U1 = None
-        self._V1 = None
-        self._U2 = None
-        self._V2 = None
-
-        self._KxsT = None
-        self._Kinv_KxsT = None
-
-        if self.t is None:
-            self._xs = self.gp._t
-        else:
-            self._xs = np.ascontiguousarray(self.t, dtype=np.float64)
-            if self._xs.ndim != 1:
-                raise ValueError("'t' must be one-dimensional")
-
-    @property
-    def KxsT(self):
-        if self._KxsT is None:
-            tau = self.gp._t[:, None] - self._xs[None, :]
-            if self.kernel is None:
-                self._KxsT = self.gp.kernel.get_value(tau)
-            else:
-                self._KxsT = self.kernel.get_value(tau)
-        return self._KxsT
-
-    @property
-    def Kinv_KxsT(self):
-        if self._Kinv_KxsT is None:
-            self._Kinv_KxsT = self.gp.apply_inverse(self.KxsT, inplace=False)
-        return self._Kinv_KxsT
-
-    def _do_dot(self, inp, target):
-        if self.kernel is None:
-            kernel = self.gp.kernel
-            U1 = self.gp._U
-            V1 = self.gp._V
-        else:
-            kernel = self.kernel
-            if self._U1 is None or self._V1 is None:
-                _, _, self._U1, self._V1 = kernel.get_celerite_matrices(
-                    self.gp._t,
-                    np.zeros_like(self.gp._t),
-                    U=self._U1,
-                    V=self._V1,
-                )
-            U1 = self._U1
-            V1 = self._V1
-
-        if self._c2 is None or self._U2 is None or self._V2 is None:
-            self._c2, _, self._U2, self._V2 = kernel.get_celerite_matrices(
-                self._xs,
-                np.zeros_like(self._xs),
-                c=self._c2,
-                U=self._U2,
-                V=self._V2,
-            )
-        c = self._c2
-        U2 = self._U2
-        V2 = self._V2
-
-        is_vector = False
-        if inp.ndim == 1:
-            is_vector = True
-            inp = inp[:, None]
-            target = target[:, None]
-
-        target = driver.general_matmul_lower(
-            self._xs, self.gp._t, c, U2, V1, inp, target
-        )
-        target = driver.general_matmul_upper(
-            self._xs, self.gp._t, c, V2, U1, inp, target
-        )
-
-        if is_vector:
-            return target[:, 0]
-        return target
-
-    @property
-    def mean(self):
-        alpha = self.gp._apply_inverse(self.y - self.gp._mean_value)
-
-        if self.t is None and self.kernel is None:
-            mu = self.y - self.gp._diag * alpha
-            if not self.include_mean:
-                mu -= self.gp._mean_value
-            return mu
-
-        mu = np.zeros_like(self._xs)
-        mu = self._do_dot(alpha, mu)
-
-        if self.include_mean:
-            mu += self.gp._mean(self._xs)
-        return mu
-
-    @property
-    def variance(self):
-        if self.kernel is None:
-            kernel = self.gp.kernel
-        else:
-            kernel = self.kernel
-        return kernel.get_value(0.0) - np.einsum(
-            "ij,ij->j", self.KxsT, self.Kinv_KxsT
-        )
-
-    @property
-    def covariance(self):
-        if self.kernel is None:
-            kernel = self.gp.kernel
-        else:
-            kernel = self.kernel
-        neg_cov = -kernel.get_value(self._xs[:, None] - self._xs[None, :])
-        neg_cov = self._do_dot(self.Kinv_KxsT, neg_cov)
-        return -neg_cov
-
-    def sample(self, *, size=None, regularize=None):
-        """Sample from the conditional (predictive) distribution
-
-        .. note:: this method scales as ``O(M^3)`` for large ``M``, where
-            ``M == len(t)``.
-
-        Args:
-            y (shape[N]): The observations at coordinates ``x`` from
-                :func:`GausianProcess.compute`.
-            t (shape[M], optional): The independent coordinates where the
-                prediction should be made. If this is omitted the coordinates
-                will be assumed to be ``x`` from
-                :func:`GaussianProcess.compute` and an efficient method will
-                be used to compute the mean prediction.
-            size (int, optional): The number of samples to generate. If not
-                provided, only one sample will be produced.
-            regularize (float, optional): For poorly conditioned systems, you
-                can provide a small number here to regularize the predictive
-                covariance. This number will be added to the diagonal.
-            include_mean (bool, optional): Include the mean function in the
-                prediction.
-            kernel (optional): If provided, compute the conditional
-                distribution using a different kernel. This is generally used
-                to separate the contributions from different model components.
-                Note that the computational cost and scaling will be worse
-                when using this parameter.
-        """
-        mu = self.mean
-        cov = self.covariance
-        if regularize is not None:
-            cov[np.diag_indices_from(cov)] += regularize
-        return np.random.multivariate_normal(mu, cov, size=size)
diff --git a/python/celerite2/core.py b/python/celerite2/core.py
index f55f2e9..6dd88e8 100644
--- a/python/celerite2/core.py
+++ b/python/celerite2/core.py
@@ -4,9 +4,16 @@
     "ConstantMean",
     "BaseConditionalDistribution",
     "BaseGaussianProcess",
+    "ConditionalDistribution",
+    "GaussianProcess",
 ]
+import warnings
+
 import numpy as np
 
+from . import driver
+from .driver import LinAlgError
+
 
 class ConstantMean:
     def __init__(self, value=0.0):
@@ -23,6 +30,7 @@ def __init__(
         y,
         t=None,
         *,
+        X=None,
         include_mean=True,
         kernel=None,
     ):
@@ -30,32 +38,65 @@ def __init__(
         self.y = y
         self.t = t
         self.include_mean = include_mean
-        self.kernel = kernel
-
-        self._c2 = None
-        self._U1 = None
-        self._V1 = None
-        self._U2 = None
-        self._V2 = None
-
-        self._KxsT = None
-        self._Kinv_KxsT = None
 
         if self.t is None:
+            if X is not None:
+                raise ValueError("'X' can only be defined when 't' is")
             self._xs = self.gp._t
+            self._X = self.gp._X
         else:
             self._xs = self.gp._as_tensor(self.t)
             if self._xs.ndim != 1:
                 raise ValueError("'t' must be one-dimensional")
 
+            if X is None:
+                if self.gp._X is not None:
+                    raise TypeError(
+                        "'X' is required if it is defined for the process"
+                    )
+                self._X = None
+            else:
+                self._X = self.gp._as_tensor(X, dtype=None)
+                if self._X.shape[0] != self._xs.shape[0]:
+                    raise ValueError("Dimension mismatch")
+
+        if kernel is None:
+            self.kernel = self.gp.kernel
+            self._U1 = self.gp._U
+            self._V1 = self.gp._V
+        else:
+            self.kernel = kernel
+            _, _, self._U1, self._V1 = self.kernel.get_celerite_matrices(
+                self.gp._t, self.gp._zeros_like(self.gp._t), X=self.gp._X
+            )
+
+        (
+            self._c2,
+            self._a2,
+            self._U2,
+            self._V2,
+        ) = self.kernel.get_celerite_matrices(
+            self._xs, self.gp._zeros_like(self._xs), X=self._X
+        )
+
+        self._KxsT = None
+        self._Kinv_KxsT = None
+
     @property
     def KxsT(self):
         if self._KxsT is None:
-            tau = self.gp._t[:, None] - self._xs[None, :]
-            if self.kernel is None:
-                self._KxsT = self.gp.kernel.get_value(tau)
-            else:
-                self._KxsT = self.kernel.get_value(tau)
+            K = self.gp._zeros((self.gp._t.shape[0], self._xs.shape[0]))
+            self._KxsT = self.gp._do_general_matmul(
+                self.gp._t,
+                self._xs,
+                self._c2,
+                self._U2,
+                self._V2,
+                self._U1,
+                self._V1,
+                self.gp._eye(self._xs.shape[0]),
+                K,
+            )
         return self._KxsT
 
     @property
@@ -64,48 +105,24 @@ def Kinv_KxsT(self):
             self._Kinv_KxsT = self.gp.apply_inverse(self.KxsT, inplace=False)
         return self._Kinv_KxsT
 
-    def _do_general_matmul(self, c, U1, V1, U2, V2, inp, target):
-        raise NotImplementedError("must be implemented by subclasses")
-
-    def _diagdot(self, a, b):
-        raise NotImplementedError("must be implemented by subclasses")
-
     def _do_dot(self, inp, target):
-        if self.kernel is None:
-            kernel = self.gp.kernel
-            U1 = self.gp._U
-            V1 = self.gp._V
-        else:
-            kernel = self.kernel
-            if self._U1 is None or self._V1 is None:
-                _, _, self._U1, self._V1 = kernel.get_celerite_matrices(
-                    self.gp._t,
-                    self.gp._zeros_like(self.gp._t),
-                    U=self._U1,
-                    V=self._V1,
-                )
-            U1 = self._U1
-            V1 = self._V1
-
-        if self._c2 is None or self._U2 is None or self._V2 is None:
-            self._c2, _, self._U2, self._V2 = kernel.get_celerite_matrices(
-                self._xs,
-                self.gp._zeros_like(self._xs),
-                c=self._c2,
-                U=self._U2,
-                V=self._V2,
-            )
-        c = self._c2
-        U2 = self._U2
-        V2 = self._V2
-
         is_vector = False
         if inp.ndim == 1:
             is_vector = True
             inp = inp[:, None]
             target = target[:, None]
 
-        target = self._do_general_matmul(c, U1, V1, U2, V2, inp, target)
+        target = self.gp._do_general_matmul(
+            self._xs,
+            self.gp._t,
+            self._c2,
+            self._U1,
+            self._V1,
+            self._U2,
+            self._V2,
+            inp,
+            target,
+        )
 
         if is_vector:
             return target[:, 0]
@@ -132,19 +149,17 @@ def mean(self):
 
     @property
     def variance(self):
-        if self.kernel is None:
-            kernel = self.gp.kernel
-        else:
-            kernel = self.kernel
-        return kernel.get_value(0.0) - self._diagdot(self.KxsT, self.Kinv_KxsT)
+        return self._a2 - self.gp._diagdot(self.KxsT, self.Kinv_KxsT)
 
     @property
     def covariance(self):
-        if self.kernel is None:
-            kernel = self.gp.kernel
-        else:
-            kernel = self.kernel
-        neg_cov = -kernel.get_value(self._xs[:, None] - self._xs[None, :])
+        neg_cov = -self.gp._get_dense_matrix(
+            self._xs,
+            self._c2,
+            self._a2,
+            self._U2,
+            self._V2,
+        )
         neg_cov = self._do_dot(self.Kinv_KxsT, neg_cov)
         return -neg_cov
 
@@ -198,6 +213,7 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs):
 
         # Placeholders for storing data
         self._t = None
+        self._X = None
         self._mean_value = None
         self._diag = None
         self._size = None
@@ -237,12 +253,15 @@ def _setup(self):
     def _copy_or_check(self, tensor, *, inplace=False):
         return tensor
 
-    def _as_tensor(self, tensor):
+    def _as_tensor(self, tensor, dtype=np.float64):
         raise NotImplementedError("must be implemented by subclasses")
 
     def _zeros_like(self, tensor):
         raise NotImplementedError("must be implemented by subclasses")
 
+    def _zeros(self, shape):
+        raise NotImplementedError("must be implemented by subclasses")
+
     def _do_compute(self, quiet):
         raise NotImplementedError("must be implemented by subclasses")
 
@@ -258,8 +277,27 @@ def _do_dot_tril(self, y):
     def _do_norm(self, y):
         raise NotImplementedError("must be implemented by subclasses")
 
+    def _get_dense_matrix(self, t, c, a, U, V):
+        raise NotImplementedError("must be implemented by subclasses")
+
+    def _do_general_matmul(self, t1, t2, c, U1, V1, U2, V2, inp, target):
+        raise NotImplementedError("must be implemented by subclasses")
+
+    def _diagdot(self, a, b):
+        raise NotImplementedError("must be implemented by subclasses")
+
+    def _eye(self, n):
+        raise NotImplementedError("must be implemented by subclasses")
+
     def compute(
-        self, t, *, yerr=None, diag=None, check_sorted=True, quiet=False
+        self,
+        t,
+        *,
+        yerr=None,
+        diag=None,
+        X=None,
+        check_sorted=True,
+        quiet=False,
     ):
         """Compute the Cholesky factorization of the GP covariance matrix
 
@@ -288,8 +326,14 @@ def compute(
         if check_sorted:
             t = self._check_sorted(t)
 
+        if X is not None:
+            X = self._as_tensor(X, dtype=None)
+            if t.shape[0] != X.shape[0]:
+                raise ValueError("Dimension mismatch")
+
         # Save the diagonal
         self._t = t
+        self._X = X
         self._size = self._t.shape[0]
         self._mean_value = self._mean(self._t)
         self._diag = self._zeros_like(self._t)
@@ -310,7 +354,13 @@ def compute(
             self._U,
             self._V,
         ) = self.kernel.get_celerite_matrices(
-            self._t, self._diag, c=self._c, a=self._a, U=self._U, V=self._V
+            self._t,
+            self._diag,
+            c=self._c,
+            a=self._a,
+            U=self._U,
+            V=self._V,
+            X=self._X,
         )
 
         self._do_compute(quiet)
@@ -334,7 +384,11 @@ def recompute(self, *, quiet=False):
                 "you must call 'compute' directly  at least once"
             )
         return self.compute(
-            self._t, diag=self._diag, check_sorted=False, quiet=quiet
+            self._t,
+            diag=self._diag,
+            X=self._X,
+            check_sorted=False,
+            quiet=quiet,
         )
 
     def _process_input(self, y, *, require_vector=False, inplace=False):
@@ -431,6 +485,7 @@ def predict(
         y,
         t=None,
         *,
+        X=None,
         return_cov=False,
         return_var=False,
         include_mean=True,
@@ -466,17 +521,19 @@ def predict(
                 first.
             ValueError: When the inputs are not valid (shape, number, etc.).
         """
-        cond = self.condition(y, t=t, include_mean=include_mean, kernel=kernel)
+        cond = self.condition(
+            y, t=t, X=X, include_mean=include_mean, kernel=kernel
+        )
         if return_var:
             return cond.mean, cond.variance
         if return_cov:
             return cond.mean, cond.covariance
         return cond.mean
 
-    def condition(self, y, t=None, *, include_mean=True, kernel=None):
+    def condition(self, y, t=None, *, X=None, include_mean=True, kernel=None):
         y = self._process_input(y, require_vector=True, inplace=True)
         return self.conditional_distribution(
-            self, y, t=t, include_mean=include_mean, kernel=kernel
+            self, y, t=t, X=X, include_mean=include_mean, kernel=kernel
         )
 
     def sample(self, *, size=None, include_mean=True):
@@ -498,3 +555,121 @@ def sample(self, *, size=None, include_mean=True):
             ValueError: When the inputs are not valid (shape, number, etc.).
         """
         raise NotImplementedError("'sample' is not implemented by extensions")
+
+
+class ConditionalDistribution(BaseConditionalDistribution):
+    def sample(self, *, size=None, regularize=None):
+        mu = self.mean
+        cov = self.covariance
+        if regularize is not None:
+            cov[np.diag_indices_from(cov)] += regularize
+        return np.random.multivariate_normal(mu, cov, size=size)
+
+
+class GaussianProcess(BaseGaussianProcess):
+    conditional_distribution = ConditionalDistribution
+
+    def _setup(self):
+        self._c = np.empty(0, dtype=np.float64)
+        self._a = np.empty(0, dtype=np.float64)
+        self._U = np.empty((0, 0), dtype=np.float64)
+        self._V = np.empty((0, 0), dtype=np.float64)
+
+    def _copy_or_check(self, y, *, inplace=False):
+        if inplace:
+            if (
+                y.dtype != "float64"
+                or not y.flags.c_contiguous
+                or not y.flags.writeable
+            ):
+                warnings.warn(
+                    "Inplace operations can only be made on C-contiguous, "
+                    "writable, float64 arrays; a copy will be made"
+                )
+            y = np.ascontiguousarray(y, dtype=np.float64)
+        else:
+            y = np.array(y, dtype=np.float64, copy=True, order="C")
+        return y
+
+    def _as_tensor(self, y, dtype=np.float64):
+        return np.ascontiguousarray(y, dtype=dtype)
+
+    def _zeros_like(self, y):
+        return np.zeros_like(y)
+
+    def _zeros(self, shape):
+        return np.zeros(shape)
+
+    def _eye(self, n):
+        return np.eye(n)
+
+    def _get_dense_matrix(self, t, c, a, U, V):
+        Y = np.eye(len(t))
+        Z = np.diag(a)
+        Z = driver.matmul_lower(t, c, U, V, Y, Z)
+        return driver.matmul_upper(t, c, U, V, Y, Z)
+
+    def _do_general_matmul(self, t1, t2, c, U1, V1, U2, V2, inp, target):
+        target = driver.general_matmul_lower(t1, t2, c, U2, V1, inp, target)
+        target = driver.general_matmul_upper(t1, t2, c, V2, U1, inp, target)
+        return target
+
+    def _diagdot(self, a, b):
+        return np.einsum("ij,ij->j", a, b)
+
+    def _do_compute(self, quiet):
+        # Compute the Cholesky factorization
+        try:
+            self._d, self._W = driver.factor(
+                self._t,
+                self._c,
+                self._a,
+                self._U,
+                self._V,
+                self._a,
+                np.copy(self._V),
+            )
+        except LinAlgError:
+            if not quiet:
+                raise
+            self._log_det = -np.inf
+            self._norm = np.inf
+        else:
+            self._log_det = np.sum(np.log(self._d))
+            self._norm = -0.5 * (
+                self._log_det + self._size * np.log(2 * np.pi)
+            )
+
+    def _check_sorted(self, t):
+        if np.any(np.diff(t) < 0.0):
+            raise ValueError("The input coordinates must be sorted")
+        return t
+
+    def _do_solve(self, y):
+        z = driver.solve_lower(self._t, self._c, self._U, self._W, y, y)
+        z /= self._d[:, None]
+        z = driver.solve_upper(self._t, self._c, self._U, self._W, z, z)
+        return z
+
+    def _do_dot_tril(self, y):
+        z = y * np.sqrt(self._d)[:, None]
+        return driver.matmul_lower(self._t, self._c, self._U, self._W, z, z)
+
+    def _do_norm(self, y):
+        alpha = y[:, None]
+        alpha = driver.solve_lower(
+            self._t, self._c, self._U, self._W, alpha, alpha
+        )[:, 0]
+        return np.sum(alpha ** 2 / self._d)
+
+    def sample(self, *, size=None, include_mean=True):
+        if self._t is None:
+            raise RuntimeError("you must call 'compute' first")
+        if size is None:
+            n = np.random.randn(self._size)
+        else:
+            n = np.random.randn(self._size, size)
+        result = self.dot_tril(n, inplace=True).T
+        if include_mean:
+            result += self._mean_value
+        return result
diff --git a/python/celerite2/jax/__init__.py b/python/celerite2/jax/__init__.py
index 74818ef..2a34679 100644
--- a/python/celerite2/jax/__init__.py
+++ b/python/celerite2/jax/__init__.py
@@ -22,5 +22,5 @@
 __all__ = ["terms", "GaussianProcess", "CeleriteNormal"]
 
 from . import terms  # noqa isort:skip
-from .celerite2 import GaussianProcess  # noqa isort:skip
+from .core import GaussianProcess  # noqa isort:skip
 from .distribution import CeleriteNormal  # noqa isort:skip
diff --git a/python/celerite2/jax/celerite2.py b/python/celerite2/jax/core.py
similarity index 69%
rename from python/celerite2/jax/celerite2.py
rename to python/celerite2/jax/core.py
index 9ff304f..ac6e4a5 100644
--- a/python/celerite2/jax/celerite2.py
+++ b/python/celerite2/jax/core.py
@@ -1,35 +1,32 @@
 # -*- coding: utf-8 -*-
 
-__all__ = ["GaussianProcess", "ConditionalDistribution"]
+__all__ = ["GaussianProcess"]
 from jax import numpy as np
 
-from ..core import BaseConditionalDistribution, BaseGaussianProcess
+from ..core import BaseGaussianProcess
 from . import distribution, ops
 
 
-class ConditionalDistribution(BaseConditionalDistribution):
-    def _do_general_matmul(self, c, U1, V1, U2, V2, inp, target):
-        target += ops.general_matmul_lower(
-            self._xs, self.gp._t, c, U2, V1, inp
-        )
-        target += ops.general_matmul_upper(
-            self._xs, self.gp._t, c, V2, U1, inp
-        )
-        return target
-
-    def _diagdot(self, a, b):
-        return np.einsum("ij,ij->j", a, b)
-
-
 class GaussianProcess(BaseGaussianProcess):
-    conditional_distribution = ConditionalDistribution
-
     def _as_tensor(self, tensor):
         return np.asarray(tensor, dtype=np.float64)
 
     def _zeros_like(self, tensor):
         return np.zeros_like(tensor)
 
+    def _zeros(self, shape):
+        return np.zeros(shape)
+
+    def _eye(self, n):
+        return np.eye(n)
+
+    def _get_dense_matrix(self, t, c, a, U, V):
+        Y = np.eye(len(t))
+        Z = np.diag(a)
+        Z += ops.matmul_lower(t, c, U, V, Y)
+        Z += ops.matmul_upper(t, c, U, V, Y)
+        return Z
+
     def _do_compute(self, quiet):
         self._d, self._W = ops.factor(
             self._t, self._c, self._a, self._U, self._V
@@ -57,5 +54,13 @@ def _do_norm(self, y):
         )[:, 0]
         return np.sum(alpha ** 2 / self._d)
 
+    def _do_general_matmul(self, t1, t2, c, U1, V1, U2, V2, inp, target):
+        target += ops.general_matmul_lower(t1, t2, c, U2, V1, inp)
+        target += ops.general_matmul_upper(t1, t2, c, V2, U1, inp)
+        return target
+
+    def _diagdot(self, a, b):
+        return np.einsum("ij,ij->j", a, b)
+
     def numpyro_dist(self):
         return distribution.CeleriteNormal(self)
diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py
new file mode 100644
index 0000000..6603c3d
--- /dev/null
+++ b/python/celerite2/latent.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+__all__ = ["LatentTerm", "prepare_rectangular_data", "KroneckerLatent"]
+import numpy as np
+
+from .terms import Term
+
+
+class LatentTerm(Term):
+    __requires_general_addition__ = True
+
+    def __init__(self, term, left=None, right=None):
+        self.term = term
+        self.left = left
+        self.right = right
+
+    def _get_latent(self, left_or_right, t, X):
+        if left_or_right is None:
+            return np.ones((1, 1, 1))
+        if callable(left_or_right):
+            left_or_right = left_or_right(t, X)
+        else:
+            left_or_right = np.atleast_1d(left_or_right)
+        if left_or_right.ndim == 1:
+            return left_or_right[None, None, :]
+        if left_or_right.ndim == 2:
+            return left_or_right[None, :, :]
+        if left_or_right.ndim != 3:
+            raise ValueError("Invalid shape for latent")
+        return left_or_right
+
+    def get_left_latent(self, t, X):
+        return self._get_latent(self.left, t, X)
+
+    def get_right_latent(self, t, X):
+        if self.right is None and self.left is not None:
+            return self._get_latent(self.left, t, X)
+        return self._get_latent(self.right, t, X)
+
+    def get_celerite_matrices(
+        self,
+        t,
+        diag,
+        *,
+        c=None,
+        a=None,
+        U=None,
+        V=None,
+        X=None,
+    ):
+        t = np.atleast_1d(t)
+        diag = np.atleast_1d(diag)
+        c0, a0, U0, V0 = self.term.get_celerite_matrices(t, diag, X=X)
+
+        left = self.get_left_latent(t, X)
+        right = self.get_right_latent(t, X)
+
+        N = len(t)
+        K = left.shape[2]
+        J = c0.shape[0] * K
+        c, a, U, V = self._resize_matrices(N, J, c, a, U, V)
+
+        c[:] = np.tile(c0, K)
+        U[:] = np.ascontiguousarray(U0[:, :, None] * left).reshape((N, -1))
+        V[:] = np.ascontiguousarray(V0[:, :, None] * right).reshape((N, -1))
+        a[:] = diag + np.sum(U * V, axis=-1)
+
+        return c, a, U, V
+
+
+def prepare_rectangular_data(N, M, t, **kwargs):
+    results = dict(
+        t=np.tile(np.asarray(t)[:, None], (1, M)).flatten(),
+        X=np.tile(np.arange(M), N),
+    )
+
+    for k, v in kwargs.items():
+        results[k] = np.ascontiguousarray(
+            np.broadcast_to(v, (N, M)), dtype=np.float64
+        ).flatten()
+
+    return results
+
+
+class KroneckerLatent:
+    def __init__(self, *, R=None, L=None):
+        if R is not None:
+            if L is not None:
+                raise ValueError("Only one of 'R' and 'L' can be defined")
+            R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64)
+            try:
+                self.L = np.linalg.cholesky(R)
+            except np.linalg.LinAlgError:
+                M = np.copy(R)
+                M[np.diag_indices_from(M)] += 1e-10
+                self.L = np.linalg.cholesky(M)
+        elif L is not None:
+            self.L = np.ascontiguousarray(L, dtype=np.float64)
+            if self.L.ndim == 1:
+                self.L = np.reshape(self.L, (-1, 1))
+        else:
+            raise ValueError("One of 'R' and 'L' must be defined")
+
+    def __call__(self, t, inds):
+        return self.L[inds][:, None, :]
diff --git a/python/celerite2/numpy.py b/python/celerite2/numpy.py
deleted file mode 100644
index 99d6e05..0000000
--- a/python/celerite2/numpy.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# -*- coding: utf-8 -*-
-
-__all__ = ["ConditionalDistribution", "GaussianProcess"]
-import warnings
-
-import numpy as np
-
-from . import driver
-from .core import BaseConditionalDistribution, BaseGaussianProcess
-from .driver import LinAlgError
-
-
-class ConditionalDistribution(BaseConditionalDistribution):
-    def _do_general_matmul(self, c, U1, V1, U2, V2, inp, target):
-        target = driver.general_matmul_lower(
-            self._xs, self.gp._t, c, U2, V1, inp, target
-        )
-        target = driver.general_matmul_upper(
-            self._xs, self.gp._t, c, V2, U1, inp, target
-        )
-        return target
-
-    def _diagdot(self, a, b):
-        return np.einsum("ij,ij->j", a, b)
-
-    def sample(self, *, size=None, regularize=None):
-        mu = self.mean
-        cov = self.covariance
-        if regularize is not None:
-            cov[np.diag_indices_from(cov)] += regularize
-        return np.random.multivariate_normal(mu, cov, size=size)
-
-
-class GaussianProcess(BaseGaussianProcess):
-    conditional_distribution = ConditionalDistribution
-
-    def _setup(self):
-        self._c = np.empty(0, dtype=np.float64)
-        self._a = np.empty(0, dtype=np.float64)
-        self._U = np.empty((0, 0), dtype=np.float64)
-        self._V = np.empty((0, 0), dtype=np.float64)
-
-    def _copy_or_check(self, y, *, inplace=False):
-        if inplace:
-            if (
-                y.dtype != "float64"
-                or not y.flags.c_contiguous
-                or not y.flags.writeable
-            ):
-                warnings.warn(
-                    "Inplace operations can only be made on C-contiguous, "
-                    "writable, float64 arrays; a copy will be made"
-                )
-            y = np.ascontiguousarray(y, dtype=np.float64)
-        else:
-            y = np.array(y, dtype=np.float64, copy=True, order="C")
-        return y
-
-    def _as_tensor(self, y):
-        return np.ascontiguousarray(y, dtype=np.float64)
-
-    def _zeros_like(self, y):
-        return np.zeros_like(y)
-
-    def _do_compute(self, quiet):
-        # Compute the Cholesky factorization
-        try:
-            self._d, self._W = driver.factor(
-                self._t,
-                self._c,
-                self._a,
-                self._U,
-                self._V,
-                self._a,
-                np.copy(self._V),
-            )
-        except LinAlgError:
-            if not quiet:
-                raise
-            self._log_det = -np.inf
-            self._norm = np.inf
-        else:
-            self._log_det = np.sum(np.log(self._d))
-            self._norm = -0.5 * (
-                self._log_det + self._size * np.log(2 * np.pi)
-            )
-
-    def _check_sorted(self, t):
-        if np.any(np.diff(t) < 0.0):
-            raise ValueError("The input coordinates must be sorted")
-        return t
-
-    def _do_solve(self, y):
-        z = driver.solve_lower(self._t, self._c, self._U, self._W, y, y)
-        z /= self._d[:, None]
-        z = driver.solve_upper(self._t, self._c, self._U, self._W, z, z)
-        return z
-
-    def _do_dot_tril(self, y):
-        z = y * np.sqrt(self._d)[:, None]
-        return driver.matmul_lower(self._t, self._c, self._U, self._W, z, z)
-
-    def _do_norm(self, y):
-        alpha = y[:, None]
-        alpha = driver.solve_lower(
-            self._t, self._c, self._U, self._W, alpha, alpha
-        )[:, 0]
-        return np.sum(alpha ** 2 / self._d)
-
-    def sample(self, *, size=None, include_mean=True):
-        if self._t is None:
-            raise RuntimeError("you must call 'compute' first")
-        if size is None:
-            n = np.random.randn(self._size)
-        else:
-            n = np.random.randn(self._size, size)
-        result = self.dot_tril(n, inplace=True).T
-        if include_mean:
-            result += self._mean_value
-        return result
diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py
index 365e349..af88844 100644
--- a/python/celerite2/terms.py
+++ b/python/celerite2/terms.py
@@ -3,6 +3,7 @@
 __all__ = [
     "Term",
     "TermSum",
+    "TermSumGeneral",
     "TermProduct",
     "TermDiff",
     "TermConvolution",
@@ -30,8 +31,16 @@ class Term:
 
     """
 
+    __requires_general_addition__ = False
+
     def __add__(self, b):
-        return TermSum(self, b)
+        terms = tuple(self.terms) + (b,)
+        if (
+            self.__requires_general_addition__
+            or b.__requires_general_addition__
+        ):
+            return TermSumGeneral(*terms)
+        return TermSum(*terms)
 
     def __mul__(self, b):
         return TermProduct(self, b)
@@ -114,8 +123,27 @@ def to_dense(self, x, diag):
         K[np.diag_indices_from(K)] += diag
         return K
 
+    def _resize_matrices(self, N, J, c, a, U, V):
+        if c is None:
+            c = np.empty(J)
+        else:
+            c.resize(J, refcheck=False)
+        if a is None:
+            a = np.empty(N)
+        else:
+            a.resize(N, refcheck=False)
+        if U is None:
+            U = np.empty((N, J))
+        else:
+            U.resize((N, J), refcheck=False)
+        if V is None:
+            V = np.empty((N, J))
+        else:
+            V.resize((N, J), refcheck=False)
+        return c, a, U, V
+
     def get_celerite_matrices(
-        self, x, diag, *, c=None, a=None, U=None, V=None
+        self, t, diag, *, c=None, a=None, U=None, V=None, X=None
     ):
         """Get the matrices needed to solve the celerite system
 
@@ -126,7 +154,7 @@ def get_celerite_matrices(
             extensions.
 
         Args:
-            x (shape[N]): The independent coordinates of the data.
+            t (shape[N]): The independent coordinates of the data.
             diag (shape[N]): The diagonal variance of the system.
             a (shape[N], optional): The diagonal of the A matrix.
             U (shape[N, J], optional): The first low-rank matrix.
@@ -137,57 +165,41 @@ def get_celerite_matrices(
         Raises:
             ValueError: When the inputs are not valid.
         """
-        x = np.atleast_1d(x)
+        t = np.atleast_1d(t)
         diag = np.atleast_1d(diag)
-        if len(x.shape) != 1:
+        if len(t.shape) != 1:
             raise ValueError("'x' must be one-dimensional")
-        if x.shape != diag.shape:
+        if t.shape != diag.shape:
             raise ValueError("dimension mismatch")
 
         ar, cr, ac, bc, cc, dc = self.get_coefficients()
 
-        N = len(x)
+        N = len(t)
         Jr = len(ar)
         Jc = len(ac)
         J = Jr + 2 * Jc
-
-        if c is None:
-            c = np.empty(J)
-        else:
-            c.resize(J, refcheck=False)
-        if a is None:
-            a = np.empty(N)
-        else:
-            a.resize(N, refcheck=False)
-        if U is None:
-            U = np.empty((N, J))
-        else:
-            U.resize((N, J), refcheck=False)
-        if V is None:
-            V = np.empty((N, J))
-        else:
-            V.resize((N, J), refcheck=False)
+        c, a, U, V = self._resize_matrices(N, J, c, a, U, V)
 
         c[:Jr] = cr
         c[Jr::2] = cc
         c[Jr + 1 :: 2] = cc
         a, U, V = driver.get_celerite_matrices(
-            ar, ac, bc, dc, x, diag, a, U, V
+            ar, ac, bc, dc, t, diag, a, U, V
         )
         return c, a, U, V
 
-    def dot(self, x, diag, y):
+    def dot(self, t, diag, y, *, X=None):
         """Apply a matrix-vector or matrix-matrix product
 
         Args:
-            x (shape[N]): The independent coordinates of the data.
+            t (shape[N]): The independent coordinates of the data.
             diag (shape[N]): The diagonal variance of the system.
             y (shape[N] or shape[N, K]): The target of vector or matrix for
                 this operation.
         """
-        x = np.atleast_1d(x)
+        t = np.atleast_1d(t)
         y = np.atleast_1d(y)
-        if y.shape[0] != x.shape[0]:
+        if y.shape[0] != t.shape[0]:
             raise ValueError("Dimension mismatch")
 
         is_vector = False
@@ -197,10 +209,10 @@ def dot(self, x, diag, y):
         if y.ndim != 2:
             raise ValueError("'y' can only be a vector or matrix")
 
-        c, a, U, V = self.get_celerite_matrices(x, diag)
+        c, a, U, V = self.get_celerite_matrices(t, diag, X=X)
         z = y * a[:, None]
-        z = driver.matmul_lower(x, c, U, V, y, z)
-        z = driver.matmul_upper(x, c, U, V, y, z)
+        z = driver.matmul_lower(t, c, U, V, y, z)
+        z = driver.matmul_upper(t, c, U, V, y, z)
 
         if is_vector:
             return z[:, 0]
@@ -219,10 +231,10 @@ class TermSum(Term):
     """
 
     def __init__(self, *terms):
-        if any(isinstance(term, TermConvolution) for term in terms):
+        if any(term.__requires_general_addition__ for term in terms):
             raise TypeError(
-                "You cannot perform operations on an TermConvolution, it must "
-                "be the outer term in the kernel"
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
             )
         self._terms = terms
 
@@ -235,6 +247,71 @@ def get_coefficients(self):
         return tuple(np.concatenate(c) for c in zip(*coeffs))
 
 
+class TermSumGeneral(Term):
+    """A general sum of multiple :class:`Term` objects
+
+    This will be used if any of the terms require "general addition",
+    or if they don't provide a ``get_coefficients`` method.
+
+    Args:
+        *terms: Any number of :class:`Term` subclasses to add together.
+    """
+
+    __requires_general_addition__ = True
+
+    def __init__(self, *terms):
+        basic = [
+            term for term in terms if not term.__requires_general_addition__
+        ]
+        self._terms = [
+            term for term in terms if term.__requires_general_addition__
+        ]
+        if len(basic):
+            self._terms.insert(0, TermSum(*basic))
+        if not len(self._terms):
+            raise ValueError(
+                "A general term sum cannot be instantiated without any terms"
+            )
+
+    @property
+    def terms(self):
+        return self._terms
+
+    def get_value(self, tau):
+        K = self.terms[0].get_value(tau)
+        for term in self.terms[1:]:
+            K += term.get_value(tau)
+        return K
+
+    def get_psd(self, omega):
+        p = self.terms[0].get_psd(omega)
+        for term in self.terms[1:]:
+            p += term.get_psd(omega)
+        return p
+
+    def get_celerite_matrices(
+        self, t, diag, *, c=None, a=None, U=None, V=None, X=None
+    ):
+        diag = np.atleast_1d(diag)
+        matrices = [
+            term.get_celerite_matrices(t, np.zeros_like(diag), X=X)
+            for term in self.terms
+        ]
+        N = matrices[0][1].shape[0]
+        J = sum(m[0].shape[0] for m in matrices)
+        c, a, U, V = self._resize_matrices(N, J, c, a, U, V)
+        a[:] = diag
+        j = 0
+        for c0, a0, U0, V0 in matrices:
+            dj = len(c0)
+            c[j : j + dj] = c0
+            a[:] += a0
+            U[:, j : j + dj] = U0
+            V[:, j : j + dj] = V0
+            j += dj
+        return c, a, U, V
+
+
 class TermProduct(Term):
     """A product of two :class:`Term` objects
 
@@ -248,12 +325,13 @@ class TermProduct(Term):
     """
 
     def __init__(self, term1, term2):
-        int1 = isinstance(term1, TermConvolution)
-        int2 = isinstance(term2, TermConvolution)
-        if int1 or int2:
+        if (
+            term1.__requires_general_addition__
+            or term2.__requires_general_addition__
+        ):
             raise TypeError(
-                "You cannot perform operations on an TermConvolution, it must "
-                "be the outer term in the kernel"
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
             )
         self.term1 = term1
         self.term2 = term2
@@ -309,10 +387,10 @@ class TermDiff(Term):
     """
 
     def __init__(self, term):
-        if isinstance(term, TermConvolution):
+        if term.__requires_general_addition__:
             raise TypeError(
-                "You cannot perform operations on an TermConvolution, it must "
-                "be the outer term in the kernel"
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
             )
         self.term = term
 
@@ -343,12 +421,27 @@ class TermConvolution(Term):
             exposure time).
     """
 
+    __requires_general_addition__ = True
+
     def __init__(self, term, delta):
+        if term.__requires_general_addition__:
+            raise TypeError(
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
+            )
         self.term = term
         self.delta = float(delta)
 
     def get_celerite_matrices(
-        self, x, diag, *, c=None, a=None, U=None, V=None
+        self,
+        t,
+        diag,
+        *,
+        c=None,
+        a=None,
+        U=None,
+        V=None,
+        X=None,
     ):
         dt = self.delta
         ar, cr, a, b, cc, d = self.term.get_coefficients()
@@ -379,7 +472,9 @@ def get_celerite_matrices(
 
         new_diag = diag + delta_diag
 
-        return super().get_celerite_matrices(x, new_diag, c=c, a=a, U=U, V=V)
+        return super().get_celerite_matrices(
+            t, new_diag, c=c, a=a, U=U, V=V, X=X
+        )
 
     def get_coefficients(self):
         ar, cr, a, b, c, d = self.term.get_coefficients()
diff --git a/python/celerite2/theano/__init__.py b/python/celerite2/theano/__init__.py
index 19b104a..47c6ad1 100644
--- a/python/celerite2/theano/__init__.py
+++ b/python/celerite2/theano/__init__.py
@@ -3,5 +3,5 @@
 __all__ = ["terms", "GaussianProcess", "CeleriteNormal"]
 
 from . import terms
-from .celerite2 import GaussianProcess
+from .core import GaussianProcess
 from .distribution import CeleriteNormal
diff --git a/python/celerite2/theano/celerite2.py b/python/celerite2/theano/core.py
similarity index 90%
rename from python/celerite2/theano/celerite2.py
rename to python/celerite2/theano/core.py
index f909680..1bbed40 100644
--- a/python/celerite2/theano/celerite2.py
+++ b/python/celerite2/theano/core.py
@@ -70,6 +70,27 @@ def _as_tensor(self, tensor):
     def _zeros_like(self, tensor):
         return tt.zeros_like(tensor)
 
+    def _zeros(self, shape):
+        return tt.zeros(shape)
+
+    def _eye(self, n):
+        return tt.eye(n)
+
+    def _get_dense_matrix(self, t, c, a, U, V):
+        Y = tt.eye(t.shape[0])
+        Z = tt.diag(a)
+        Z += ops.matmul_lower(t, c, U, V, Y)[0]
+        Z += ops.matmul_upper(t, c, U, V, Y)[0]
+        return Z
+
+    def _do_general_matmul(self, t1, t2, c, U1, V1, U2, V2, inp, target):
+        target += ops.general_matmul_lower(t1, t2, c, U2, V1, inp)[0]
+        target += ops.general_matmul_upper(t1, t2, c, V2, U1, inp)[0]
+        return target
+
+    def _diagdot(self, a, b):
+        return tt.batched_dot(a.T, b.T)
+
     def _do_compute(self, quiet):
         if quiet:
             self._d, self._W, _ = ops.factor_quiet(
diff --git a/python/test/jax/test_celerite2.py b/python/test/jax/test_core.py
similarity index 100%
rename from python/test/jax/test_celerite2.py
rename to python/test/jax/test_core.py
diff --git a/python/test/test_celerite2.py b/python/test/test_core.py
similarity index 100%
rename from python/test/test_celerite2.py
rename to python/test/test_core.py
diff --git a/python/test/theano/test_celerite2.py b/python/test/theano/test_core.py
similarity index 100%
rename from python/test/theano/test_celerite2.py
rename to python/test/theano/test_core.py

From 7c00e0227f351af631d2ba97d7ffda72e4d27121 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Thu, 12 Nov 2020 20:25:33 -0500
Subject: [PATCH 02/14] getting theano and jax working

---
 python/celerite2/__init__.py     |  19 ++-
 python/celerite2/core.py         |   2 +
 python/celerite2/jax/terms.py    | 235 +++++++++++++++++--------------
 python/celerite2/latent.py       |   5 +
 python/celerite2/terms.py        | 134 +++++-------------
 python/celerite2/testing.py      |  15 +-
 python/celerite2/theano/core.py  |  25 +++-
 python/celerite2/theano/terms.py | 181 ++++++++++++------------
 python/test/test_terms.py        |   3 +-
 python/test/theano/test_core.py  |   2 +-
 10 files changed, 312 insertions(+), 309 deletions(-)

diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py
index 983cdf1..3751528 100644
--- a/python/celerite2/__init__.py
+++ b/python/celerite2/__init__.py
@@ -11,7 +11,7 @@
 __license__ = "MIT"
 __description__ = "Fast and scalable Gaussian Processes in 1D"
 __bibtex__ = __citation__ = r"""
-@article{celerite1,
+@article{celerite2:foremanmackey17,
    author = {{Foreman-Mackey}, D. and {Agol}, E. and {Ambikasaran}, S. and
              {Angus}, R.},
     title = "{Fast and Scalable Gaussian Process Modeling with Applications to
@@ -25,7 +25,7 @@
    adsurl = {http://adsabs.harvard.edu/abs/2017AJ....154..220F},
   adsnote = {Provided by the SAO/NASA Astrophysics Data System}
 }
-@article{celerite2,
+@article{celerite2:foremanmackey18,
    author = {{Foreman-Mackey}, D.},
     title = "{Scalable Backpropagation for Gaussian Processes using Celerite}",
   journal = {Research Notes of the American Astronomical Society},
@@ -38,4 +38,19 @@
    adsurl = {http://adsabs.harvard.edu/abs/2018RNAAS...2a..31F},
   adsnote = {Provided by the SAO/NASA Astrophysics Data System}
 }
+@article{celerite2:gordon20,
+   author = {{Gordon}, Tyler A. and {Agol}, Eric and {Foreman-Mackey}, Daniel},
+    title = "{A Fast, Two-dimensional Gaussian Process Method Based on
+              Celerite: Applications to Transiting Exoplanet Discovery and
+              Characterization}",
+  journal = {\aj},
+     year = 2020,
+    month = nov,
+   volume = 160,
+   number = 5,
+    pages = {240},
+      doi = {10.3847/1538-3881/abbc16},
+   adsurl = {https://ui.adsabs.harvard.edu/abs/2020AJ....160..240G},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
 """
diff --git a/python/celerite2/core.py b/python/celerite2/core.py
index 6dd88e8..93fdb1f 100644
--- a/python/celerite2/core.py
+++ b/python/celerite2/core.py
@@ -308,6 +308,8 @@ def compute(
                 deviation of the observation model.
             diag (shape[N], optional): If provided, the diagonal variance of
                 the observation model.
+            X (shape[N], optional): If provided, coordinates of the data points
+                in the latent space.
             check_sorted (bool, optional): If ``True``, a check is performed to
                 make sure that ``t`` is correctly sorted. A ``ValueError`` will
                 be thrown when this check fails.
diff --git a/python/celerite2/jax/terms.py b/python/celerite2/jax/terms.py
index 25132b6..5279c19 100644
--- a/python/celerite2/jax/terms.py
+++ b/python/celerite2/jax/terms.py
@@ -3,6 +3,7 @@
 __all__ = [
     "Term",
     "TermSum",
+    "TermSumGeneral",
     "TermProduct",
     "TermDiff",
     "TermConvolution",
@@ -23,8 +24,17 @@
 
 
 class Term:
+    __doc__ = base_terms.Term.__doc__
+    __requires_general_addition__ = False
+
     def __add__(self, b):
-        return TermSum(self, b)
+        terms = tuple(self.terms) + (b,)
+        if (
+            self.__requires_general_addition__
+            or b.__requires_general_addition__
+        ):
+            return TermSumGeneral(*terms)
+        return TermSum(*terms)
 
     def __mul__(self, b):
         return TermProduct(self, b)
@@ -36,23 +46,27 @@ def terms(self):
     def get_coefficients(self):
         raise NotImplementedError("subclasses must implement this method")
 
-    def get_value(self, tau):
-        tau = np.abs(np.atleast_1d(tau))
-        ar, cr, ac, bc, cc, dc = self.get_coefficients()
-        k = np.zeros_like(tau)
-        tau = tau[..., None]
-
-        if len(ar):
-            k += np.sum(ar * np.exp(-cr * tau), axis=-1)
-
-        if len(ac):
-            arg = dc * tau
-            k += np.sum(
-                np.exp(-cc * tau) * (ac * np.cos(arg) + bc * np.sin(arg)),
-                axis=-1,
-            )
-
-        return k
+    def get_value(self, t, t2=None, *, diag=None, X=None, X2=None):
+        t = np.atleast_1d(t)
+        if diag is None:
+            diag = np.zeros_like(t)
+        c1, a1, U1, V1 = self.get_celerite_matrices(t, diag, X=X)
+        if t2 is None:
+            Y = np.eye(t.shape[0])
+            K = np.diag(a1)
+            K += ops.matmul_lower(t, c1, U1, V1, Y)
+            K += ops.matmul_upper(t, c1, U1, V1, Y)
+            return K
+
+        t2 = np.atleast_1d(t2)
+        c2, a2, U2, V2 = self.get_celerite_matrices(
+            t2, np.zeros_like(t2), X=X2
+        )
+        Y = np.eye(t2.shape[0])
+        K = np.zeros((t.shape[0], t2.shape[0]))
+        K += ops.general_matmul_lower(t, t2, c1, U1, V2, Y)
+        K += ops.general_matmul_upper(t, t2, c1, V1, U2, Y)
+        return K
 
     def get_psd(self, omega):
         w2 = np.atleast_1d(omega) ** 2
@@ -73,26 +87,22 @@ def get_psd(self, omega):
 
         return np.sqrt(2 / np.pi) * psd
 
-    def to_dense(self, x, diag):
-        K = self.get_value(x[:, None] - x[None, :])
-        return K + np.diag(diag)
-
-    def get_celerite_matrices(self, x, diag, **kwargs):
-        x = np.atleast_1d(x)
+    def get_celerite_matrices(self, t, diag, **kwargs):
+        t = np.atleast_1d(t)
         diag = np.atleast_1d(diag)
-        if len(x.shape) != 1:
-            raise ValueError("'x' must be one-dimensional")
-        if x.shape != diag.shape:
+        if t.ndim != 1:
+            raise ValueError("'t' must be one-dimensional")
+        if t.shape != diag.shape:
             raise ValueError("dimension mismatch")
 
         ar, cr, ac, bc, cc, dc = self.get_coefficients()
 
         a = diag + np.sum(ar) + np.sum(ac)
 
-        arg = dc[None, :] * x[:, None]
+        arg = dc[None, :] * t[:, None]
         cos = np.cos(arg)
         sin = np.sin(arg)
-        z = np.zeros_like(x)
+        z = np.zeros_like(t)
 
         U = np.concatenate(
             (
@@ -112,7 +122,7 @@ def get_celerite_matrices(self, x, diag, **kwargs):
 
         return c, a, U, V
 
-    def dot(self, x, diag, y):
+    def dot(self, t, diag, y, *, X=None):
         y = np.atleast_1d(y)
 
         is_vector = False
@@ -122,10 +132,10 @@ def dot(self, x, diag, y):
         if y.ndim != 2:
             raise ValueError("'y' can only be a vector or matrix")
 
-        c, a, U, V = self.get_celerite_matrices(x, diag)
+        c, a, U, V = self.get_celerite_matrices(t, diag, X=X)
         z = y * a[:, None]
-        z += ops.matmul_lower(x, c, U, V, y)
-        z += ops.matmul_upper(x, c, U, V, y)
+        z += ops.matmul_lower(t, c, U, V, y)
+        z += ops.matmul_upper(t, c, U, V, y)
 
         if is_vector:
             return z[:, 0]
@@ -133,11 +143,13 @@ def dot(self, x, diag, y):
 
 
 class TermSum(Term):
+    __doc__ = base_terms.TermSum.__doc__
+
     def __init__(self, *terms):
-        if any(isinstance(term, TermConvolution) for term in terms):
+        if any(term.__requires_general_addition__ for term in terms):
             raise TypeError(
-                "You cannot perform operations on an TermConvolution, it must "
-                "be the outer term in the kernel"
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
             )
         self._terms = terms
 
@@ -150,14 +162,66 @@ def get_coefficients(self):
         return tuple(np.concatenate(c) for c in zip(*coeffs))
 
 
+class TermSumGeneral(Term):
+    __doc__ = base_terms.TermSumGeneral.__doc__
+    __requires_general_addition__ = True
+
+    def __init__(self, *terms):
+        basic = [
+            term for term in terms if not term.__requires_general_addition__
+        ]
+        self._terms = [
+            term for term in terms if term.__requires_general_addition__
+        ]
+        if len(basic):
+            self._terms.insert(0, TermSum(*basic))
+        if not len(self._terms):
+            raise ValueError(
+                "A general term sum cannot be instantiated without any terms"
+            )
+
+    @property
+    def terms(self):
+        return self._terms
+
+    def get_value(self, *args, **kwargs):
+        K = self.terms[0].get_value(*args, **kwargs)
+        for term in self.terms[1:]:
+            K += term.get_value(*args, **kwargs)
+        return K
+
+    def get_psd(self, omega):
+        p = self.terms[0].get_psd(omega)
+        for term in self.terms[1:]:
+            p += term.get_psd(omega)
+        return p
+
+    def get_celerite_matrices(
+        self, t, diag, *, c=None, a=None, U=None, V=None, X=None
+    ):
+        diag = np.atleast_1d(diag)
+        matrices = [
+            term.get_celerite_matrices(t, np.zeros_like(diag), X=X)
+            for term in self.terms
+        ]
+        c = np.concatenate([m[0] for m in matrices])
+        a = diag + np.sum([m[1] for m in matrices], axis=0)
+        U = np.concatenate([m[2] for m in matrices], axis=1)
+        V = np.concatenate([m[3] for m in matrices], axis=1)
+        return c, a, U, V
+
+
 class TermProduct(Term):
+    __doc__ = base_terms.TermProduct.__doc__
+
     def __init__(self, term1, term2):
-        int1 = isinstance(term1, TermConvolution)
-        int2 = isinstance(term2, TermConvolution)
-        if int1 or int2:
+        if (
+            term1.__requires_general_addition__
+            or term2.__requires_general_addition__
+        ):
             raise TypeError(
-                "You cannot perform operations on an TermConvolution, it must "
-                "be the outer term in the kernel"
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
             )
         self.term1 = term1
         self.term2 = term2
@@ -206,11 +270,13 @@ def get_coefficients(self):
 
 
 class TermDiff(Term):
+    __doc__ = base_terms.TermDiff.__doc__
+
     def __init__(self, term):
-        if isinstance(term, TermConvolution):
+        if term.__requires_general_addition__:
             raise TypeError(
-                "You cannot perform operations on an TermConvolution, it must "
-                "be the outer term in the kernel"
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
             )
         self.term = term
 
@@ -229,7 +295,15 @@ def get_coefficients(self):
 
 
 class TermConvolution(Term):
+    __doc__ = base_terms.TermConvolution.__doc__
+    __requires_general_addition__ = True
+
     def __init__(self, term, delta):
+        if term.__requires_general_addition__:
+            raise TypeError(
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
+            )
         self.term = term
         self.delta = np.float64(delta)
 
@@ -301,70 +375,10 @@ def get_psd(self, omega):
         sinc = np.sin(arg) / arg
         return psd0 * sinc ** 2
 
-    def get_value(self, tau0):
-        dt = self.delta
-        ar, cr, a, b, c, d = self.term.get_coefficients()
-
-        # Format the lags correctly
-        tau0 = np.abs(np.atleast_1d(tau0))
-        tau = tau0[..., None]
-
-        # Precompute some factors
-        dpt = dt + tau
-        dmt = dt - tau
-
-        # Real parts:
-        # tau > Delta
-        crd = cr * dt
-        cosh = np.cosh(crd)
-        norm = 2 * ar / crd ** 2
-        K_large = np.sum(norm * (cosh - 1) * np.exp(-cr * tau), axis=-1)
-
-        # tau < Delta
-        crdmt = cr * dmt
-        K_small = K_large + np.sum(norm * (crdmt - np.sinh(crdmt)), axis=-1)
-
-        # Complex part
-        cd = c * dt
-        dd = d * dt
-        c2 = c ** 2
-        d2 = d ** 2
-        c2pd2 = c2 + d2
-        C1 = a * (c2 - d2) + 2 * b * c * d
-        C2 = b * (c2 - d2) - 2 * a * c * d
-        norm = 1.0 / (dt * c2pd2) ** 2
-        k0 = np.exp(-c * tau)
-        cdt = np.cos(d * tau)
-        sdt = np.sin(d * tau)
-
-        # For tau > Delta
-        cos_term = 2 * (np.cosh(cd) * np.cos(dd) - 1)
-        sin_term = 2 * (np.sinh(cd) * np.sin(dd))
-        factor = k0 * norm
-        K_large += np.sum(
-            (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1
-        )
-        K_large += np.sum(
-            (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1
-        )
-
-        # tau < Delta
-        edmt = np.exp(-c * dmt)
-        edpt = np.exp(-c * dpt)
-        cos_term = (
-            edmt * np.cos(d * dmt) + edpt * np.cos(d * dpt) - 2 * k0 * cdt
-        )
-        sin_term = (
-            edmt * np.sin(d * dmt) + edpt * np.sin(d * dpt) - 2 * k0 * sdt
-        )
-        K_small += np.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
-        K_small += np.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)
-
-        mask = tau0 >= dt
-        return K_large * mask + K_small * (~mask)
-
 
 class RealTerm(Term):
+    __doc__ = base_terms.RealTerm.__doc__
+
     def __init__(self, *, a, c):
         self.a = np.float64(a)
         self.c = np.float64(c)
@@ -375,6 +389,8 @@ def get_coefficients(self):
 
 
 class ComplexTerm(Term):
+    __doc__ = base_terms.ComplexTerm.__doc__
+
     def __init__(self, *, a, b, c, d):
         self.a = np.float64(a)
         self.b = np.float64(b)
@@ -401,6 +417,9 @@ def SHOTerm(*args, **kwargs):
     return under
 
 
+SHOTerm.__doc__ = base_terms.SHOTerm.__doc__
+
+
 class OverdampedSHOTerm(Term):
     __parameter_spec__ = base_terms.SHOTerm.__parameter_spec__
 
@@ -450,6 +469,8 @@ def get_coefficients(self):
 
 
 class Matern32Term(Term):
+    __doc__ = base_terms.Matern32Term.__doc__
+
     def __init__(self, *, sigma, rho, eps=0.01):
         self.sigma = np.float64(sigma)
         self.rho = np.float64(rho)
@@ -470,6 +491,8 @@ def get_coefficients(self):
 
 
 class RotationTerm(TermSum):
+    __doc__ = base_terms.RotationTerm.__doc__
+
     def __init__(self, *, sigma, period, Q0, dQ, f):
         self.sigma = np.float64(sigma)
         self.period = np.float64(period)
diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py
index 6603c3d..b2632c0 100644
--- a/python/celerite2/latent.py
+++ b/python/celerite2/latent.py
@@ -14,6 +14,11 @@ def __init__(self, term, left=None, right=None):
         self.left = left
         self.right = right
 
+    def get_psd(self, omega):
+        raise NotImplementedError(
+            "The PSD is not implemented for latent models"
+        )
+
     def _get_latent(self, left_or_right, t, X):
         if left_or_right is None:
             return np.ones((1, 1, 1))
diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py
index af88844..fbd1ba7 100644
--- a/python/celerite2/terms.py
+++ b/python/celerite2/terms.py
@@ -64,28 +64,41 @@ def get_coefficients(self):
         """
         raise NotImplementedError("subclasses must implement this method")
 
-    def get_value(self, tau):
+    def get_value(self, t, t2=None, *, diag=None, X=None, X2=None):
         """Compute the value of the kernel as a function of lag
 
+        If ``t2`` is provided, the result will be rectangular.
+
         Args:
-            tau (shape[...]): The lags where the kernel should be evaluated.
+            t (shape[N]): The independent coordinates of the lefthand data.
+            t2 (shape[M], optional): The independent coordinates righthand
+                data.
+            diag (shape[N], optional): The diagonal variance of the system.
+            X (shape[N], optional): The latent coordinates of the lefthand
+                data.
+            X2 (shape[N], optional): The latent coordinates of the righthand
+                data.
         """
-        tau = np.abs(np.atleast_1d(tau))
-        ar, cr, ac, bc, cc, dc = self.get_coefficients()
-        k = np.zeros_like(tau)
-        tau = tau[..., None]
-
-        if len(ar):
-            k += np.sum(ar * np.exp(-cr * tau), axis=-1)
-
-        if len(ac):
-            arg = dc * tau
-            k += np.sum(
-                np.exp(-cc * tau) * (ac * np.cos(arg) + bc * np.sin(arg)),
-                axis=-1,
-            )
-
-        return k
+        t = np.atleast_1d(t)
+        if diag is None:
+            diag = np.zeros_like(t)
+        c1, a1, U1, V1 = self.get_celerite_matrices(t, diag, X=X)
+        if t2 is None:
+            Y = np.eye(t.shape[0])
+            K = driver.matmul_lower(t, c1, U1, V1, Y, np.diag(a1))
+            K = driver.matmul_upper(t, c1, U1, V1, Y, K)
+            return K
+
+        t2 = np.atleast_1d(t2)
+        c2, a2, U2, V2 = self.get_celerite_matrices(
+            t2, np.zeros_like(t2), X=X2
+        )
+        Y = np.eye(t2.shape[0])
+        K = driver.general_matmul_lower(
+            t, t2, c1, U1, V2, Y, np.zeros((t.shape[0], t2.shape[0]))
+        )
+        K = driver.general_matmul_upper(t, t2, c1, V1, U2, Y, K)
+        return K
 
     def get_psd(self, omega):
         """Compute the value of the power spectral density for this process
@@ -112,17 +125,6 @@ def get_psd(self, omega):
 
         return np.sqrt(2 / np.pi) * psd
 
-    def to_dense(self, x, diag):
-        """Evaluate the dense covariance matrix for this term
-
-        Args:
-            x (shape[N]): The independent coordinates of the data.
-            diag (shape[N]): The diagonal variance of the system.
-        """
-        K = self.get_value(x[:, None] - x[None, :])
-        K[np.diag_indices_from(K)] += diag
-        return K
-
     def _resize_matrices(self, N, J, c, a, U, V):
         if c is None:
             c = np.empty(J)
@@ -156,11 +158,11 @@ def get_celerite_matrices(
         Args:
             t (shape[N]): The independent coordinates of the data.
             diag (shape[N]): The diagonal variance of the system.
+            c (shape[J], optional): The transport coefficients.
             a (shape[N], optional): The diagonal of the A matrix.
             U (shape[N, J], optional): The first low-rank matrix.
             V (shape[N, J], optional): The second low-rank matrix.
-            P (shape[N-1, J], optional): The regularization matrix used for
-                numerical stability.
+            X (shape[N], optional): The latent coordinates of the data.
 
         Raises:
             ValueError: When the inputs are not valid.
@@ -196,6 +198,7 @@ def dot(self, t, diag, y, *, X=None):
             diag (shape[N]): The diagonal variance of the system.
             y (shape[N] or shape[N, K]): The target of vector or matrix for
                 this operation.
+            X (shape[N], optional): The latent coordinates of the data.
         """
         t = np.atleast_1d(t)
         y = np.atleast_1d(y)
@@ -277,10 +280,10 @@ def __init__(self, *terms):
     def terms(self):
         return self._terms
 
-    def get_value(self, tau):
-        K = self.terms[0].get_value(tau)
+    def get_value(self, *args, **kwargs):
+        K = self.terms[0].get_value(*args, **kwargs)
         for term in self.terms[1:]:
-            K += term.get_value(tau)
+            K += term.get_value(*args, **kwargs)
         return K
 
     def get_psd(self, omega):
@@ -513,69 +516,6 @@ def get_psd(self, omega):
         sinc[m] = np.sin(arg[m]) / arg[m]
         return psd0 * sinc ** 2
 
-    def get_value(self, tau0):
-        dt = self.delta
-        ar, cr, a, b, c, d = self.term.get_coefficients()
-
-        # Format the lags correctly
-        tau0 = np.abs(np.atleast_1d(tau0))
-        tau = tau0[..., None]
-
-        # Precompute some factors
-        dpt = dt + tau
-        dmt = dt - tau
-
-        # Real parts:
-        # tau > Delta
-        crd = cr * dt
-        cosh = np.cosh(crd)
-        norm = 2 * ar / crd ** 2
-        K_large = np.sum(norm * (cosh - 1) * np.exp(-cr * tau), axis=-1)
-
-        # tau < Delta
-        crdmt = cr * dmt
-        K_small = K_large + np.sum(norm * (crdmt - np.sinh(crdmt)), axis=-1)
-
-        # Complex part
-        cd = c * dt
-        dd = d * dt
-        c2 = c ** 2
-        d2 = d ** 2
-        c2pd2 = c2 + d2
-        C1 = a * (c2 - d2) + 2 * b * c * d
-        C2 = b * (c2 - d2) - 2 * a * c * d
-        norm = 1.0 / (dt * c2pd2) ** 2
-        k0 = np.exp(-c * tau)
-        cdt = np.cos(d * tau)
-        sdt = np.sin(d * tau)
-
-        # For tau > Delta
-        cos_term = 2 * (np.cosh(cd) * np.cos(dd) - 1)
-        sin_term = 2 * (np.sinh(cd) * np.sin(dd))
-        factor = k0 * norm
-        K_large += np.sum(
-            (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1
-        )
-        K_large += np.sum(
-            (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1
-        )
-
-        # tau < Delta
-        edmt = np.exp(-c * dmt)
-        edpt = np.exp(-c * dpt)
-        cos_term = (
-            edmt * np.cos(d * dmt) + edpt * np.cos(d * dpt) - 2 * k0 * cdt
-        )
-        sin_term = (
-            edmt * np.sin(d * dmt) + edpt * np.sin(d * dpt) - 2 * k0 * sdt
-        )
-        K_small += np.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
-        K_small += np.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)
-
-        mask = tau0 >= dt
-        K_small[mask] = K_large[mask]
-        return K_small
-
 
 class RealTerm(Term):
     r"""The simplest celerite term
diff --git a/python/celerite2/testing.py b/python/celerite2/testing.py
index 2c5afc1..17a9b65 100644
--- a/python/celerite2/testing.py
+++ b/python/celerite2/testing.py
@@ -30,7 +30,7 @@ def get_matrices(
     c, a, U, V = kernel.get_celerite_matrices(x, diag)
 
     if include_dense:
-        K = kernel.get_value(x[:, None] - x[None, :])
+        K = kernel.get_value(x, x)
         K[np.diag_indices_from(K)] += diag
 
     if not conditional:
@@ -42,7 +42,7 @@ def get_matrices(
     _, _, U2, V2 = kernel.get_celerite_matrices(t, np.zeros_like(t))
 
     if include_dense:
-        K_star = kernel.get_value(t[:, None] - x[None, :])
+        K_star = kernel.get_value(t, x)
         return x, c, a, U, V, K, Y, t, U2, V2, K_star
 
     return x, c, a, U, V, Y, t, U2, V2
@@ -128,17 +128,16 @@ def check_tensor_term(eval_func, term, pyterm, atol=1e-8):
 
     _compare_tensor(
         eval_func,
-        term.to_dense(x, diag),
-        pyterm.to_dense(x, diag),
-        "to_dense",
+        term.get_value(x),
+        pyterm.get_value(x),
+        "get_value",
         atol=atol,
     )
 
-    tau = x[:, None] - x[None, :]
     _compare_tensor(
         eval_func,
-        term.get_value(tau),
-        pyterm.get_value(tau),
+        term.get_value(x, t),
+        pyterm.get_value(x, t),
         "get_value",
         atol=atol,
     )
diff --git a/python/celerite2/theano/core.py b/python/celerite2/theano/core.py
index 1bbed40..2fc87fa 100644
--- a/python/celerite2/theano/core.py
+++ b/python/celerite2/theano/core.py
@@ -14,9 +14,13 @@
     pm = None
 
 CITATIONS = (
-    ("celerite2:foremanmackey17", "celerite2:foremanmackey18"),
+    (
+        "celerite2:foremanmackey17",
+        "celerite2:foremanmackey18",
+        "celerite2:gordon20",
+    ),
     r"""
-@article{exoplanet:foremanmackey17,
+@article{celerite2:foremanmackey17,
    author = {{Foreman-Mackey}, D. and {Agol}, E. and {Ambikasaran}, S. and
              {Angus}, R.},
     title = "{Fast and Scalable Gaussian Process Modeling with Applications to
@@ -30,7 +34,7 @@
    adsurl = {http://adsabs.harvard.edu/abs/2017AJ....154..220F},
   adsnote = {Provided by the SAO/NASA Astrophysics Data System}
 }
-@article{exoplanet:foremanmackey18,
+@article{celerite2:foremanmackey18,
    author = {{Foreman-Mackey}, D.},
     title = "{Scalable Backpropagation for Gaussian Processes using Celerite}",
   journal = {Research Notes of the American Astronomical Society},
@@ -43,6 +47,21 @@
    adsurl = {http://adsabs.harvard.edu/abs/2018RNAAS...2a..31F},
   adsnote = {Provided by the SAO/NASA Astrophysics Data System}
 }
+@article{celerite2:gordon20,
+   author = {{Gordon}, Tyler A. and {Agol}, Eric and {Foreman-Mackey}, Daniel},
+    title = "{A Fast, Two-dimensional Gaussian Process Method Based on
+              Celerite: Applications to Transiting Exoplanet Discovery and
+              Characterization}",
+  journal = {\aj},
+     year = 2020,
+    month = nov,
+   volume = 160,
+   number = 5,
+    pages = {240},
+      doi = {10.3847/1538-3881/abbc16},
+   adsurl = {https://ui.adsabs.harvard.edu/abs/2020AJ....160..240G},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
 """,
 )
 
diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py
index eb3a6fa..126a7ae 100644
--- a/python/celerite2/theano/terms.py
+++ b/python/celerite2/theano/terms.py
@@ -3,6 +3,7 @@
 __all__ = [
     "Term",
     "TermSum",
+    "TermSumGeneral",
     "TermProduct",
     "TermDiff",
     "TermConvolution",
@@ -23,6 +24,7 @@
 
 class Term(base_terms.Term):
     __doc__ = base_terms.Term.__doc__
+    __requires_general_addition__ = False
 
     def __init__(self, *, dtype="float64"):
         self.dtype = dtype
@@ -43,14 +45,26 @@ def terms(self):
     def get_coefficients(self):
         raise NotImplementedError("subclasses must implement this method")
 
-    def get_value(self, tau):
-        ar, cr, ac, bc, cc, dc = self.coefficients
-        tau = tt.abs_(tau)
-        tau = tt.shape_padright(tau)
-        K = tt.sum(ar * tt.exp(-cr * tau), axis=-1)
-        factor = tt.exp(-cc * tau)
-        K += tt.sum(ac * factor * tt.cos(dc * tau), axis=-1)
-        K += tt.sum(bc * factor * tt.sin(dc * tau), axis=-1)
+    def get_value(self, t, t2=None, *, diag=None, X=None, X2=None):
+        t = tt.as_tensor_variable(t)
+        if diag is None:
+            diag = tt.zeros_like(t)
+        c1, a1, U1, V1 = self.get_celerite_matrices(t, diag, X=X)
+        if t2 is None:
+            Y = tt.eye(t.shape[0])
+            K = tt.diag(a1)
+            K += ops.matmul_lower(t, c1, U1, V1, Y)[0]
+            K += ops.matmul_upper(t, c1, U1, V1, Y)[0]
+            return K
+
+        t2 = tt.as_tensor_variable(t2)
+        c2, a2, U2, V2 = self.get_celerite_matrices(
+            t2, tt.zeros_like(t2), X=X2
+        )
+        Y = tt.eye(t2.shape[0])
+        K = tt.zeros((t.shape[0], t2.shape[0]))
+        K += ops.general_matmul_lower(t, t2, c1, U1, V2, Y)[0]
+        K += ops.general_matmul_upper(t, t2, c1, V1, U2, Y)[0]
         return K
 
     def get_psd(self, omega):
@@ -66,11 +80,6 @@ def get_psd(self, omega):
         )
         return np.sqrt(2.0 / np.pi) * power
 
-    def to_dense(self, x, diag):
-        K = self.get_value(x[:, None] - x[None, :])
-        K += tt.diag(diag)
-        return K
-
     def get_celerite_matrices(self, x, diag, **kwargs):
         x = tt.as_tensor_variable(x)
         diag = tt.as_tensor_variable(diag)
@@ -100,7 +109,7 @@ def get_celerite_matrices(self, x, diag, **kwargs):
 
         return c, a, U, V
 
-    def dot(self, x, diag, y):
+    def dot(self, x, diag, y, *, X=None):
         x = tt.as_tensor_variable(x)
         y = tt.as_tensor_variable(y)
 
@@ -111,7 +120,7 @@ def dot(self, x, diag, y):
         if y.ndim != 2:
             raise ValueError("'y' can only be a vector or matrix")
 
-        c, a, U, V = self.get_celerite_matrices(x, diag)
+        c, a, U, V = self.get_celerite_matrices(x, diag, X=X)
         z = y * a[:, None]
         z += ops.matmul_lower(x, c, U, V, y)[0]
         z += ops.matmul_upper(x, c, U, V, y)[0]
@@ -125,11 +134,10 @@ class TermSum(Term):
     __doc__ = base_terms.TermSum.__doc__
 
     def __init__(self, *terms, **kwargs):
-        if any(isinstance(term, TermConvolution) for term in terms):
+        if any(term.__requires_general_addition__ for term in terms):
             raise TypeError(
-                "You cannot perform operations on an "
-                "TermConvolution, it must be the outer term in "
-                "the kernel"
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
             )
         self._terms = terms
         super().__init__(**kwargs)
@@ -143,17 +151,66 @@ def get_coefficients(self):
         return tuple(tt.concatenate(a, axis=0) for a in zip(*coeffs))
 
 
+class TermSumGeneral(Term):
+    __doc__ = base_terms.TermSumGeneral.__doc__
+    __requires_general_addition__ = True
+
+    def __init__(self, *terms):
+        basic = [
+            term for term in terms if not term.__requires_general_addition__
+        ]
+        self._terms = [
+            term for term in terms if term.__requires_general_addition__
+        ]
+        if len(basic):
+            self._terms.insert(0, TermSum(*basic))
+        if not len(self._terms):
+            raise ValueError(
+                "A general term sum cannot be instantiated without any terms"
+            )
+
+    @property
+    def terms(self):
+        return self._terms
+
+    def get_value(self, *args, **kwargs):
+        K = self.terms[0].get_value(*args, **kwargs)
+        for term in self.terms[1:]:
+            K += term.get_value(*args, **kwargs)
+        return K
+
+    def get_psd(self, omega):
+        p = self.terms[0].get_psd(omega)
+        for term in self.terms[1:]:
+            p += term.get_psd(omega)
+        return p
+
+    def get_celerite_matrices(
+        self, t, diag, *, c=None, a=None, U=None, V=None, X=None
+    ):
+        diag = tt.as_tensor_variable(diag)
+        matrices = [
+            term.get_celerite_matrices(t, tt.zeros_like(diag), X=X)
+            for term in self.terms
+        ]
+        c = tt.concatenate([m[0] for m in matrices])
+        a = diag + tt.sum([m[1] for m in matrices], axis=0)
+        U = tt.concatenate([m[2] for m in matrices], axis=1)
+        V = tt.concatenate([m[3] for m in matrices], axis=1)
+        return c, a, U, V
+
+
 class TermProduct(Term):
     __doc__ = base_terms.TermProduct.__doc__
 
     def __init__(self, term1, term2, **kwargs):
-        int1 = isinstance(term1, TermConvolution)
-        int2 = isinstance(term2, TermConvolution)
-        if int1 or int2:
+        if (
+            term1.__requires_general_addition__
+            or term2.__requires_general_addition__
+        ):
             raise TypeError(
-                "You cannot perform operations on an "
-                "TermConvolution, it must be the outer term in "
-                "the kernel"
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
             )
         self.term1 = term1
         self.term2 = term2
@@ -228,11 +285,10 @@ class TermDiff(Term):
     __doc__ = base_terms.TermDiff.__doc__
 
     def __init__(self, term, **kwargs):
-        if isinstance(term, TermConvolution):
+        if term.__requires_general_addition__:
             raise TypeError(
-                "You cannot perform operations on an "
-                "TermConvolution, it must be the outer term in "
-                "the kernel"
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
             )
         self.term = term
         super().__init__(**kwargs)
@@ -253,8 +309,14 @@ def get_coefficients(self):
 
 class TermConvolution(Term):
     __doc__ = base_terms.TermConvolution.__doc__
+    __requires_general_addition__ = True
 
     def __init__(self, term, delta, **kwargs):
+        if term.__requires_general_addition__:
+            raise TypeError(
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
+            )
         self.term = term
         self.delta = tt.as_tensor_variable(delta).astype("float64")
         super().__init__(**kwargs)
@@ -324,67 +386,6 @@ def get_psd(self, omega):
         sinc = tt.switch(tt.neq(arg, 0), tt.sin(arg) / arg, tt.ones_like(arg))
         return psd0 * sinc ** 2
 
-    def get_value(self, tau0):
-        dt = self.delta
-        ar, cr, a, b, c, d = self.term.coefficients
-
-        # Format the lags correctly
-        tau0 = tt.abs_(tau0)
-        tau = tau0[..., None]
-
-        # Precompute some factors
-        dpt = dt + tau
-        dmt = dt - tau
-
-        # Real parts:
-        # tau > Delta
-        crd = cr * dt
-        cosh = tt.cosh(crd)
-        norm = 2 * ar / crd ** 2
-        K_large = tt.sum(norm * (cosh - 1) * tt.exp(-cr * tau), axis=-1)
-
-        # tau < Delta
-        crdmt = cr * dmt
-        K_small = K_large + tt.sum(norm * (crdmt - tt.sinh(crdmt)), axis=-1)
-
-        # Complex part
-        cd = c * dt
-        dd = d * dt
-        c2 = c ** 2
-        d2 = d ** 2
-        c2pd2 = c2 + d2
-        C1 = a * (c2 - d2) + 2 * b * c * d
-        C2 = b * (c2 - d2) - 2 * a * c * d
-        norm = 1.0 / (dt * c2pd2) ** 2
-        k0 = tt.exp(-c * tau)
-        cdt = tt.cos(d * tau)
-        sdt = tt.sin(d * tau)
-
-        # For tau > Delta
-        cos_term = 2 * (tt.cosh(cd) * tt.cos(dd) - 1)
-        sin_term = 2 * (tt.sinh(cd) * tt.sin(dd))
-        factor = k0 * norm
-        K_large += tt.sum(
-            (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1
-        )
-        K_large += tt.sum(
-            (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1
-        )
-
-        # tau < Delta
-        edmt = tt.exp(-c * dmt)
-        edpt = tt.exp(-c * dpt)
-        cos_term = (
-            edmt * tt.cos(d * dmt) + edpt * tt.cos(d * dpt) - 2 * k0 * cdt
-        )
-        sin_term = (
-            edmt * tt.sin(d * dmt) + edpt * tt.sin(d * dpt) - 2 * k0 * sdt
-        )
-        K_small += tt.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
-        K_small += tt.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)
-
-        return tt.switch(tt.le(tau0, dt), K_small, K_large)
-
 
 class RealTerm(Term):
     __doc__ = base_terms.RealTerm.__doc__
diff --git a/python/test/test_terms.py b/python/test/test_terms.py
index 4ec2772..f1084ca 100644
--- a/python/test/test_terms.py
+++ b/python/test/test_terms.py
@@ -94,10 +94,9 @@ def test_consistency(oterm):
     np.random.seed(40582)
     x = np.sort(np.random.uniform(0, 10, 50))
     diag = np.random.uniform(0.1, 0.3, len(x))
-    assert np.allclose(oterm.get_value(x), term.get_value(x))
 
     tau = x[:, None] - x[None, :]
-    K = term.get_value(tau)
+    K = term.get_value(x, x)
     assert np.allclose(oterm.get_value(tau), K)
 
     # And the power spectrum
diff --git a/python/test/theano/test_core.py b/python/test/theano/test_core.py
index c7082f8..6bbbacc 100644
--- a/python/test/theano/test_core.py
+++ b/python/test/theano/test_core.py
@@ -6,7 +6,7 @@
 from celerite2 import terms as pyterms
 from celerite2.testing import check_gp_models
 from celerite2.theano import GaussianProcess, terms
-from celerite2.theano.celerite2 import CITATIONS
+from celerite2.theano.core import CITATIONS
 
 term_mark = pytest.mark.parametrize(
     "name,args",

From 824e4b4302557271546f72ff027f75d1a51bc615 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 17 Nov 2020 21:38:27 -0500
Subject: [PATCH 03/14] adding initial test for latent term and derivative
 latent

---
 python/celerite2/__init__.py |   4 +-
 python/celerite2/latent.py   | 148 ++++++++++++++++++++++++++---------
 python/celerite2/terms.py    |  22 ++++--
 python/test/test_latent.py   | 111 ++++++++++++++++++++++++++
 4 files changed, 243 insertions(+), 42 deletions(-)
 create mode 100644 python/test/test_latent.py

diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py
index 3751528..5670f70 100644
--- a/python/celerite2/__init__.py
+++ b/python/celerite2/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
-__all__ = ["__version__", "terms", "GaussianProcess"]
-from . import terms
+__all__ = ["__version__", "terms", "latent", "GaussianProcess"]
+from . import latent, terms
 from .celerite2_version import __version__
 from .core import GaussianProcess
 
diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py
index b2632c0..6de2f53 100644
--- a/python/celerite2/latent.py
+++ b/python/celerite2/latent.py
@@ -1,18 +1,36 @@
 # -*- coding: utf-8 -*-
 
-__all__ = ["LatentTerm", "prepare_rectangular_data", "KroneckerLatent"]
+__all__ = [
+    "prepare_rectangular_data",
+    "LatentTerm",
+    "KroneckerLatentTerm",
+]
 import numpy as np
 
-from .terms import Term
+from . import terms
 
 
-class LatentTerm(Term):
+def prepare_rectangular_data(N, M, t, **kwargs):
+    results = dict(
+        t=np.tile(np.asarray(t)[:, None], (1, M)).flatten(),
+        X=np.tile(np.arange(M), N),
+    )
+
+    for k, v in kwargs.items():
+        results[k] = np.ascontiguousarray(
+            np.broadcast_to(v, (N, M)), dtype=np.float64
+        ).flatten()
+
+    return results
+
+
+class LatentTerm(terms.Term):
     __requires_general_addition__ = True
 
-    def __init__(self, term, left=None, right=None):
+    def __init__(self, term, left_latent=None, right_latent=None):
         self.term = term
-        self.left = left
-        self.right = right
+        self.left_latent = left_latent
+        self.right_latent = right_latent
 
     def get_psd(self, omega):
         raise NotImplementedError(
@@ -35,12 +53,12 @@ def _get_latent(self, left_or_right, t, X):
         return left_or_right
 
     def get_left_latent(self, t, X):
-        return self._get_latent(self.left, t, X)
+        return self._get_latent(self.left_latent, t, X)
 
     def get_right_latent(self, t, X):
-        if self.right is None and self.left is not None:
-            return self._get_latent(self.left, t, X)
-        return self._get_latent(self.right, t, X)
+        if self.right_latent is None and self.left_latent is not None:
+            return self._get_latent(self.left_latent, t, X)
+        return self._get_latent(self.right_latent, t, X)
 
     def get_celerite_matrices(
         self,
@@ -65,7 +83,7 @@ def get_celerite_matrices(
         J = c0.shape[0] * K
         c, a, U, V = self._resize_matrices(N, J, c, a, U, V)
 
-        c[:] = np.tile(c0, K)
+        c[:] = np.repeat(c0, K)
         U[:] = np.ascontiguousarray(U0[:, :, None] * left).reshape((N, -1))
         V[:] = np.ascontiguousarray(V0[:, :, None] * right).reshape((N, -1))
         a[:] = diag + np.sum(U * V, axis=-1)
@@ -73,38 +91,98 @@ def get_celerite_matrices(
         return c, a, U, V
 
 
-def prepare_rectangular_data(N, M, t, **kwargs):
-    results = dict(
-        t=np.tile(np.asarray(t)[:, None], (1, M)).flatten(),
-        X=np.tile(np.arange(M), N),
-    )
-
-    for k, v in kwargs.items():
-        results[k] = np.ascontiguousarray(
-            np.broadcast_to(v, (N, M)), dtype=np.float64
-        ).flatten()
-
-    return results
-
-
-class KroneckerLatent:
-    def __init__(self, *, R=None, L=None):
+class KroneckerLatentTerm(LatentTerm):
+    def __init__(self, term, *, R=None, L=None):
+        self.R = None
+        self.L = None
         if R is not None:
             if L is not None:
                 raise ValueError("Only one of 'R' and 'L' can be defined")
-            R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64)
-            try:
-                self.L = np.linalg.cholesky(R)
-            except np.linalg.LinAlgError:
-                M = np.copy(R)
-                M[np.diag_indices_from(M)] += 1e-10
-                self.L = np.linalg.cholesky(M)
+            self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64)
+            super().__init__(
+                term,
+                left_latent=self._left_latent,
+                right_latent=self._right_latent,
+            )
+
         elif L is not None:
             self.L = np.ascontiguousarray(L, dtype=np.float64)
             if self.L.ndim == 1:
                 self.L = np.reshape(self.L, (-1, 1))
+            super().__init__(
+                term,
+                left_latent=self._lowrank_latent,
+            )
+
         else:
             raise ValueError("One of 'R' and 'L' must be defined")
 
-    def __call__(self, t, inds):
+    def _left_latent(self, t, inds):
+        return self.R[inds][:, None, :]
+
+    def _right_latent(self, t, inds):
+        N = len(t)
+        latent = np.zeros((N, 1, self.R.shape[0]))
+        latent[(np.arange(N), 0, inds)] = 1.0
+        return latent
+
+    def _lowrank_latent(self, t, inds):
         return self.L[inds][:, None, :]
+
+
+class _DerivativeHelperTerm(terms.Term):
+    def __init__(self, term):
+        self.term = term
+
+    def get_coefficients(self):
+        coeffs = self.term.get_coefficients()
+        a, b, c, d = coeffs[2:]
+        final_coeffs = [
+            -coeffs[0] * coeffs[1],
+            coeffs[1],
+            b * d - c * a,
+            -(a * d + b * c),
+            c,
+            d,
+        ]
+        return final_coeffs
+
+
+class DerivativeLatentTerm(LatentTerm):
+    def __init__(self, term):
+        if term.__requires_general_addition__:
+            raise TypeError(
+                "You cannot perform operations on a term that requires general"
+                " addition, it must be the outer term in the kernel"
+            )
+
+        ar, cr, ac, bc, cc, dc = term.get_coefficients()
+
+        Jr = len(cr)
+        Jc = 2 * len(cc)
+        J = Jr + Jc
+
+        self.left = np.zeros((2, 4 * J, 1))
+        self.right = np.zeros((2, 4 * J, 1))
+
+        self.left[0, :Jr] = 1.0
+        self.left[0, 2 * Jr : 3 * Jr] = -1.0
+        self.left[1, Jr : 2 * Jr] = 1.0
+        self.left[1, 3 * Jr : 4 * Jr] = 1.0
+
+        self.right[0, : 2 * Jr] = 1.0
+        self.right[1, 2 * Jr : 4 * Jr] = 1.0
+
+        dterm = _DerivativeHelperTerm(term)
+        d2term = terms.TermDiff(term)
+        super().__init__(
+            terms.TermSum(term, dterm, dterm, d2term),
+            left_latent=self._left_latent,
+            right_latent=self._right_latent,
+        )
+
+    def _left_latent(self, t, inds):
+        return self.left[inds]
+
+    def _right_latent(self, t, inds):
+        return self.right[inds]
diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py
index fbd1ba7..c3e14e3 100644
--- a/python/celerite2/terms.py
+++ b/python/celerite2/terms.py
@@ -182,12 +182,24 @@ def get_celerite_matrices(
         J = Jr + 2 * Jc
         c, a, U, V = self._resize_matrices(N, J, c, a, U, V)
 
+        arg = dc[None, :] * t[:, None]
+        cos = np.cos(arg)
+        sin = np.sin(arg)
+
         c[:Jr] = cr
-        c[Jr::2] = cc
-        c[Jr + 1 :: 2] = cc
-        a, U, V = driver.get_celerite_matrices(
-            ar, ac, bc, dc, t, diag, a, U, V
-        )
+        c[Jr : Jr + Jc] = cc
+        c[Jr + Jc :] = cc
+
+        a[:] = diag + np.sum(ar) + np.sum(ac)
+
+        U[:, :Jr] = ar[None, :]
+        U[:, Jr : Jr + Jc] = ac[None, :] * cos + bc[None, :] * sin
+        U[:, Jr + Jc :] = ac[None, :] * sin - bc[None, :] * cos
+
+        V[:, :Jr] = 1.0
+        V[:, Jr : Jr + Jc] = cos
+        V[:, Jr + Jc :] = sin
+
         return c, a, U, V
 
     def dot(self, t, diag, y, *, X=None):
diff --git a/python/test/test_latent.py b/python/test/test_latent.py
new file mode 100644
index 0000000..3eb9b1c
--- /dev/null
+++ b/python/test/test_latent.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import pytest
+
+from celerite2 import latent, terms
+
+
+@pytest.fixture
+def data():
+    N = 10
+    M = 3
+    np.random.seed(105)
+    x = np.sort(np.random.uniform(0, 10, N))
+    y = np.random.randn(N * M, 3)
+    data = latent.prepare_rectangular_data(N, M, t=x)
+
+    return N, M, x, data["t"], data["X"], y
+
+
+def test_value(data):
+    N, M, t0, t, X, y = data
+
+    R = np.random.randn(M, M)
+    R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + 1e-5
+    R[np.triu_indices_from(R, 1)] = 0.0
+    R = np.dot(R, R.T)
+
+    term = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3)
+    term += terms.SHOTerm(sigma=0.23456, rho=3.4, Q=2.3)
+    kernel = latent.KroneckerLatentTerm(term, R=R)
+
+    ar, cr, ac, bc, cc, dc = term.get_coefficients()
+    c0, a0, U0, V0 = term.get_celerite_matrices(t0, np.zeros_like(t0))
+    c, a, U, V = kernel.get_celerite_matrices(t, np.zeros_like(t), X=X)
+
+    assert c.shape == (len(c0) * M,)
+    assert a.shape == (N * M,)
+    assert U.shape == (N * M, len(c0) * M)
+    assert V.shape == (N * M, len(c0) * M)
+
+    assert np.allclose(c, np.repeat(c0, M))
+    assert np.allclose(a, (a0[:, None] * np.diag(R)[None, :]).flatten())
+    assert np.allclose(U, np.kron(U0, R))
+    assert np.allclose(V, np.kron(V0, np.eye(M)))
+
+    K = kernel.get_value(t, X=X)
+    K0 = term.get_value(t0)
+
+    assert np.allclose(K, np.kron(K0, R))
+
+
+# def test_low_rank_value(data):
+#     N, M, x, diag, y, t = data
+
+#     alpha = np.random.randn(M)
+#     term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3)
+#     term = kron.KronTerm(term0, L=alpha)
+
+#     a, U, V, P = term.get_celerite_matrices(x, diag)
+#     assert a.shape == (N * M,)
+#     assert U.shape == (N * M, 2)
+#     assert V.shape == (N * M, 2)
+#     assert P.shape == (N * M - 1, 2)
+
+#     check_value(term, x, diag, y, t)
+
+#     full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha))
+#     assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y))
+
+
+# def test_sum_value(data):
+#     N, M, x, diag, y, t = data
+
+#     alpha = np.random.randn(M)
+#     R = np.random.randn(M, M)
+#     R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)])
+#     R[np.triu_indices_from(R, 1)] = 0.0
+#     R = np.dot(R, R.T)
+
+#     term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3)
+#     term = kron.KronTerm(term0, R=R) + kron.KronTerm(term0, L=alpha)
+
+#     a, U, V, P = term.get_celerite_matrices(x, diag)
+#     assert a.shape == (N * M,)
+#     assert U.shape == (N * M, 2 * M + 2)
+#     assert V.shape == (N * M, 2 * M + 2)
+#     assert P.shape == (N * M - 1, 2 * M + 2)
+
+#     check_value(term, x, diag, y, t)
+
+
+# def test_missing_values(data):
+#     N, M, x, diag, y, t = data
+#     mask = np.random.rand(N, M) > 0.1
+#     assert np.all(mask.sum(axis=1) > 0)
+
+#     R = np.random.randn(M, M)
+#     R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)])
+#     R[np.triu_indices_from(R, 1)] = 0.0
+#     R = np.dot(R, R.T)
+
+#     term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3)
+#     term = kron.KronTerm(term0, R=R)
+
+#     a, U, V, P = term.get_celerite_matrices(x, diag, mask=mask)
+#     assert a.shape == (mask.sum(),)
+#     assert U.shape == (mask.sum(), 2 * M)
+#     assert V.shape == (mask.sum(), 2 * M)
+#     assert P.shape == (mask.sum() - 1, 2 * M)
+
+#     check_value(term, x, diag, y, t, mask=mask)

From 54ee88cf790d25f8fa0faffb6e5e5821dc62a4fc Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 17 Nov 2020 21:45:47 -0500
Subject: [PATCH 04/14] derivative latent for complex terms

---
 python/celerite2/latent.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py
index 6de2f53..7b85a8d 100644
--- a/python/celerite2/latent.py
+++ b/python/celerite2/latent.py
@@ -140,7 +140,7 @@ def get_coefficients(self):
         final_coeffs = [
             -coeffs[0] * coeffs[1],
             coeffs[1],
-            b * d - c * a,
+            b * d - a * c,
             -(a * d + b * c),
             c,
             d,
@@ -159,19 +159,29 @@ def __init__(self, term):
         ar, cr, ac, bc, cc, dc = term.get_coefficients()
 
         Jr = len(cr)
-        Jc = 2 * len(cc)
-        J = Jr + Jc
+        Jc = len(cc)
+        J = Jr + 2 * Jc
 
         self.left = np.zeros((2, 4 * J, 1))
         self.right = np.zeros((2, 4 * J, 1))
 
-        self.left[0, :Jr] = 1.0
-        self.left[0, 2 * Jr : 3 * Jr] = -1.0
-        self.left[1, Jr : 2 * Jr] = 1.0
-        self.left[1, 3 * Jr : 4 * Jr] = 1.0
-
-        self.right[0, : 2 * Jr] = 1.0
-        self.right[1, 2 * Jr : 4 * Jr] = 1.0
+        if Jr:
+            self.left[0, :Jr] = 1.0
+            self.left[0, 2 * Jr : 3 * Jr] = -1.0
+            self.left[1, Jr : 2 * Jr] = 1.0
+            self.left[1, 3 * Jr : 4 * Jr] = 1.0
+            self.right[0, : 2 * Jr] = 1.0
+            self.right[1, 2 * Jr : 4 * Jr] = 1.0
+
+        if Jc:
+            J0 = 4 * Jr
+            for J0 in [4 * Jr, 4 * Jr + 4 * Jc]:
+                self.left[0, J0 : J0 + Jc] = 1.0
+                self.left[0, J0 + 2 * Jc : J0 + 3 * Jc] = -1.0
+                self.left[1, J0 + Jc : J0 + 2 * Jc] = 1.0
+                self.left[1, J0 + 3 * Jc : J0 + 4 * Jc] = 1.0
+                self.right[0, J0 : J0 + 2 * Jc] = 1.0
+                self.right[1, J0 + 2 * Jc : J0 + 4 * Jc] = 1.0
 
         dterm = _DerivativeHelperTerm(term)
         d2term = terms.TermDiff(term)

From ad65d6a1fe615292a80122eadab1b26bee4e4fa0 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Thu, 3 Dec 2020 10:56:30 -0500
Subject: [PATCH 05/14] remove derivative term

---
 python/celerite2/latent.py | 95 +++++++++-----------------------------
 python/celerite2/terms.py  |  4 ++
 2 files changed, 26 insertions(+), 73 deletions(-)

diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py
index 7b85a8d..7993f31 100644
--- a/python/celerite2/latent.py
+++ b/python/celerite2/latent.py
@@ -27,11 +27,18 @@ def prepare_rectangular_data(N, M, t, **kwargs):
 class LatentTerm(terms.Term):
     __requires_general_addition__ = True
 
-    def __init__(self, term, left_latent=None, right_latent=None):
+    def __init__(
+        self, term, *, dimension, left_latent=None, right_latent=None
+    ):
+        self._dimension = int(dimension)
         self.term = term
         self.left_latent = left_latent
         self.right_latent = right_latent
 
+    @property
+    def dimension(self):
+        return self._dimension
+
     def get_psd(self, omega):
         raise NotImplementedError(
             "The PSD is not implemented for latent models"
@@ -73,19 +80,27 @@ def get_celerite_matrices(
     ):
         t = np.atleast_1d(t)
         diag = np.atleast_1d(diag)
-        c0, a0, U0, V0 = self.term.get_celerite_matrices(t, diag, X=X)
 
         left = self.get_left_latent(t, X)
         right = self.get_right_latent(t, X)
+        if left.shape != right.shape:
+            raise ValueError(
+                "The dimensions of the left and right latent models are "
+                "incompatible"
+            )
 
-        N = len(t)
+        c0, a0, U0, V0 = self.term.get_celerite_matrices(t, diag, X=X)
+        U0 = U0[:, :, None]
+        V0 = V0[:, :, None]
+
+        N = t.shape[0]
         K = left.shape[2]
         J = c0.shape[0] * K
         c, a, U, V = self._resize_matrices(N, J, c, a, U, V)
 
         c[:] = np.repeat(c0, K)
-        U[:] = np.ascontiguousarray(U0[:, :, None] * left).reshape((N, -1))
-        V[:] = np.ascontiguousarray(V0[:, :, None] * right).reshape((N, -1))
+        U[:] = np.ascontiguousarray(U0 * left).reshape((N, -1))
+        V[:] = np.ascontiguousarray(V0 * right).reshape((N, -1))
         a[:] = diag + np.sum(U * V, axis=-1)
 
         return c, a, U, V
@@ -101,6 +116,7 @@ def __init__(self, term, *, R=None, L=None):
             self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64)
             super().__init__(
                 term,
+                dimension=self.R.shape[0],
                 left_latent=self._left_latent,
                 right_latent=self._right_latent,
             )
@@ -111,6 +127,7 @@ def __init__(self, term, *, R=None, L=None):
                 self.L = np.reshape(self.L, (-1, 1))
             super().__init__(
                 term,
+                dimension=self.L.shape[0],
                 left_latent=self._lowrank_latent,
             )
 
@@ -128,71 +145,3 @@ def _right_latent(self, t, inds):
 
     def _lowrank_latent(self, t, inds):
         return self.L[inds][:, None, :]
-
-
-class _DerivativeHelperTerm(terms.Term):
-    def __init__(self, term):
-        self.term = term
-
-    def get_coefficients(self):
-        coeffs = self.term.get_coefficients()
-        a, b, c, d = coeffs[2:]
-        final_coeffs = [
-            -coeffs[0] * coeffs[1],
-            coeffs[1],
-            b * d - a * c,
-            -(a * d + b * c),
-            c,
-            d,
-        ]
-        return final_coeffs
-
-
-class DerivativeLatentTerm(LatentTerm):
-    def __init__(self, term):
-        if term.__requires_general_addition__:
-            raise TypeError(
-                "You cannot perform operations on a term that requires general"
-                " addition, it must be the outer term in the kernel"
-            )
-
-        ar, cr, ac, bc, cc, dc = term.get_coefficients()
-
-        Jr = len(cr)
-        Jc = len(cc)
-        J = Jr + 2 * Jc
-
-        self.left = np.zeros((2, 4 * J, 1))
-        self.right = np.zeros((2, 4 * J, 1))
-
-        if Jr:
-            self.left[0, :Jr] = 1.0
-            self.left[0, 2 * Jr : 3 * Jr] = -1.0
-            self.left[1, Jr : 2 * Jr] = 1.0
-            self.left[1, 3 * Jr : 4 * Jr] = 1.0
-            self.right[0, : 2 * Jr] = 1.0
-            self.right[1, 2 * Jr : 4 * Jr] = 1.0
-
-        if Jc:
-            J0 = 4 * Jr
-            for J0 in [4 * Jr, 4 * Jr + 4 * Jc]:
-                self.left[0, J0 : J0 + Jc] = 1.0
-                self.left[0, J0 + 2 * Jc : J0 + 3 * Jc] = -1.0
-                self.left[1, J0 + Jc : J0 + 2 * Jc] = 1.0
-                self.left[1, J0 + 3 * Jc : J0 + 4 * Jc] = 1.0
-                self.right[0, J0 : J0 + 2 * Jc] = 1.0
-                self.right[1, J0 + 2 * Jc : J0 + 4 * Jc] = 1.0
-
-        dterm = _DerivativeHelperTerm(term)
-        d2term = terms.TermDiff(term)
-        super().__init__(
-            terms.TermSum(term, dterm, dterm, d2term),
-            left_latent=self._left_latent,
-            right_latent=self._right_latent,
-        )
-
-    def _left_latent(self, t, inds):
-        return self.left[inds]
-
-    def _right_latent(self, t, inds):
-        return self.right[inds]
diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py
index c3e14e3..64e7c0b 100644
--- a/python/celerite2/terms.py
+++ b/python/celerite2/terms.py
@@ -45,6 +45,10 @@ def __add__(self, b):
     def __mul__(self, b):
         return TermProduct(self, b)
 
+    @property
+    def dimension(self):
+        return 0
+
     @property
     def terms(self):
         return [self]

From 60df5ad4dfa90c70385f98d898c43570302c95e8 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 8 Dec 2020 11:14:07 -0500
Subject: [PATCH 06/14] install ipykernel for tutorials

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index d876eb0..c8b30de 100644
--- a/setup.py
+++ b/setup.py
@@ -62,6 +62,7 @@
         "pymc3",
         "tqdm",
         "numpyro",
+        "ipykernel",
     ],
 }
 EXTRA_REQUIRE["dev"] = (

From 7b2ee4af64d34bf31960f62d50dce59e1058a300 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 8 Dec 2020 12:44:23 -0500
Subject: [PATCH 07/14] tutorial installation

---
 .github/workflows/tutorials.yml |  4 +-
 python/celerite2/latent.py      | 18 ++++----
 python/test/test_latent.py      | 75 +++++++++------------------------
 3 files changed, 32 insertions(+), 65 deletions(-)

diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml
index 2f16822..37f4ff7 100644
--- a/.github/workflows/tutorials.yml
+++ b/.github/workflows/tutorials.yml
@@ -27,7 +27,9 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install -U pip
-          python -m pip install --use-feature=2020-resolver ".[tutorials]"
+          python -m pip install --use-feature=2020-resolver jupytext ipykernel theano jupyter
+          python -m ipykernel install --name python
+          # python -m pip install --use-feature=2020-resolver ".[tutorials]"
 
       - name: Get theano compiledir
         id: compiledir
diff --git a/python/celerite2/latent.py b/python/celerite2/latent.py
index 7993f31..f75fe3e 100644
--- a/python/celerite2/latent.py
+++ b/python/celerite2/latent.py
@@ -107,16 +107,16 @@ def get_celerite_matrices(
 
 
 class KroneckerLatentTerm(LatentTerm):
-    def __init__(self, term, *, R=None, L=None):
-        self.R = None
+    def __init__(self, term, *, K=None, L=None):
+        self.K = None
         self.L = None
-        if R is not None:
+        if K is not None:
             if L is not None:
-                raise ValueError("Only one of 'R' and 'L' can be defined")
-            self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64)
+                raise ValueError("Only one of 'K' and 'L' can be defined")
+            self.K = np.ascontiguousarray(np.atleast_2d(K), dtype=np.float64)
             super().__init__(
                 term,
-                dimension=self.R.shape[0],
+                dimension=self.K.shape[0],
                 left_latent=self._left_latent,
                 right_latent=self._right_latent,
             )
@@ -132,14 +132,14 @@ def __init__(self, term, *, R=None, L=None):
             )
 
         else:
-            raise ValueError("One of 'R' and 'L' must be defined")
+            raise ValueError("One of 'K' and 'L' must be defined")
 
     def _left_latent(self, t, inds):
-        return self.R[inds][:, None, :]
+        return self.K[inds][:, None, :]
 
     def _right_latent(self, t, inds):
         N = len(t)
-        latent = np.zeros((N, 1, self.R.shape[0]))
+        latent = np.zeros((N, 1, self.K.shape[0]))
         latent[(np.arange(N), 0, inds)] = 1.0
         return latent
 
diff --git a/python/test/test_latent.py b/python/test/test_latent.py
index 3eb9b1c..7433740 100644
--- a/python/test/test_latent.py
+++ b/python/test/test_latent.py
@@ -27,7 +27,7 @@ def test_value(data):
 
     term = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3)
     term += terms.SHOTerm(sigma=0.23456, rho=3.4, Q=2.3)
-    kernel = latent.KroneckerLatentTerm(term, R=R)
+    kernel = latent.KroneckerLatentTerm(term, K=R)
 
     ar, cr, ac, bc, cc, dc = term.get_coefficients()
     c0, a0, U0, V0 = term.get_celerite_matrices(t0, np.zeros_like(t0))
@@ -49,63 +49,28 @@ def test_value(data):
     assert np.allclose(K, np.kron(K0, R))
 
 
-# def test_low_rank_value(data):
-#     N, M, x, diag, y, t = data
-
-#     alpha = np.random.randn(M)
-#     term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3)
-#     term = kron.KronTerm(term0, L=alpha)
-
-#     a, U, V, P = term.get_celerite_matrices(x, diag)
-#     assert a.shape == (N * M,)
-#     assert U.shape == (N * M, 2)
-#     assert V.shape == (N * M, 2)
-#     assert P.shape == (N * M - 1, 2)
-
-#     check_value(term, x, diag, y, t)
-
-#     full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha))
-#     assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y))
-
-
-# def test_sum_value(data):
-#     N, M, x, diag, y, t = data
-
-#     alpha = np.random.randn(M)
-#     R = np.random.randn(M, M)
-#     R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)])
-#     R[np.triu_indices_from(R, 1)] = 0.0
-#     R = np.dot(R, R.T)
-
-#     term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3)
-#     term = kron.KronTerm(term0, R=R) + kron.KronTerm(term0, L=alpha)
-
-#     a, U, V, P = term.get_celerite_matrices(x, diag)
-#     assert a.shape == (N * M,)
-#     assert U.shape == (N * M, 2 * M + 2)
-#     assert V.shape == (N * M, 2 * M + 2)
-#     assert P.shape == (N * M - 1, 2 * M + 2)
-
-#     check_value(term, x, diag, y, t)
+def test_low_rank_value(data):
+    N, M, t0, t, X, y = data
 
+    alpha = np.random.randn(M)
+    R = np.outer(alpha, alpha)
+    term = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3)
+    term += terms.SHOTerm(sigma=0.23456, rho=3.4, Q=2.3)
+    kernel = latent.KroneckerLatentTerm(term, L=alpha)
 
-# def test_missing_values(data):
-#     N, M, x, diag, y, t = data
-#     mask = np.random.rand(N, M) > 0.1
-#     assert np.all(mask.sum(axis=1) > 0)
+    ar, cr, ac, bc, cc, dc = term.get_coefficients()
+    c0, a0, U0, V0 = term.get_celerite_matrices(t0, np.zeros_like(t0))
+    c, a, U, V = kernel.get_celerite_matrices(t, np.zeros_like(t), X=X)
 
-#     R = np.random.randn(M, M)
-#     R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)])
-#     R[np.triu_indices_from(R, 1)] = 0.0
-#     R = np.dot(R, R.T)
+    assert c.shape == (len(c0),)
+    assert a.shape == (N * M,)
+    assert U.shape == (N * M, len(c0))
+    assert V.shape == (N * M, len(c0))
 
-#     term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3)
-#     term = kron.KronTerm(term0, R=R)
+    assert np.allclose(c, c0)
+    assert np.allclose(a, (a0[:, None] * np.diag(R)[None, :]).flatten())
 
-#     a, U, V, P = term.get_celerite_matrices(x, diag, mask=mask)
-#     assert a.shape == (mask.sum(),)
-#     assert U.shape == (mask.sum(), 2 * M)
-#     assert V.shape == (mask.sum(), 2 * M)
-#     assert P.shape == (mask.sum() - 1, 2 * M)
+    K = kernel.get_value(t, X=X)
+    K0 = term.get_value(t0)
 
-#     check_value(term, x, diag, y, t, mask=mask)
+    assert np.allclose(K, np.kron(K0, R))

From 8c59418c44a5061dd17364d84a9cdd7db39fddea Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 8 Dec 2020 14:25:45 -0500
Subject: [PATCH 08/14] using conda to install python for tutorials

---
 .github/workflows/tutorials.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml
index 37f4ff7..20f0ef3 100644
--- a/.github/workflows/tutorials.yml
+++ b/.github/workflows/tutorials.yml
@@ -20,11 +20,13 @@ jobs:
           fetch-depth: 0
 
       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: conda-incubator/setup-miniconda@v2
         with:
           python-version: 3.8
+          auto-update-conda: true
 
       - name: Install dependencies
+        shell: bash -l {0}
         run: |
           python -m pip install -U pip
           python -m pip install --use-feature=2020-resolver jupytext ipykernel theano jupyter
@@ -32,6 +34,7 @@ jobs:
           # python -m pip install --use-feature=2020-resolver ".[tutorials]"
 
       - name: Get theano compiledir
+        shell: bash -l {0}
         id: compiledir
         run: |
           python -c "import theano; print('::set-output name=compiledir::' + theano.config.compiledir.split('/')[-1])"
@@ -46,6 +49,7 @@ jobs:
             tutorials-
 
       - name: Execute the notebooks
+        shell: bash -l {0}
         run: |
           jupytext --to ipynb --execute docs/tutorials/*.py
 

From 8f8cb9022441da4cc643604f5a4ddc064b3c8075 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 8 Dec 2020 14:37:09 -0500
Subject: [PATCH 09/14] install wontcha

---
 .github/workflows/tutorials.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml
index 20f0ef3..b83f86e 100644
--- a/.github/workflows/tutorials.yml
+++ b/.github/workflows/tutorials.yml
@@ -29,8 +29,7 @@ jobs:
         shell: bash -l {0}
         run: |
           python -m pip install -U pip
-          python -m pip install --use-feature=2020-resolver jupytext ipykernel theano jupyter
-          python -m ipykernel install --name python
+          python -m pip install jupytext ipykernel theano jupyter
           # python -m pip install --use-feature=2020-resolver ".[tutorials]"
 
       - name: Get theano compiledir

From e06cb95b63a673c9fee08fefa66fec4e536f6ea9 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 8 Dec 2020 14:40:14 -0500
Subject: [PATCH 10/14] properly install

---
 .github/workflows/tutorials.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml
index b83f86e..258ad09 100644
--- a/.github/workflows/tutorials.yml
+++ b/.github/workflows/tutorials.yml
@@ -29,8 +29,7 @@ jobs:
         shell: bash -l {0}
         run: |
           python -m pip install -U pip
-          python -m pip install jupytext ipykernel theano jupyter
-          # python -m pip install --use-feature=2020-resolver ".[tutorials]"
+          python -m pip install ".[tutorials]"
 
       - name: Get theano compiledir
         shell: bash -l {0}

From 9ea409b843eefe5e30792c316df3280fb3863fc6 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 8 Dec 2020 16:01:34 -0500
Subject: [PATCH 11/14] installation

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c8b30de..ff7671c 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@
     "tutorials": [
         "jupytext",
         "jupyter",
-        "nbconvert",
+        # "nbconvert",
         "matplotlib",
         "scipy",
         "emcee",

From 0459533d4623b1acb430fd9d2b0059c346877615 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 8 Dec 2020 16:33:46 -0500
Subject: [PATCH 12/14] wtf

---
 .github/workflows/tutorials.yml | 1 +
 setup.py                        | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml
index 258ad09..d25277d 100644
--- a/.github/workflows/tutorials.yml
+++ b/.github/workflows/tutorials.yml
@@ -29,6 +29,7 @@ jobs:
         shell: bash -l {0}
         run: |
           python -m pip install -U pip
+          python -m pip install jupytext ipykernel theano jupyter
           python -m pip install ".[tutorials]"
 
       - name: Get theano compiledir
diff --git a/setup.py b/setup.py
index ff7671c..c8b30de 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@
     "tutorials": [
         "jupytext",
         "jupyter",
-        # "nbconvert",
+        "nbconvert",
         "matplotlib",
         "scipy",
         "emcee",

From 020a2a9d4eda76c0c88bbb95e3b7a331169735f0 Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Tue, 8 Dec 2020 17:17:11 -0500
Subject: [PATCH 13/14] jupytext version

---
 .github/workflows/tutorials.yml | 1 -
 setup.py                        | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml
index d25277d..258ad09 100644
--- a/.github/workflows/tutorials.yml
+++ b/.github/workflows/tutorials.yml
@@ -29,7 +29,6 @@ jobs:
         shell: bash -l {0}
         run: |
           python -m pip install -U pip
-          python -m pip install jupytext ipykernel theano jupyter
           python -m pip install ".[tutorials]"
 
       - name: Get theano compiledir
diff --git a/setup.py b/setup.py
index c8b30de..4d94f56 100644
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@
         "ipython",
     ],
     "tutorials": [
-        "jupytext",
+        "jupytext<1.7.0",
         "jupyter",
         "nbconvert",
         "matplotlib",

From 4d1e05c32c544c271268d9e6576b6fdfd2b6e1aa Mon Sep 17 00:00:00 2001
From: Dan F-M <foreman.mackey@gmail.com>
Date: Wed, 9 Dec 2020 07:33:59 -0500
Subject: [PATCH 14/14] adding convolution term value back in

---
 python/celerite2/terms.py | 72 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py
index 64e7c0b..530a6e7 100644
--- a/python/celerite2/terms.py
+++ b/python/celerite2/terms.py
@@ -495,6 +495,78 @@ def get_celerite_matrices(
             t, new_diag, c=c, a=a, U=U, V=V, X=X
         )
 
+    def get_value(self, t, t2=None, *, diag=None, X=None, X2=None):
+        t = np.atleast_1d(t)
+        if t2 is None:
+            tau0 = t[:, None] - t[None, :]
+        else:
+            tau0 = t[:, None] - np.atleast_1d(t2)[None, :]
+
+        dt = self.delta
+        ar, cr, a, b, c, d = self.term.get_coefficients()
+
+        # Format the lags correctly
+        tau0 = np.abs(np.atleast_1d(tau0))
+        tau = tau0[..., None]
+
+        # Precompute some factors
+        dpt = dt + tau
+        dmt = dt - tau
+
+        # Real parts:
+        # tau > Delta
+        crd = cr * dt
+        cosh = np.cosh(crd)
+        norm = 2 * ar / crd ** 2
+        K_large = np.sum(norm * (cosh - 1) * np.exp(-cr * tau), axis=-1)
+
+        # tau < Delta
+        crdmt = cr * dmt
+        K_small = K_large + np.sum(norm * (crdmt - np.sinh(crdmt)), axis=-1)
+
+        # Complex part
+        cd = c * dt
+        dd = d * dt
+        c2 = c ** 2
+        d2 = d ** 2
+        c2pd2 = c2 + d2
+        C1 = a * (c2 - d2) + 2 * b * c * d
+        C2 = b * (c2 - d2) - 2 * a * c * d
+        norm = 1.0 / (dt * c2pd2) ** 2
+        k0 = np.exp(-c * tau)
+        cdt = np.cos(d * tau)
+        sdt = np.sin(d * tau)
+
+        # For tau > Delta
+        cos_term = 2 * (np.cosh(cd) * np.cos(dd) - 1)
+        sin_term = 2 * (np.sinh(cd) * np.sin(dd))
+        factor = k0 * norm
+        K_large += np.sum(
+            (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1
+        )
+        K_large += np.sum(
+            (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1
+        )
+
+        # tau < Delta
+        edmt = np.exp(-c * dmt)
+        edpt = np.exp(-c * dpt)
+        cos_term = (
+            edmt * np.cos(d * dmt) + edpt * np.cos(d * dpt) - 2 * k0 * cdt
+        )
+        sin_term = (
+            edmt * np.sin(d * dmt) + edpt * np.sin(d * dpt) - 2 * k0 * sdt
+        )
+        K_small += np.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
+        K_small += np.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)
+
+        mask = tau0 >= dt
+        K = K_large * mask + K_small * (~mask)
+
+        if diag is not None:
+            K[np.diag_indices_from(K)] += diag
+        return K
+
     def get_coefficients(self):
         ar, cr, a, b, c, d = self.term.get_coefficients()