diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 18ddc24b..fda1ba1f 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -39,6 +39,8 @@ Distributions GeneralizedPoisson BetaNegativeBinomial GenExtreme + MvAsymmetricLaplace + MvLaplace R2D2M2CP Skellam histogram_approximation diff --git a/pymc_experimental/distributions/__init__.py b/pymc_experimental/distributions/__init__.py index f206c246..56b66cea 100644 --- a/pymc_experimental/distributions/__init__.py +++ b/pymc_experimental/distributions/__init__.py @@ -24,6 +24,7 @@ Skellam, ) from pymc_experimental.distributions.histogram_utils import histogram_approximation +from pymc_experimental.distributions.multivariate.laplace import MvAsymmetricLaplace, MvLaplace from pymc_experimental.distributions.multivariate.r2d2m2cp import R2D2M2CP from pymc_experimental.distributions.timeseries import DiscreteMarkovChain @@ -32,6 +33,8 @@ "DiscreteMarkovChain", "GeneralizedPoisson", "GenExtreme", + "MvAsymmetricLaplace", + "MvLaplace", "R2D2M2CP", "Skellam", "histogram_approximation", diff --git a/pymc_experimental/distributions/multivariate/laplace.py b/pymc_experimental/distributions/multivariate/laplace.py new file mode 100644 index 00000000..30025b4e --- /dev/null +++ b/pymc_experimental/distributions/multivariate/laplace.py @@ -0,0 +1,276 @@ +import numpy as np +import pytensor.tensor as pt +import scipy + +from pymc.distributions.dist_math import check_parameters +from pymc.distributions.distribution import Continuous, SymbolicRandomVariable, _support_point +from pymc.distributions.moments.means import _mean +from pymc.distributions.multivariate import ( + _logdet_from_cholesky, + nan_lower_cholesky, + quaddist_chol, + quaddist_matrix, + solve_lower, +) +from pymc.distributions.shape_utils import implicit_size_from_params, rv_size_is_none +from pymc.logprob.basic import _logprob +from pymc.pytensorf import normalize_rng_param +from pytensor.gradient import grad_not_implemented +from pytensor.scalar import BinaryScalarOp, upgrade_to_float +from pytensor.tensor.elemwise import Elemwise +from pytensor.tensor.random.utils import normalize_size_param + + +class Kv(BinaryScalarOp): + """ + Modified Bessel function of the second kind of real order v. + """ + + nfunc_spec = ("scipy.special.kv", 2, 1) + + @staticmethod + def st_impl(v, x): + return scipy.special.kv(v, x) + + def impl(self, v, x): + return self.st_impl(v, x) + + def L_op(self, inputs, outputs, output_grads): + v, x = inputs + [out] = outputs + [g_out] = output_grads + dx = -(v / x) * out - self.kv(v - 1, x) + return [grad_not_implemented(self, 0, v), g_out * dx] + + def c_code(self, *args, **kwargs): + raise NotImplementedError() + + +kv = Elemwise(Kv(upgrade_to_float, name="kv")) + + +class MvLaplaceRV(SymbolicRandomVariable): + name = "multivariate_laplace" + extended_signature = "[rng],[size],(m),(m,m)->[rng],(m)" + _print_name = ("MultivariateLaplace", "\\operatorname{MultivariateLaplace}") + + @classmethod + def rv_op(cls, mu, cov, *, size=None, rng=None): + mu = pt.as_tensor(mu) + cov = pt.as_tensor(cov) + rng = normalize_rng_param(rng) + size = normalize_size_param(size) + + assert mu.type.ndim >= 1 + assert cov.type.ndim >= 2 + + if rv_size_is_none(size): + size = implicit_size_from_params(mu, cov, ndims_params=(1, 2)) + + next_rng, e = pt.random.exponential(size=size, rng=rng).owner.outputs + next_rng, z = pt.random.multivariate_normal( + mean=pt.zeros(mu.shape[-1]), cov=cov, size=size, rng=next_rng + ).owner.outputs + rv = mu + pt.sqrt(e)[..., None] * z + + return cls( + inputs=[rng, size, mu, cov], + outputs=[next_rng, rv], + )(rng, size, mu, cov) + + +class MvLaplace(Continuous): + r"""Multivariate (Symmetric) Laplace distribution. + + The pdf of this distribution is + + .. math:: + + pdf(x \mid \mu, \Sigma) = + \frac{2}{(2\pi)^{k/2} |\Sigma|^{1/2}} + ( \frac{(x-\mu)'\Sigma^{-1}(x-mu)}{2} )^{v/2} + \K_v (\sqrt{2(x-\mu)' \Sigma^{-1} (x - \mu)}}) + + where :math:`v = 1 - k/2` and :math:`\K_v` is the modified Bessel function of the second kind. + + ======== ========================== + Support :math:`x \in \mathbb{R}^k` + Mean :math:`\mu` + Variance :math:`\Sigma` + ======== ========================== + + Parameters + ---------- + mu : tensor_like of float + Location. + cov : tensor_like of float, optional + Covariance matrix. Exactly one of cov, tau, or chol is needed. + tau : tensor_like of float, optional + Precision matrix. Exactly one of cov, tau, or chol is needed. + chol : tensor_like of float, optional + Cholesky decomposition of covariance matrix. Exactly one of cov, + tau, or chol is needed. + lower: bool, default=True + Whether chol is the lower tridiagonal cholesky factor. + """ + + rv_type = MvLaplaceRV + rv_op = MvLaplaceRV.rv_op + + @classmethod + def dist(cls, mu=0, cov=None, *, tau=None, chol=None, lower=True, **kwargs): + cov = quaddist_matrix(cov, chol, tau, lower) + + mu = pt.atleast_1d(pt.as_tensor_variable(mu)) + if mu.type.broadcastable[-1] and not cov.type.broadcastable[-1]: + mu, _ = pt.broadcast_arrays(mu, cov[..., -1]) + return super().dist([mu, cov], **kwargs) + + +class MvAsymmetricLaplaceRV(SymbolicRandomVariable): + name = "multivariate_asymmetric_laplace" + extended_signature = "[rng],[size],(m),(m,m)->[rng],(m)" + _print_name = ("MultivariateAsymmetricLaplace", "\\operatorname{MultivariateAsymmetricLaplace}") + + @classmethod + def rv_op(cls, mu, cov, *, size=None, rng=None): + mu = pt.as_tensor(mu) + cov = pt.as_tensor(cov) + rng = normalize_rng_param(rng) + size = normalize_size_param(size) + + assert mu.type.ndim >= 1 + assert cov.type.ndim >= 2 + + if rv_size_is_none(size): + size = implicit_size_from_params(mu, cov, ndims_params=(1, 2)) + + next_rng, e = pt.random.exponential(size=size, rng=rng).owner.outputs + next_rng, z = pt.random.multivariate_normal( + mean=pt.zeros(mu.shape[-1]), cov=cov, size=size, rng=next_rng + ).owner.outputs + e = e[..., None] + rv = e * mu + pt.sqrt(e) * z + + return cls( + inputs=[rng, size, mu, cov], + outputs=[next_rng, rv], + )(rng, size, mu, cov) + + +class MvAsymmetricLaplace(Continuous): + r"""Multivariate Asymmetric Laplace distribution. + + The pdf of this distribution is + + .. math:: + + pdf(x \mid \mu, \Sigma) = + \frac{2}{(2\pi)^{k/2} |\Sigma|^{1/2}} + ( \frac{(x-\mu)'\Sigma^{-1}(x-mu)}{2} )^{v/2} + \K_v (\sqrt{2(x-\mu)' \Sigma^{-1} (x - \mu)}}) + + where :math:`v = 1 - k/2` and :math:`\K_v` is the modified Bessel function of the second kind. + + ======== ========================== + Support :math:`x \in \mathbb{R}^k` + Mean :math:`\mu` + Variance :math:`\Sigma + \mu' \mu` + ======== ========================== + + Parameters + ---------- + mu : tensor_like of float + Location. + cov : tensor_like of float, optional + Covariance matrix. Exactly one of cov, tau, or chol is needed. + tau : tensor_like of float, optional + Precision matrix. Exactly one of cov, tau, or chol is needed. + chol : tensor_like of float, optional + Cholesky decomposition of covariance matrix. Exactly one of cov, + tau, or chol is needed. + lower: bool, default=True + Whether chol is the lower tridiagonal cholesky factor. + """ + + rv_type = MvAsymmetricLaplaceRV + rv_op = MvAsymmetricLaplaceRV.rv_op + + @classmethod + def dist(cls, mu=0, cov=None, *, tau=None, chol=None, lower=True, **kwargs): + cov = quaddist_matrix(cov, chol, tau, lower) + + mu = pt.atleast_1d(pt.as_tensor_variable(mu)) + if mu.type.broadcastable[-1] and not cov.type.broadcastable[-1]: + mu, _ = pt.broadcast_arrays(mu, cov[..., -1]) + return super().dist([mu, cov], **kwargs) + + +@_logprob.register(MvLaplaceRV) +def mv_laplace_logp(op, values, rng, size, mu, cov, **kwargs): + [value] = values + quaddist, logdet, posdef = quaddist_chol(value, mu, cov) + + k = value.shape[-1].astype("floatX") + norm = np.log(2) - (k / 2) * np.log(2 * np.pi) - logdet + + v = 1 - (k / 2) + kernel = ((v / 2) * pt.log(quaddist / 2)) + pt.log(kv(v, pt.sqrt(2 * quaddist))) + + logp_val = norm + kernel + return check_parameters(logp_val, posdef, msg="posdef scale") + + +@_logprob.register(MvAsymmetricLaplaceRV) +def mv_asymmetric_laplace_logp(op, values, rng, size, mu, cov, **kwargs): + [value] = values + + chol_cov = nan_lower_cholesky(cov) + logdet, posdef = _logdet_from_cholesky(chol_cov) + + # solve_triangular will raise if there are nans + # (which happens if the cholesky fails) + chol_cov = pt.switch(posdef[..., None, None], chol_cov, 1) + + solve_x = solve_lower(chol_cov, value, b_ndim=1) + solve_mu = solve_lower(chol_cov, mu, b_ndim=1) + + x_quaddist = (solve_x**2).sum(-1) + mu_quaddist = (solve_mu**2).sum(-1) + x_mu_quaddist = (value * solve_mu).sum(-1) + + k = value.shape[-1].astype("floatX") + norm = np.log(2) - (k / 2) * np.log(2 * np.pi) - logdet + + v = 1 - (k / 2) + kernel = ( + x_mu_quaddist + + ((v / 2) * (pt.log(x_quaddist) - pt.log(2 + mu_quaddist))) + + pt.log(kv(v, pt.sqrt((2 + mu_quaddist) * x_quaddist))) + ) + + logp_val = norm + kernel + return check_parameters(logp_val, posdef, msg="posdef scale") + + +@_mean.register(MvLaplaceRV) +@_mean.register(MvAsymmetricLaplaceRV) +def mv_laplace_mean(op, rv, rng, size, mu, cov): + if rv_size_is_none(size): + bcast_mu, _ = pt.random.utils.broadcast_params([mu, cov], ndims_params=[1, 2]) + else: + bcast_mu = pt.broadcast_to(mu, pt.concatenate([size, [mu.shape[-1]]])) + return bcast_mu + + +@_support_point.register(MvLaplaceRV) +@_support_point.register(MvAsymmetricLaplaceRV) +def mv_laplace_support_point(op, rv, rng, size, mu, cov): + # We have a 0 * inf when value = mu. I assume density is infinite, which isn't a good starting point. + point = mu + 1 + if rv_size_is_none(size): + bcast_point, _ = pt.random.utils.broadcast_params([point, cov], ndims_params=[1, 2]) + else: + bcast_shape = pt.concatenate([size, [point.shape[-1]]]) + bcast_point = pt.broadcast_to(point, bcast_shape) + return bcast_point diff --git a/tests/distributions/multivariate/test_laplace.py b/tests/distributions/multivariate/test_laplace.py new file mode 100644 index 00000000..be9a4ee2 --- /dev/null +++ b/tests/distributions/multivariate/test_laplace.py @@ -0,0 +1,207 @@ +import numpy as np +import pymc as pm +import pytensor.tensor as pt +import pytest +import scipy + +from pymc.testing import assert_support_point_is_expected +from pytensor.graph.basic import equal_computations + +from pymc_experimental.distributions.multivariate.laplace import MvAsymmetricLaplace, MvLaplace + + +@pytest.mark.parametrize("dist", [MvLaplace, MvAsymmetricLaplace]) +def test_params(dist): + mu = pt.vector("mu") + cov = pt.matrix("cov") + chol = pt.matrix("chol") + tau = pt.matrix("tau") + + rv = dist.dist(mu=mu, cov=cov) + mu_param, cov_param = rv.owner.op.dist_params(rv.owner) + assert mu_param is mu + assert cov_param is cov + + # Default mu from shape of chol + rv = dist.dist(chol=chol) + mu_param, cov_param = rv.owner.op.dist_params(rv.owner) + cov_expected = chol @ chol.T + mu_expected, _ = pt.broadcast_arrays(0, cov_expected[..., -1]) + assert equal_computations([mu_param, cov_param], [mu_expected, cov_expected]) + + # Broadcast mu to chol (upper triangular) + rv = dist.dist(mu=mu[0], chol=chol, lower=False) + mu_param, cov_param = rv.owner.op.dist_params(rv.owner) + # It's a bit silly but we transpose twice when lower=False + cov_expected = chol.T @ chol.T.T + mu_expected, _ = pt.broadcast_arrays(mu[0], cov_expected[..., -1]) + assert equal_computations([mu_param, cov_param], [mu_expected, cov_expected]) + + # Mu and tau + rv = dist.dist(mu=mu, tau=tau) + mu_param, cov_param = rv.owner.op.dist_params(rv.owner) + assert equal_computations([mu_param, cov_param], [mu, pt.linalg.inv(tau)]) + + +@pytest.mark.parametrize( + "mu, cov, size, expected", + [ + (np.arange(6).reshape(3, 2), np.eye(2), None, np.arange(1, 7).reshape(3, 2)), + ( + np.arange(2), + np.broadcast_to(np.eye(2), (3, 2, 2)), + None, + np.broadcast_to(np.arange(1, 3), (3, 2)), + ), + (0, np.eye(3), (4,), np.ones((4, 3))), + ], +) +@pytest.mark.parametrize("dist", [MvLaplace, MvAsymmetricLaplace]) +def test_support_point(dist, mu, cov, size, expected): + with pm.Model() as model: + dist("x", mu=mu, cov=cov, size=size) + assert_support_point_is_expected(model, expected) + + +@pytest.mark.parametrize("dist", [MvLaplace, MvAsymmetricLaplace]) +def test_mean(dist): + mu = [np.pi, np.e] + + # Explicit size + rv = dist.dist(mu=mu, chol=np.eye(2), size=(3,)) + mean = pm.distributions.moments.mean(rv) + np.testing.assert_allclose(mean.eval(mode="FAST_COMPILE"), np.broadcast_to(mu, (3, 2))) + + # Implicit size from cov + rv = dist.dist(mu=mu, cov=np.broadcast_to(np.eye(2), (4, 2, 2))) + mean = pm.distributions.moments.mean(rv) + np.testing.assert_allclose(mean.eval(mode="FAST_COMPILE"), np.broadcast_to(mu, (4, 2))) + + +@pytest.mark.parametrize("dist", [MvLaplace, MvAsymmetricLaplace]) +def test_random(dist): + mu = [-1, np.pi, 1] + cov = [[1, 0.5, 0.25], [0.5, 2, 0.5], [0.25, 0.5, 3]] + rv = dist.dist(mu=mu, cov=cov, size=10_000) + + samples = pm.draw(rv, random_seed=13) + assert samples.shape == (10_000, 3) + np.testing.assert_allclose(np.mean(samples, axis=0), mu, rtol=0.05) + + expected_cov = cov if dist is MvLaplace else cov + np.outer(mu, mu) + np.testing.assert_allclose(np.cov(samples, rowvar=False), expected_cov, rtol=0.1) + + +def test_symmetric_matches_univariate_logp(): + # Test MvLaplace matches Univariate Laplace when there's a single entry + mean = 1.0 + scale = 2.0 + # Variance of Laplace is 2 * scale ** 2 + rv = MvLaplace.dist(mu=[mean], cov=[[2 * scale**2]]) + ref_rv = pm.Laplace.dist(mu=mean, b=scale) + + test_val = np.random.normal(size=(3, 1)) + rv_logp = pm.logp(rv, test_val).eval() + ref_logp = pm.logp(ref_rv, test_val).squeeze(-1).eval() + np.testing.assert_allclose(rv_logp, ref_logp) + + +@pytest.mark.xfail(reason="Not sure about equivalence. Test fails") +def test_asymmetric_matches_univariate_logp(): + # Test MvAsymmetricLaplace matches Univariate AsymmetricLaplace when there's a single entry + k = 2.0 + # From wikipedia: https://en.wikipedia.org/wiki/Asymmetric_Laplace_distribution + mean = (1 - k**2) / k + var = ((1 + k**4) / (k**2)) - mean**2 + print(mean, var) + rv = MvAsymmetricLaplace.dist(mu=[mean], cov=[[var]]) + ref_rv = pm.AsymmetricLaplace.dist(kappa=k, mu=0, b=1) + + test_val = np.random.normal(size=(3, 1)) + rv_logp = pm.logp(rv, test_val).eval() + ref_logp = pm.logp(ref_rv, test_val).squeeze(-1).eval() + np.testing.assert_allclose(rv_logp, ref_logp) + + +def test_asymmetric_matches_symmetric_logp(): + # Test it matches with the symmetric case when mu = 0 + mu = np.zeros(2) + tau = np.linalg.inv(np.array([[1, -0.5], [-0.5, 2]])) + rv = MvAsymmetricLaplace.dist(mu=mu, tau=tau) + ref_rv = MvLaplace.dist(mu=mu, tau=tau) + + rv_val = np.random.normal(size=(3, 2)) + logp_eval = pm.logp(rv, rv_val).eval() + logp_expected = pm.logp(ref_rv, rv_val).eval() + np.testing.assert_allclose(logp_eval, logp_expected) + + +def test_symmetric_logp(): + # Testing against simple bivariate cases described in: + # https://en.wikipedia.org/wiki/Multivariate_Laplace_distribution#Probability_density_function + + # Zero mean, non-identity covariance case + mu = np.zeros(2) + s1 = 0.5 + s2 = 2.0 + r = -0.25 + cov = np.array( + [ + [s1**2, r * s1 * s2], + [r * s1 * s2, s2**2], + ] + ) + rv = MvLaplace.dist(mu=mu, cov=cov) + rv_val = np.random.normal(size=(2,)) + logp_eval = pm.logp(rv, rv_val).eval() + + x1, x2 = rv_val + logp_expected = np.log( + (1 / (np.pi * s1 * s2 * np.sqrt(1 - r**2))) + * scipy.special.kv( + 0, + np.sqrt( + (2 * ((x1**2 / s1**2) - (2 * r * x1 * x2 / (s1 * s2)) + (x2**2 / s2**2))) + / (1 - r**2) + ), + ) + ) + np.testing.assert_allclose( + logp_eval, + logp_expected, + ) + + # Non zero mean, identity covariance case + mu = np.array([1, 3]) + rv = MvLaplace.dist(mu=mu, cov=np.eye(2)) + rv_val = np.random.normal(size=(2,)) + logp_eval = pm.logp(rv, rv_val).eval() + + logp_expected = np.log(1 / np.pi * scipy.special.kv(0, np.sqrt(2 * np.sum((rv_val - mu) ** 2)))) + np.testing.assert_allclose( + logp_eval, + logp_expected, + ) + + +def test_asymmetric_logp(): + # Testing against trivial univariate case + mu = 0.5 + cov = np.array([[1.0]]) + rv = MvAsymmetricLaplace.dist(mu=mu, cov=cov) + rv_val = np.random.normal(size=(1,)) + logp_eval = pm.logp(rv, rv_val).eval() + + [x] = rv_val + logp_expected = np.log( + ((2 * np.exp(x * mu)) / np.sqrt(2 * np.pi)) + * np.power(x**2 / (2 + mu**2), 1 / 4) + * scipy.special.kv( + 1 / 2, + np.sqrt((2 + mu**2) * x**2), + ) + ) + np.testing.assert_allclose( + logp_eval, + logp_expected, + )