LSSTDESC · JaimeRZP · Jun 6, 2024 · Jun 11, 2024 · Jun 11, 2024 · Jun 11, 2024
diff --git a/nb/demo.ipynb b/nb/demo.ipynb
diff --git a/nb/priors_example.ipynb b/nb/priors_example.ipynb
diff --git a/src/qp/__init__.py b/src/qp/__init__.py
@@ -34,3 +34,5 @@
 from . import packing_utils
 
 from . import test_funcs
+
+from . import projectors
diff --git a/src/qp/projectors/__init__.py b/src/qp/projectors/__init__.py
@@ -0,0 +1,4 @@
+from .projector_base import ProjectorBase
+from .projector_shifts import ProjectorShifts
+from .projector_shifts_widths import ProjectorShiftsWidths
+from .projector_moments import ProjectorMoments
diff --git a/src/qp/projectors/projector_base.py b/src/qp/projectors/projector_base.py
@@ -0,0 +1,71 @@
+from ..ensemble import Ensemble
+import numpy as np
+
+
+class ProjectorBase(object):
+    """
+    Base class for projectors. Projectors are used to project the measured
+    photometric distributions by RAIL onto the space of a given generative
+    photometric model for inference. 
+    This class is not meant to be used directly, 
+    but to be subclassed by specific projectors. 
+    The subclasses should implement the following methods:
+    - evaluate_model: given a set of parameters, evaluate the model
+    - get_prior: return the prior distribution of the model given
+    the meadured photometric distributions.
+    """
+    def __init__(self, ens):
+        self._project_base(ens)
+
+    def _project_base(self, ens, z=None):
+        if z is None:
+            z = np.linspace(0, 1.5, 45)
+        self.z = z
+        nzs = ens.pdf(z)
+        nzs = ens.objdata()['pdfs']
+        self.ens = ens
+        self.nzs = self._normalize(nzs)
+        self.nz_mean = np.mean(self.nzs, axis=0)
+        self.nz_cov = np.cov(self.nzs, rowvar=False)
+        self.prior = None
+
+    def _normalize(self, nzs):
+        norms = np.sum(nzs, axis=1)
+        nzs /= norms[:, None]  
+        return nzs
+
+    def evaluate_model(self, *args):
+        """
+        Evaluate the model at the given parameters.
+        """
+        raise NotImplementedError
+
+    def get_prior(self):
+        """
+        Returns the calibrated prior distribution for the model 
+        parameters given the measured photometric distributions.
+        """
+        if self.prior is None:
+            self.prior = self._get_prior()
+        return self.prior
+
+    def sample_prior(self):
+        """
+        Draws a sample from the prior distribution.
+        """
+        prior = self.get_prior()
+        return prior.rvs()
+
+    def save_prior(self, mode="dist"):
+        """
+        Saves the prior distribution to a file.
+        """
+        prior = self.get_prior()
+        if mode == "dist":
+            return prior
+        if mode == "file":
+            prior = self.get_prior()
+            np.save("prior_mean.npy", prior.mean)
+            np.save("prior_cov.npy", prior.cov)
+        else:
+            raise NotImplementedError
diff --git a/src/qp/projectors/projector_moments.py b/src/qp/projectors/projector_moments.py
@@ -0,0 +1,63 @@
+import numpy as np
+from ..ensemble import Ensemble
+from .projector_base import ProjectorBase
+from numpy.linalg import eig, cholesky
+from scipy.stats import multivariate_normal as mvn
+
+
+class ProjectorMoments(ProjectorBase):
+    """
+    Projector for the moments model.
+    The moments model assumes that meausred photometric distribution
+    is Gaussian meaning that it can be fully described by its mean and
+    covariance matrix. Conceptually, this is equavalent to a 
+    Gaussian process regressio for a given p(z). The details can be found 
+    in the paper: 2301.11978
+
+    Some measured photometric distributions will possess non-invertible
+    covariance matrices. If this is the case, ProjectorMoments will
+    attempt regularize the covariance matrix by adding a small jitter
+    to its eigen-values. If this fails, the covariance matrix will be
+    diagonalized.
+    """
+    def __init__(self, ens):
+        self._project_base(ens)
+        self._project()
+
+    def _project(self):
+        self.nz_chol = self._get_chol()
+
+    def _get_chol(self):
+        cov = self.nz_cov
+        if not self._is_pos_def(cov):
+            print('Warning: Covariance matrix is not positive definite')
+            print('The covariance matrix will be regularized')
+            jitter = 1e-15 * np.eye(cov.shape[0])
+            w, v = eig(cov+jitter)
+            w = np.real(np.abs(w))
+            v = np.real(v)
+            cov = v @ np.diag(np.abs(w)) @ v.T
+            cov = np.tril(cov) + np.triu(cov.T, 1)
+            if not self._is_pos_def(cov):
+                print('Warning: regularization failed')
+                print('The covariance matrix will be diagonalized')
+                cov = np.diag(np.diag(cov))
+        chol = cholesky(cov)
+        return chol
+
+    def _is_pos_def(self, A):
+        return np.all(np.linalg.eigvals(A) > 0)
+
+    def evaluate_model(self, nz, alpha):
+        """
+        Samples a photometric distribution 
+        from a Gaussian distribution with mean
+        and covariance measured from the data.
+        """
+        z = nz[0]
+        nz = nz[1]
+        return [z, nz + self.nz_chol @ alpha]
+
+    def _get_prior(self):
+        return mvn(np.zeros_like(self.nz_mean),
+                   np.ones_like(self.nz_mean))
diff --git a/src/qp/projectors/projector_shifts.py b/src/qp/projectors/projector_shifts.py
@@ -0,0 +1,50 @@
+import numpy as np
+from ..ensemble import Ensemble
+from .projector_base import ProjectorBase
+from scipy.interpolate import interp1d
+from scipy.stats import multivariate_normal as mvn
+
+
+class ProjectorShifts(ProjectorBase):
+    """
+    Projector for the shifts model.
+    The shift model assumes that all the variation in the measured
+    photometric distributions can be described by a single shift in
+    the position of the mean of a fiducial n(z) distribution.
+
+    This shift is calibrated by computing the standard deviations
+    of the measured photometric distributions over redshift.
+    The shift prior is then given by a Gaussian distribution with
+    mean 0 and variance equal to the ratio of the standard deviation
+    of the standard deviations to the mean of the standard deviations.
+    """
+    def __init__(self, ens):
+        self._project_base(ens)
+        self._project()
+
+    def _project(self):
+        self.shift = self._find_shift()
+
+    def evaluate_model(self, nz, shift):
+        """
+        Aplies a shift to the given p(z) distribution.
+        This is done by interpolating the p(z) distribution
+        at the shifted z values and then evaluating it at the
+        original z values.
+        """
+        z = nz[0]
+        nz = nz[1]
+        z_shift = z + shift
+        pz_shift = interp1d(z_shift, nz,
+                            kind='linear',
+                            fill_value='extrapolate')(z)
+        return [z, pz_shift]
+
+    def _find_shift(self):
+        stds = np.std(self.nzs, axis=1)  # std of each pz
+        s_stds = np.std(stds)            # std of the z-std
+        m_stds = np.mean(stds)           # mean of the z-std
+        return s_stds / m_stds
+
+    def _get_prior(self):
+        return mvn([0], [self.shift**2])
diff --git a/src/qp/projectors/projector_shifts_widths.py b/src/qp/projectors/projector_shifts_widths.py
@@ -0,0 +1,72 @@
+import numpy as np
+from ..ensemble import Ensemble
+from ..interp_pdf import interp
+from .projector_base import ProjectorBase
+from scipy.interpolate import interp1d
+from scipy.stats import multivariate_normal as mvn
+
+
+class ProjectorShiftsWidths(ProjectorBase):
+    """
+    Projector for the shifts and widths model.
+    The shifts and widths model assumes that the variation in the measured
+    photometric distributions can be captured by varying the mean and the
+    standard deviation of a fiducial n(z) distribution.
+
+    The calibration method was written by Tilman Tröster. 
+    The shift prior is given by a Gaussian distributiob with zero mean
+    standard deviation the standard deviation in the mean of 
+    the measured photometric distributions.
+    The width is calibrated by computing the standard deviations
+    of the measured photometric distributions over redshift.
+    The width prior is then given by a Gaussian distribution with
+    mean 0 and variance equal to the ratio of the standard deviation
+    of the standard deviations to the mean of the standard deviations.
+    This is similar to how the shift prior is calibrated in the shift model.
+    """
+    def __init__(self, ens):
+        self._project_base(ens)
+        self._project()
+
+    def _project(self):
+        self.shift = self._find_shift()
+        self.width = self._find_width()
+
+    def evaluate_model(self, nz, shift, width):
+        """
+        Aplies a shift and a width to the given p(z) distribution.
+        This is done by evluating the n(z) distribution at
+        p((z-mu)/width + mu + shift) where mu is the mean redshift
+        of the fiducial n(z) distribution and the rescaling by the width.
+        Finally the distribution is normalized.
+        """
+        z = nz[0]
+        nz = nz[1]
+        nz_i = interp(xvals=z, yvals=nz)
+        mu = nz_i.mean().squeeze()
+
+        pdf = nz_i.pdf((z-mu)/width + mu + shift)/width
+        norm = np.sum(pdf)
+        return [z, pdf/norm]
+
+    def _find_shift(self):
+        ms = np.mean(self.nzs, axis=1)  # mean of each nz
+        ms_std = np.std(ms)             # std of the means
+        return ms_std                   
+
+    def _find_width(self):
+        stds = np.std(self.nzs, axis=1)
+        std_std = np.std(stds)
+        std_mean = np.mean(stds) 
+        width = std_std / std_mean
+        return width
+
+    def _get_prior(self):
+        return mvn([0, 1], [self.shift**2,  self.width**2],
+                    allow_singular=True)
+
+    def _get_shift_prior(self):
+        return mvn([0], [self.shift**2])
+
+    def _get_width_prior(self):
+        return mvn([1], [self.width**2])
diff --git a/tests/qp/dummy.npz b/tests/qp/dummy.npz
diff --git a/tests/qp/test_projector_base.py b/tests/qp/test_projector_base.py
@@ -0,0 +1,23 @@
+import qp
+import qp.projectors as proj
+import numpy as np
+
+
+def make_qp_ens(file):
+    zs = file['zs']
+    nzs = file['pzs']
+    dz = np.mean(np.diff(zs))
+    zs_edges = np.append(zs - dz/2, zs[-1] + dz/2)
+    q = qp.Ensemble(qp.hist, data={"bins":zs_edges, "pdfs":nzs})
+    return q
+
+def test_base():
+    file = np.load('tests/qp/dummy.npz')
+    ens = make_qp_ens(file)
+    projector = proj.ProjectorBase(ens)
+    m, n = projector.nzs.shape
+    k, = projector.z.shape
+    nzs = file['pzs']
+    nzs /= np.sum(nzs, axis=1)[:, None]
+    assert n == k
+    assert np.allclose(projector.nz_mean, np.mean(nzs, axis=0))
diff --git a/tests/qp/test_projector_moments.py b/tests/qp/test_projector_moments.py
@@ -0,0 +1,39 @@
+import qp
+import numpy as np
+import qp.projectors as proj
+
+
+def make_qp_ens(file):
+    zs = file['zs']
+    nzs = file['pzs']
+    dz = np.mean(np.diff(zs))
+    zs_edges = np.append(zs - dz/2, zs[-1] + dz/2)
+    q = qp.Ensemble(qp.hist, data={"bins":zs_edges, "pdfs":nzs})
+    return q
+
+
+def make_projector():
+    file = np.load('tests/qp/dummy.npz')
+    ens = make_qp_ens(file)
+    return proj.ProjectorMoments(ens)
+
+
+def test_prior():
+    projector = make_projector()
+    prior = projector.get_prior()
+    assert prior is not None
+
+
+def test_sample_prior():
+    projector = make_projector()
+    nz = projector.sample_prior()
+    assert len(nz) == len(projector.nz_mean)
+
+
+def test_model():
+    projector = make_projector()
+    shift = projector.sample_prior()
+    input = np.array([projector.z, projector.nz_mean])
+    output = projector.evaluate_model(input, shift)
+    assert (projector.z == output[0]).all()
+    assert len(output[1]) == len(projector.nz_mean)
diff --git a/tests/qp/test_projector_shifts.py b/tests/qp/test_projector_shifts.py
@@ -0,0 +1,39 @@
+import qp
+import numpy as np
+import qp.projectors as proj
+
+
+def make_qp_ens(file):
+    zs = file['zs']
+    nzs = file['pzs']
+    dz = np.mean(np.diff(zs))
+    zs_edges = np.append(zs - dz/2, zs[-1] + dz/2)
+    q = qp.Ensemble(qp.hist, data={"bins":zs_edges, "pdfs":nzs})
+    return q
+
+
+def make_projector():
+    file = np.load('tests/qp/dummy.npz')
+    ens = make_qp_ens(file)
+    return proj.ProjectorShifts(ens)
+
+
+def test_prior():
+    projector = make_projector()
+    prior = projector.get_prior()
+    assert prior is not None
+
+
+def test_sample_prior():
+    projector = make_projector()
+    shift = projector.sample_prior()
+    assert len([shift]) == len([projector.shift])
+
+
+def test_model():
+    projector = make_projector()
+    shift = projector.sample_prior()
+    input = np.array([projector.z, projector.nz_mean])
+    output = projector.evaluate_model(input, shift)
+    assert (projector.z == output[0]).all()
+    assert len(output[1]) == len(projector.nz_mean)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -34,3 +34,5 @@
		from . import packing_utils

		from . import test_funcs

		from . import projectors