Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prior Estimation #232

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,928 changes: 1,828 additions & 100 deletions nb/demo.ipynb

Large diffs are not rendered by default.

337 changes: 337 additions & 0 deletions nb/priors_example.ipynb

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions src/qp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@
from . import packing_utils

from . import test_funcs

from . import projectors
4 changes: 4 additions & 0 deletions src/qp/projectors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .projector_base import ProjectorBase
from .projector_shifts import ProjectorShifts
from .projector_shifts_widths import ProjectorShiftsWidths
from .projector_moments import ProjectorMoments
71 changes: 71 additions & 0 deletions src/qp/projectors/projector_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from ..ensemble import Ensemble
import numpy as np


class ProjectorBase(object):
"""
Base class for projectors. Projectors are used to project the measured
photometric distributions by RAIL onto the space of a given generative
photometric model for inference.
This class is not meant to be used directly,
but to be subclassed by specific projectors.
The subclasses should implement the following methods:
- evaluate_model: given a set of parameters, evaluate the model
- get_prior: return the prior distribution of the model given
the meadured photometric distributions.
"""
def __init__(self, ens):
self._project_base(ens)

def _project_base(self, ens, z=None):
if z is None:
z = np.linspace(0, 1.5, 45)
self.z = z
nzs = ens.pdf(z)
nzs = ens.objdata()['pdfs']
self.ens = ens
self.nzs = self._normalize(nzs)
self.nz_mean = np.mean(self.nzs, axis=0)
self.nz_cov = np.cov(self.nzs, rowvar=False)
self.prior = None

def _normalize(self, nzs):
norms = np.sum(nzs, axis=1)
nzs /= norms[:, None]
return nzs

def evaluate_model(self, *args):
"""
Evaluate the model at the given parameters.
"""
raise NotImplementedError

def get_prior(self):
"""
Returns the calibrated prior distribution for the model
parameters given the measured photometric distributions.
"""
if self.prior is None:
self.prior = self._get_prior()
return self.prior

def sample_prior(self):
"""
Draws a sample from the prior distribution.
"""
prior = self.get_prior()
return prior.rvs()

def save_prior(self, mode="dist"):
"""
Saves the prior distribution to a file.
"""
prior = self.get_prior()
if mode == "dist":
return prior
if mode == "file":
prior = self.get_prior()
np.save("prior_mean.npy", prior.mean)
np.save("prior_cov.npy", prior.cov)
else:
raise NotImplementedError
63 changes: 63 additions & 0 deletions src/qp/projectors/projector_moments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
from ..ensemble import Ensemble
from .projector_base import ProjectorBase
from numpy.linalg import eig, cholesky
from scipy.stats import multivariate_normal as mvn


class ProjectorMoments(ProjectorBase):
"""
Projector for the moments model.
The moments model assumes that meausred photometric distribution
is Gaussian meaning that it can be fully described by its mean and
covariance matrix. Conceptually, this is equavalent to a
Gaussian process regressio for a given p(z). The details can be found
in the paper: 2301.11978

Some measured photometric distributions will possess non-invertible
covariance matrices. If this is the case, ProjectorMoments will
attempt regularize the covariance matrix by adding a small jitter
to its eigen-values. If this fails, the covariance matrix will be
diagonalized.
"""
def __init__(self, ens):
self._project_base(ens)
self._project()

def _project(self):
self.nz_chol = self._get_chol()

def _get_chol(self):
cov = self.nz_cov
if not self._is_pos_def(cov):
print('Warning: Covariance matrix is not positive definite')
print('The covariance matrix will be regularized')
jitter = 1e-15 * np.eye(cov.shape[0])
w, v = eig(cov+jitter)
w = np.real(np.abs(w))
v = np.real(v)
cov = v @ np.diag(np.abs(w)) @ v.T
cov = np.tril(cov) + np.triu(cov.T, 1)
if not self._is_pos_def(cov):
print('Warning: regularization failed')
print('The covariance matrix will be diagonalized')
cov = np.diag(np.diag(cov))
chol = cholesky(cov)
return chol

def _is_pos_def(self, A):
return np.all(np.linalg.eigvals(A) > 0)

def evaluate_model(self, nz, alpha):
"""
Samples a photometric distribution
from a Gaussian distribution with mean
and covariance measured from the data.
"""
z = nz[0]
nz = nz[1]
return [z, nz + self.nz_chol @ alpha]

def _get_prior(self):
return mvn(np.zeros_like(self.nz_mean),
np.ones_like(self.nz_mean))
50 changes: 50 additions & 0 deletions src/qp/projectors/projector_shifts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import numpy as np
from ..ensemble import Ensemble
from .projector_base import ProjectorBase
from scipy.interpolate import interp1d
from scipy.stats import multivariate_normal as mvn


class ProjectorShifts(ProjectorBase):
"""
Projector for the shifts model.
The shift model assumes that all the variation in the measured
photometric distributions can be described by a single shift in
the position of the mean of a fiducial n(z) distribution.

This shift is calibrated by computing the standard deviations
of the measured photometric distributions over redshift.
The shift prior is then given by a Gaussian distribution with
mean 0 and variance equal to the ratio of the standard deviation
of the standard deviations to the mean of the standard deviations.
"""
def __init__(self, ens):
self._project_base(ens)
self._project()

def _project(self):
self.shift = self._find_shift()

def evaluate_model(self, nz, shift):
"""
Aplies a shift to the given p(z) distribution.
This is done by interpolating the p(z) distribution
at the shifted z values and then evaluating it at the
original z values.
"""
z = nz[0]
nz = nz[1]
z_shift = z + shift
pz_shift = interp1d(z_shift, nz,
kind='linear',
fill_value='extrapolate')(z)
return [z, pz_shift]

def _find_shift(self):
stds = np.std(self.nzs, axis=1) # std of each pz
s_stds = np.std(stds) # std of the z-std
m_stds = np.mean(stds) # mean of the z-std
return s_stds / m_stds

def _get_prior(self):
return mvn([0], [self.shift**2])
72 changes: 72 additions & 0 deletions src/qp/projectors/projector_shifts_widths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import numpy as np
from ..ensemble import Ensemble
from ..interp_pdf import interp
from .projector_base import ProjectorBase
from scipy.interpolate import interp1d
from scipy.stats import multivariate_normal as mvn


class ProjectorShiftsWidths(ProjectorBase):
"""
Projector for the shifts and widths model.
The shifts and widths model assumes that the variation in the measured
photometric distributions can be captured by varying the mean and the
standard deviation of a fiducial n(z) distribution.

The calibration method was written by Tilman Tröster.
The shift prior is given by a Gaussian distributiob with zero mean
standard deviation the standard deviation in the mean of
the measured photometric distributions.
The width is calibrated by computing the standard deviations
of the measured photometric distributions over redshift.
The width prior is then given by a Gaussian distribution with
mean 0 and variance equal to the ratio of the standard deviation
of the standard deviations to the mean of the standard deviations.
This is similar to how the shift prior is calibrated in the shift model.
"""
def __init__(self, ens):
self._project_base(ens)
self._project()

def _project(self):
self.shift = self._find_shift()
self.width = self._find_width()

def evaluate_model(self, nz, shift, width):
"""
Aplies a shift and a width to the given p(z) distribution.
This is done by evluating the n(z) distribution at
p((z-mu)/width + mu + shift) where mu is the mean redshift
of the fiducial n(z) distribution and the rescaling by the width.
Finally the distribution is normalized.
"""
z = nz[0]
nz = nz[1]
nz_i = interp(xvals=z, yvals=nz)
mu = nz_i.mean().squeeze()

pdf = nz_i.pdf((z-mu)/width + mu + shift)/width
norm = np.sum(pdf)
return [z, pdf/norm]

def _find_shift(self):
ms = np.mean(self.nzs, axis=1) # mean of each nz
ms_std = np.std(ms) # std of the means
return ms_std

def _find_width(self):
stds = np.std(self.nzs, axis=1)
std_std = np.std(stds)
std_mean = np.mean(stds)
width = std_std / std_mean
return width

def _get_prior(self):
return mvn([0, 1], [self.shift**2, self.width**2],
allow_singular=True)

def _get_shift_prior(self):
return mvn([0], [self.shift**2])

def _get_width_prior(self):
return mvn([1], [self.width**2])
Binary file added tests/qp/dummy.npz
Binary file not shown.
23 changes: 23 additions & 0 deletions tests/qp/test_projector_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import qp
import qp.projectors as proj
import numpy as np


def make_qp_ens(file):
zs = file['zs']
nzs = file['pzs']
dz = np.mean(np.diff(zs))
zs_edges = np.append(zs - dz/2, zs[-1] + dz/2)
q = qp.Ensemble(qp.hist, data={"bins":zs_edges, "pdfs":nzs})
return q

def test_base():
file = np.load('tests/qp/dummy.npz')
ens = make_qp_ens(file)
projector = proj.ProjectorBase(ens)
m, n = projector.nzs.shape
k, = projector.z.shape
nzs = file['pzs']
nzs /= np.sum(nzs, axis=1)[:, None]
assert n == k
assert np.allclose(projector.nz_mean, np.mean(nzs, axis=0))
39 changes: 39 additions & 0 deletions tests/qp/test_projector_moments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import qp
import numpy as np
import qp.projectors as proj


def make_qp_ens(file):
zs = file['zs']
nzs = file['pzs']
dz = np.mean(np.diff(zs))
zs_edges = np.append(zs - dz/2, zs[-1] + dz/2)
q = qp.Ensemble(qp.hist, data={"bins":zs_edges, "pdfs":nzs})
return q


def make_projector():
file = np.load('tests/qp/dummy.npz')
ens = make_qp_ens(file)
return proj.ProjectorMoments(ens)


def test_prior():
projector = make_projector()
prior = projector.get_prior()
assert prior is not None


def test_sample_prior():
projector = make_projector()
nz = projector.sample_prior()
assert len(nz) == len(projector.nz_mean)


def test_model():
projector = make_projector()
shift = projector.sample_prior()
input = np.array([projector.z, projector.nz_mean])
output = projector.evaluate_model(input, shift)
assert (projector.z == output[0]).all()
assert len(output[1]) == len(projector.nz_mean)
39 changes: 39 additions & 0 deletions tests/qp/test_projector_shifts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import qp
import numpy as np
import qp.projectors as proj


def make_qp_ens(file):
zs = file['zs']
nzs = file['pzs']
dz = np.mean(np.diff(zs))
zs_edges = np.append(zs - dz/2, zs[-1] + dz/2)
q = qp.Ensemble(qp.hist, data={"bins":zs_edges, "pdfs":nzs})
return q


def make_projector():
file = np.load('tests/qp/dummy.npz')
ens = make_qp_ens(file)
return proj.ProjectorShifts(ens)


def test_prior():
projector = make_projector()
prior = projector.get_prior()
assert prior is not None


def test_sample_prior():
projector = make_projector()
shift = projector.sample_prior()
assert len([shift]) == len([projector.shift])


def test_model():
projector = make_projector()
shift = projector.sample_prior()
input = np.array([projector.z, projector.nz_mean])
output = projector.evaluate_model(input, shift)
assert (projector.z == output[0]).all()
assert len(output[1]) == len(projector.nz_mean)
Loading
Loading