Moving Brier and PIT metrics from RAIL to qp (#125)

* Moved Brier metric from RAIL to qp. * Added docstrings and test cases for the Ensemble-based Brier metric function. * Moved PIT metrics over to qp. * Separated PIT meta metrics into reusable metrics for pair-wise ensemble comparisons. Added tests and docstrings. * Updated docstrings to adhere to numpy format. Updated qp.rst so that the new metrics will be available on readthedocs.
LSSTDESC · Nov 8, 2022 · 016ad6f · 016ad6f
1 parent 36af46d
commit 016ad6f
Show file tree

Hide file tree

Showing 10 changed files with 975 additions and 25 deletions.
diff --git a/docs/qp.rst b/docs/qp.rst
@@ -13,7 +13,7 @@ these approximations.
 Ensemble and Factory
 ====================
 
-.. automodule:: ensemble
+.. automodule:: qp.ensemble
     :members:
     :undoc-members:
 
@@ -98,11 +98,21 @@ Gaussian mixture model based
               
 Quantification Metrics
 ======================
+.. automodule:: qp.metrics.metrics
+    :members:
+    :undoc-members:
 
-.. automodule:: qp.metrics
+.. automodule:: qp.metrics.array_metrics
     :members:
     :undoc-members:
 
+.. automodule:: qp.metrics.brier
+.. autoclass:: Brier
+    :members:
+
+.. automodule:: qp.metrics.pit
+.. autoclass:: PIT
+    :members:
 
 Utility functions
 =================

diff --git a/src/qp/metrics/__init__.py b/src/qp/metrics/__init__.py
@@ -1,3 +1,5 @@
 from .array_metrics import *
 from .metrics import *
-from .metrics import _calculate_grid_parameters  # added for testing purposes
+
+# added for testing purposes
+from .metrics import _calculate_grid_parameters, _check_ensemble_is_not_nested, _check_ensembles_are_same_size
diff --git a/src/qp/metrics/array_metrics.py b/src/qp/metrics/array_metrics.py
@@ -1,34 +1,68 @@
 """This module implements metric calculations that are independent of qp.Ensembles"""
 
 import numpy as np
+from scipy import stats
 from scipy.integrate import quad
 from scipy.optimize import minimize_scalar
 
 from qp.utils import safelog
 
+def quick_anderson_darling(p_random_variables, scipy_distribution='norm'):
+    """Calculate the Anderson-Darling statistic using scipy.stats.anderson for one CDF vs a scipy distribution.
+    For more details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html
 
-def quick_moment(p_eval, grid_to_N, dx):
+    Parameters
+    ----------
+    p_random_variables : np.array
+        An array of random variables from the given distribution
+    scipy_distribution : {'norm', 'expon', 'logistic', 'gumbel', 'gumbel_l', 'gumbel_r', 'extreme1'}, optional
+        The type of distribution to test against.
+
+    Returns
+    -------
+    [Result objects]
+        A array of objects with attributes ``statistic``, ``critical_values``, and ``significance_level``.
     """
-    Calculates a moment of an evaluated PDF
+    return stats.anderson(p_random_variables, dist=scipy_distribution)
+
+def quick_anderson_ksamp(p_random_variables, q_random_variables, **kwargs):
+    """Calculate the k-sample Anderson-Darling statistic using scipy.stats.anderson_ksamp for two CDFs. 
+    For more details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html
 
     Parameters
     ----------
-    p_eval: numpy.ndarray, float
-        the values of a probability distribution
-    grid: numpy.ndarray, float
-        the grid upon which p_eval was evaluated
-    dx: float
-        the difference between regular grid points
-    N: int
-        order of the moment to be calculated
+    p_random_variables : np.array
+        An array of random variables from the given distribution
+    q_random_variables : np.array
+        An array of random variables from the given distribution
 
     Returns
     -------
-    M: float
-        value of the moment
+    [Result objects]
+        A array of objects with attributes ``statistic``, ``critical_values``, and ``significance_level``.
     """
-    M = np.dot(p_eval, grid_to_N) * dx
-    return M
+    return stats.anderson_ksamp([p_random_variables, q_random_variables], **kwargs)
+
+def quick_cramer_von_mises(p_random_variables, q_cdf, **kwargs):
+    """Calculate the Cramer von Mises statistic using scipy.stats.cramervonmises for each pair of distributions
+    in two input Ensembles. For more details see:
+    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.cramervonmises.html
+
+
+    Parameters
+    ----------
+    p_random_variables : np.array
+        An array of random variables from the given distribution
+    q_cdf : callable
+        A function to calculate the CDF of a given distribution
+
+    Returns
+    -------
+    [Result objects]
+        A array of objects with attributes ``statistic`` and ``pvalue``.
+    """
+
+    return stats.cramervonmises(p_random_variables, q_cdf, **kwargs)
 
 def quick_kld(p_eval, q_eval, dx=0.01):
     """
@@ -56,6 +90,51 @@ def quick_kld(p_eval, q_eval, dx=0.01):
     Dpq = dx * np.sum(p_eval * logquotient, axis=-1)
     return Dpq
 
+def quick_kolmogorov_smirnov(p_rvs, q_cdf, num_samples=100, **kwargs):
+    """Calculate the Kolmogorov-Smirnov statistic using scipy.stats.kstest for each pair of distributions
+    in two input Ensembles. For more details see:
+    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kstest.html 
+
+    Parameters
+    ----------
+    p_rvs : callable
+        A function to generate random variables for the given distribution
+    q_cdf : callable
+        A function to calculate the CDF of a given distribution
+    num_samples : int, optional
+        Number of samples to use in the calculation
+
+    Returns
+    -------
+    [KstestResult]
+        A array of KstestResult objects with attributes ``statistic`` and ``pvalue``.
+    """
+
+    return stats.kstest(p_rvs, q_cdf, N=num_samples, **kwargs)
+
+def quick_moment(p_eval, grid_to_N, dx):
+    """
+    Calculates a moment of an evaluated PDF
+
+    Parameters
+    ----------
+    p_eval: numpy.ndarray, float
+        the values of a probability distribution
+    grid: numpy.ndarray, float
+        the grid upon which p_eval was evaluated
+    dx: float
+        the difference between regular grid points
+    N: int
+        order of the moment to be calculated
+
+    Returns
+    -------
+    M: float
+        value of the moment
+    """
+    M = np.dot(p_eval, grid_to_N) * dx
+    return M
+
 def quick_rmse(p_eval, q_eval, N):
     """
     Calculates the Root Mean Square Error between two evaluations of PDFs.
@@ -102,17 +181,19 @@ def quick_rbpe(pdf_function, integration_bounds, limits=(np.inf, np.inf)):
     """
 
     def calculate_loss(x):
-        return 1. - (1. / (1. + (pow((x / .15), 2))))
+        return 1.0 - (1.0 / (1.0 + (pow((x / 0.15), 2))))
 
     lower = integration_bounds[0]
     upper = integration_bounds[1]
 
     def find_z_risk(zp):
         def integrand(z):
-            return pdf_function(z) * calculate_loss((zp - z) / (1. + z))
+            return pdf_function(z) * calculate_loss((zp - z) / (1.0 + z))
 
         return quad(integrand, lower, upper)[0]
 
     if limits[0] == np.inf:
         return minimize_scalar(find_z_risk).x
-    return minimize_scalar(find_z_risk, bounds=(limits[0], limits[1]), method='bounded').x
+    return minimize_scalar(
+        find_z_risk, bounds=(limits[0], limits[1]), method="bounded"
+    ).x
diff --git a/src/qp/metrics/brier.py b/src/qp/metrics/brier.py
@@ -0,0 +1,97 @@
+import logging
+import numpy as np
+
+
+class Brier:
+    """Brier score based on https://en.wikipedia.org/wiki/Brier_score#Original_definition_by_Brier
+    
+    Parameters
+    ----------
+    prediction: NxM array, float
+        Predicted probability for N celestial objects to have a redshift in
+        one of M bins. The sum of values along each row N should be 1.
+    truth: NxM array, int
+        True redshift values for N celestial objects, where Mth bin for the
+        true redshift will have value 1, all other bins will have a value of
+        0.
+    """
+
+    def __init__(self, prediction, truth):
+        """Constructor"""
+
+        self._prediction = prediction
+        self._truth = truth
+        self._axis_for_summation = None  # axis to sum for metric calculation
+
+    def evaluate(self):
+        """Evaluate the Brier score.
+
+        Returns
+        -------
+        float
+            The result of calculating the Brier metric, a value in the interval [0,2]
+        """
+
+        self._manipulate_data()
+        self._validate_data()
+        return self._calculate_metric()
+
+    def _manipulate_data(self):
+        """
+        Placeholder for data manipulation as required. i.e. converting from
+        qp.ensemble objects into np.array objects.
+        """
+
+        # Attempt to convert the input variables into np.arrays
+        self._prediction = np.array(self._prediction)
+        self._truth = np.array(self._truth)
+
+    def _validate_data(self):
+        """
+        Strictly for data validation - no calculations or data structure
+        changes.
+
+        Raises
+        ------
+        TypeError if either prediction or truth input could not be converted
+        into a numeric Numpy array
+
+        ValueError if the prediction and truth arrays do not have the same
+        numpy.shape.
+
+        Warning
+        -------
+        Logs a warning message if the input predictions do not each sum to 1.
+        """
+
+        # Raise TypeError exceptions if the inputs were not translated to
+        # numeric np.arrays
+        if not np.issubdtype(self._prediction.dtype, np.number):
+            raise TypeError(
+                "Input prediction array could not be converted to a Numpy array"
+            )
+        if not np.issubdtype(self._truth.dtype, np.number):
+            raise TypeError("Input truth array could not be converted to a Numpy array")
+
+        # Raise ValueError if the arrays have different shapes
+        if self._prediction.shape != self._truth.shape:
+            raise ValueError(
+                "Input prediction and truth arrays do not have the same shape"
+            )
+
+        # Log a warning if the N rows of the input prediction do not each sum to
+        # 1. Note: For 1d arrays, a sum along axis = 1 will fail, so we set
+        # self._axis_for_summation appropriately for that case
+        self._axis_for_summation = 0 if self._prediction.ndim == 1 else 1
+        if not np.allclose(
+            np.sum(self._prediction, axis=self._axis_for_summation), 1.0
+        ):
+            logging.warning("Input predictions do not sum to 1.")
+
+    def _calculate_metric(self):
+        """
+        Calculate the Brier metric for the input data.
+        """
+        return np.mean(
+            np.sum((self._prediction - self._truth) ** 2, axis=self._axis_for_summation)
+        )