From f27f36b6dd7d746b9c94d62715b774da40f4c7f5 Mon Sep 17 00:00:00 2001 From: Eric Charles Date: Fri, 2 Aug 2024 14:29:40 -0700 Subject: [PATCH] lazy pytdigest (#234) * lazy pytdigest * fix quote in pyproject.toml * fix deprecation warnings * pin numpy<2.0.0 * Use module imports rather than relative imports --------- Co-authored-by: Sidney Mau --- pyproject.toml | 6 ++++-- src/qp/conversion_funcs.py | 4 ++-- src/qp/lazy_modules.py | 1 + src/qp/metrics/concrete_metric_classes.py | 6 +++--- src/qp/metrics/point_estimate_metric_classes.py | 7 +++---- src/qp/sparse_pdf.py | 6 +++--- src/qp/sparse_rep.py | 4 ++-- src/qp/test_funcs.py | 2 +- 8 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f8bba55..7421e043 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,11 +17,10 @@ classifiers = [ dynamic = ["version"] dependencies = [ - "numpy", + "numpy<2.0.0", "scipy", "tables-io", "deprecated", - "pytdigest", ] # On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes) @@ -37,6 +36,7 @@ dev = [ "packaging", "pillow", "cycler", + "pytdigest", "python-dateutil", "kiwisolver", "joblib", @@ -48,11 +48,13 @@ dev = [ ] full = [ "tables-io[full]", + "pytdigest", "matplotlib", "scikit-learn", ] all = [ "tables-io[full]", + "pytdigest", "matplotlib", "scikit-learn", ] diff --git a/src/qp/conversion_funcs.py b/src/qp/conversion_funcs.py index 8be2f56..85689b9 100644 --- a/src/qp/conversion_funcs.py +++ b/src/qp/conversion_funcs.py @@ -312,7 +312,7 @@ def extract_voigt_xy_sparse(in_dist, **kwargs): # pragma: no cover newz = np.linspace(minz, maxz, nz) interp = sciinterp.interp1d(z, yvals, assume_sorted=True) newpdf = interp(newz) - newpdf = newpdf / sciint.trapz(newpdf, newz).reshape(-1, 1) + newpdf = newpdf / sciint.trapezoid(newpdf, newz).reshape(-1, 1) ALL, bigD, _ = build_sparse_representation(newz, newpdf) return dict(indices=ALL, metadata=bigD) @@ -410,7 +410,7 @@ def extract_xy_sparse(in_dist, **kwargs): # pragma: no cover # normalize and sum the weighted pdfs x = sparse_meta["z"] y = pdf_y.sum(axis=-1) - norms = sciint.trapz(y.T, x) + norms = sciint.trapezoid(y.T, x) y /= norms # super(sparse_gen, self).__init__(x, y.T, *args, **kwargs) xvals = x diff --git a/src/qp/lazy_modules.py b/src/qp/lazy_modules.py index 22db4ba..16a33f2 100644 --- a/src/qp/lazy_modules.py +++ b/src/qp/lazy_modules.py @@ -5,3 +5,4 @@ mpl = lazyImport("matplotlib") plt = lazyImport("matplotlib.pyplot") mixture = lazyImport("sklearn.mixture") +pytdigest = lazyImport("pytdigest") diff --git a/src/qp/metrics/concrete_metric_classes.py b/src/qp/metrics/concrete_metric_classes.py index fceb498..00c7271 100644 --- a/src/qp/metrics/concrete_metric_classes.py +++ b/src/qp/metrics/concrete_metric_classes.py @@ -20,7 +20,7 @@ ) from qp.metrics.pit import PIT -from pytdigest import TDigest +from qp.lazy_modules import pytdigest from functools import reduce from operator import add @@ -54,7 +54,7 @@ def finalize(self, centroids: np.ndarray = []): `compute_from_digest` method. """ digests = ( - TDigest.of_centroids(np.array(centroid), compression=self._tdigest_compression) + pytdigest.TDigest.of_centroids(np.array(centroid), compression=self._tdigest_compression) for centroid in centroids ) digest = reduce(add, digests) @@ -277,7 +277,7 @@ def evaluate(self, estimate, reference): def accumulate(self, estimate, reference): pit_samples = PIT(estimate, reference, self._eval_grid)._gather_pit_samples(estimate, reference) - digest = TDigest.compute(pit_samples, compression=self._tdigest_compression) + digest = pytdigest.TDigest.compute(pit_samples, compression=self._tdigest_compression) centroids = digest.get_centroids() return centroids diff --git a/src/qp/metrics/point_estimate_metric_classes.py b/src/qp/metrics/point_estimate_metric_classes.py index 7856813..c22dbbc 100644 --- a/src/qp/metrics/point_estimate_metric_classes.py +++ b/src/qp/metrics/point_estimate_metric_classes.py @@ -3,10 +3,9 @@ MetricOutputType, PointToPointMetric, ) -from pytdigest import TDigest from functools import reduce from operator import add - +from qp.lazy_modules import pytdigest class PointToPointMetricDigester(PointToPointMetric): @@ -35,7 +34,7 @@ def accumulate(self, estimate, reference): centroid locations and weights. """ ez = (estimate - reference) / (1.0 + reference) - digest = TDigest.compute(ez, compression=self._tdigest_compression) + digest = pytdigest.TDigest.compute(ez, compression=self._tdigest_compression) centroids = digest.get_centroids() return centroids @@ -56,7 +55,7 @@ def finalize(self, centroids: np.ndarray = []): `compute_from_digest` method. """ digests = ( - TDigest.of_centroids(np.array(centroid), compression=self._tdigest_compression) + pytdigest.TDigest.of_centroids(np.array(centroid), compression=self._tdigest_compression) for centroid in centroids ) digest = reduce(add, digests) diff --git a/src/qp/sparse_pdf.py b/src/qp/sparse_pdf.py index deeafb4..be40cfe 100644 --- a/src/qp/sparse_pdf.py +++ b/src/qp/sparse_pdf.py @@ -48,7 +48,7 @@ def __init__(self, xvals, mu, sig, dims, sparse_indices, *args, **kwargs): # py # normalize and sum the weighted pdfs x = sparse_meta["xvals"] y = pdf_y.sum(axis=-1) - norms = sciint.trapz(y.T, x) + norms = sciint.trapezoid(y.T, x) y /= norms kwargs.setdefault("xvals", x) kwargs.setdefault("yvals", y.T) @@ -97,7 +97,7 @@ def build_test_data(): P = np.load(filein) z = P[-1] P = P[:NPDF] - P = P / sciint.trapz(P, z).reshape(-1, 1) + P = P / sciint.trapezoid(P, z).reshape(-1, 1) minz = np.min(z) nz = 301 _, j = np.where(P > 0) @@ -105,7 +105,7 @@ def build_test_data(): newz = np.linspace(minz, maxz, nz) interp = sciinterp.interp1d(z, P, assume_sorted=True) newpdf = interp(newz) - newpdf = newpdf / sciint.trapz(newpdf, newz).reshape(-1, 1) + newpdf = newpdf / sciint.trapezoid(newpdf, newz).reshape(-1, 1) sparse_idx, meta, _ = sparse_rep.build_sparse_representation( newz, newpdf, verbose=False ) diff --git a/src/qp/sparse_rep.py b/src/qp/sparse_rep.py index 398fb86..008cd8e 100644 --- a/src/qp/sparse_rep.py +++ b/src/qp/sparse_rep.py @@ -26,7 +26,7 @@ def shapes2pdf(wa, ma, sa, ga, meta, cut=1.0e-5): # pylint: disable=too-many-ar pdft = w * pdft / sla.norm(pdft) pdf += pdft pdf = np.where(pdf >= cut, pdf, 0.0) - return pdf / sciint.trapz(pdf, x) + return pdf / sciint.trapezoid(pdf, x) def create_basis(metadata, cut=1.0e-5): @@ -301,6 +301,6 @@ def pdf_from_sparse(sparse_indices, A, xvals, cut=1.0e-5): pdf_y = (A[:, indices] * vals).sum(axis=-1) pdf_y = np.where(pdf_y >= cut, pdf_y, 0.0) pdf_x = xvals - norms = sciint.trapz(pdf_y.T, pdf_x) + norms = sciint.trapezoid(pdf_y.T, pdf_x) pdf_y /= norms return pdf_y diff --git a/src/qp/test_funcs.py b/src/qp/test_funcs.py index c2183f8..a9b0de8 100644 --- a/src/qp/test_funcs.py +++ b/src/qp/test_funcs.py @@ -132,7 +132,7 @@ def run_pdf_func_tests(test_class, test_data, short=False, check_props=True): alloc_kwds = pdf.dist.get_allocation_kwds(pdf.npdf, **test_data["ctor_data"]) for key, val in alloc_kwds.items(): - assert np.product(val[0]) == np.size(test_data["ctor_data"][key]) + assert np.prod(val[0]) == np.size(test_data["ctor_data"][key]) return pdf_func_tests(pdf, test_data, short=short, check_props=check_props)