From 1bb5ca40bc91f6d46958a795d933b12473643d0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20St=C3=B6lzner?= Date: Thu, 3 Aug 2023 16:33:54 -0700 Subject: [PATCH 1/6] Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented. --- src/qp/mixmod_pdf.py | 88 +++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 25 deletions(-) diff --git a/src/qp/mixmod_pdf.py b/src/qp/mixmod_pdf.py index e9cd405..602d32a 100644 --- a/src/qp/mixmod_pdf.py +++ b/src/qp/mixmod_pdf.py @@ -17,16 +17,18 @@ class mixmod_gen(Pdf_rows_gen): Notes ----- - This implements a PDF using a Gaussian Mixture model + This is a base class for implementing PDFs using a mixture model. + Classes implementing mixture models with specific basis functions + need to define the pdf and cdf. The relevant data members are: - means: (npdf, ncomp) means of the Gaussians - stds: (npdf, ncomp) standard deviations of the Gaussians - weights: (npdf, ncomp) weights for the Gaussians + means: (npdf, ncomp) means of the basis functions + stds: (npdf, ncomp) standard deviations of the basis functions + weights: (npdf, ncomp) weights for the basis functions The pdf() and cdf() are exact, and are computed as a weighted sum of - the pdf() and cdf() of the component Gaussians. + the pdf() and cdf() of the component basis functions. The ppf() is computed by computing the cdf() values on a fixed grid and interpolating the inverse function. @@ -34,29 +36,45 @@ class mixmod_gen(Pdf_rows_gen): # pylint: disable=protected-access +<<<<<<< HEAD name = "mixmod" version = 0 +======= +>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) _support_mask = rv_continuous._support_mask - def __init__(self, means, stds, weights, *args, **kwargs): + name = 'gen_mixmod' + version = 0 + + def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs): """ Create a new distribution using the given histogram Parameters ---------- means : array_like - The means of the Gaussians + The means of the basis functions stds: array_like - The standard deviations of the Gaussians + The standard deviations of the basis functions weights : array_like +<<<<<<< HEAD The weights to attach to the Gaussians. Weights should sum up to one. If not, the weights are interpreted as relative weights. +======= + The weights to attach to the basis functions. Weights should sum up to one. If not, the weights are interpreted as relative weights. +>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) """ + self._gen_func = gen_func + print(data) + self._frozen = self._gen_func(**data) + self._gen_obj = self._frozen.dist + self._gen_class = type(self._gen_obj) + self._data = data + self._scipy_version_warning() - self._means = reshape_to_pdf_size(means, -1) - self._stds = reshape_to_pdf_size(stds, -1) self._weights = reshape_to_pdf_size(weights, -1) +<<<<<<< HEAD kwargs["shape"] = means.shape[:-1] self._ncomps = means.shape[-1] super().__init__(*args, **kwargs) @@ -66,6 +84,18 @@ def __init__(self, means, stds, weights, *args, **kwargs): self._addobjdata("weights", self._weights) self._addobjdata("stds", self._stds) self._addobjdata("means", self._means) +======= + for key in self._data.keys(): + self._data[key] = reshape_to_pdf_size(self._data[key],-1) + print(self._data) + kwargs['shape'] = weights.shape[:-1] + self._ncomps = weights.shape[-1] + super().__init__(*args, **kwargs) + if np.any(self._weights<0): + raise ValueError('All weights need to be larger than zero') + self._weights = self._weights/self._weights.sum(axis=1)[:,None] + self._addobjdata('weights', self._weights) +>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) def _scipy_version_warning(self): import scipy # pylint: disable=import-outside-toplevel @@ -80,23 +110,14 @@ def _scipy_version_warning(self): @property def weights(self): - """Return weights to attach to the Gaussians""" + """Return weights to attach to the basis functions""" return self._weights - @property - def means(self): - """Return means of the Gaussians""" - return self._means - - @property - def stds(self): - """Return standard deviations of the Gaussians""" - return self._stds - def _pdf(self, x, row): # pylint: disable=arguments-differ if np.ndim(x) > 1: # pragma: no cover x = np.expand_dims(x, -2) +<<<<<<< HEAD return ( self.weights[row].swapaxes(-2, -1) * sps.norm( @@ -104,11 +125,19 @@ def _pdf(self, x, row): scale=self._stds[row].swapaxes(-2, -1), ).pdf(x) ).sum(axis=0) +======= + data_swap=dict() + for key in self._data.keys(): + data_swap[key] = self._data[key][row].swapaxes(-2,-1) + return (self.weights[row].swapaxes(-2,-1) * + self._gen_func(**data_swap).pdf(x)).sum(axis=0) +>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) def _cdf(self, x, row): # pylint: disable=arguments-differ if np.ndim(x) > 1: # pragma: no cover x = np.expand_dims(x, -2) +<<<<<<< HEAD return ( self.weights[row].swapaxes(-2, -1) * sps.norm( @@ -116,6 +145,13 @@ def _cdf(self, x, row): scale=self._stds[row].swapaxes(-2, -1), ).cdf(x) ).sum(axis=0) +======= + data_swap=dict() + for key in self._data.keys(): + data_swap[key] = self._data[key][row].swapaxes(-2,-1) + return (self.weights[row].swapaxes(-2,-1) * + self._gen_func(**data_swap).cdf(x)).sum(axis=0) +>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) def _ppf(self, x, row): # pylint: disable=arguments-differ @@ -140,9 +176,11 @@ def _updated_ctor_param(self): Set the bins as additional constructor argument """ dct = super()._updated_ctor_param() - dct["means"] = self._means - dct["stds"] = self._stds - dct["weights"] = self._weights + # for key in self._data.keys(): + # dct[key] = self._data[key] + dct['weights'] = self._weights + dct['data'] = self._data + dct['gen_func'] = self._gen_func return dct @classmethod @@ -191,7 +229,7 @@ def make_test_data(cls): ) ) - mixmod = mixmod_gen.create add_class(mixmod_gen) + From 6a2952e8772b965693bcc775d06af4ec49b4d4a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20St=C3=B6lzner?= Date: Mon, 18 Sep 2023 16:43:35 +0200 Subject: [PATCH 2/6] add unit test --- tests/qp/test_ensemble.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/qp/test_ensemble.py b/tests/qp/test_ensemble.py index 46a0cbe..6fc5951 100644 --- a/tests/qp/test_ensemble.py +++ b/tests/qp/test_ensemble.py @@ -235,6 +235,13 @@ def test_mixmod_with_negative_weights(self): with self.assertRaises(ValueError): _ = qp.mixmod(weights=weights, means=means, stds=sigmas) + def test_mixmod_with_negative_weights(self): + """Verify that an exception is raised when setting up a mixture model with negative weights""" + means = np.array([0.5,1.1, 2.9]) + sigmas = np.array([0.15,0.13,0.14]) + weights = np.array([1,0.5,-0.25]) + with self.assertRaises(ValueError): + _ = qp.mixmod(gen_func=qp.stats.norm, weights=weights, data = dict(loc=means, scale=sigmas)) -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() From 6b51f1166db034aba49608d470cc59b6702bbbe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20St=C3=B6lzner?= Date: Mon, 9 Oct 2023 11:39:48 +0200 Subject: [PATCH 3/6] fixed mixture model implementation with scipy basis functions that require additional parameters --- src/qp/factory.py | 4 ++-- src/qp/mixmod_pdf.py | 2 -- src/qp/pdf_gen.py | 9 ++++++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/qp/factory.py b/src/qp/factory.py index a8ef2c0..e80742f 100644 --- a/src/qp/factory.py +++ b/src/qp/factory.py @@ -54,7 +54,7 @@ def _build_data_dict(md_table, data_table): data_dict[col] = col_data return data_dict - def _make_scipy_wrapped_class(self, class_name, scipy_class): + def _make_scipy_wrapped_class(self, class_name, scipy_class, ctor_param): """Build a qp class from a scipy class""" # pylint: disable=protected-access override_dict = dict( @@ -72,7 +72,7 @@ def _load_scipy_classes(self): for name in names: attr = getattr(sps, name) if isinstance(attr, sps.rv_continuous): - self._make_scipy_wrapped_class(name, type(attr)) + self._make_scipy_wrapped_class(name, type(attr), attr._updated_ctor_param()) def add_class(self, the_class): """Add a class to the factory diff --git a/src/qp/mixmod_pdf.py b/src/qp/mixmod_pdf.py index 602d32a..a285dc4 100644 --- a/src/qp/mixmod_pdf.py +++ b/src/qp/mixmod_pdf.py @@ -66,7 +66,6 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs): >>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) """ self._gen_func = gen_func - print(data) self._frozen = self._gen_func(**data) self._gen_obj = self._frozen.dist self._gen_class = type(self._gen_obj) @@ -87,7 +86,6 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs): ======= for key in self._data.keys(): self._data[key] = reshape_to_pdf_size(self._data[key],-1) - print(self._data) kwargs['shape'] = weights.shape[:-1] self._ncomps = weights.shape[-1] super().__init__(*args, **kwargs) diff --git a/src/qp/pdf_gen.py b/src/qp/pdf_gen.py index 36fefac..84c66f0 100644 --- a/src/qp/pdf_gen.py +++ b/src/qp/pdf_gen.py @@ -124,8 +124,13 @@ def create_gen(cls, **kwds): """Create and return a `scipy.stats.rv_continuous` object using the keyword arguemntets provided""" kwds_copy = kwds.copy() +<<<<<<< HEAD name = kwds_copy.pop("name", "dist") return (cls(name=name), kwds_copy) +======= + name = kwds_copy.pop('name', 'dist') + return (cls(), kwds_copy) +>>>>>>> 71047e2 (fixed mixture model implementation with scipy basis functions that require additional parameters) @classmethod def create(cls, **kwds): @@ -393,7 +398,9 @@ def __init__(self, *args, **kwargs): """C'tor""" # pylint: disable=no-member,protected-access super().__init__(*args, **kwargs) - self._other_init(*args, **kwargs) + if kwargs==self._ctor_param: + kwargs=dict() + self._other_init(*args, **kwargs, **self._ctor_param) def _my_freeze(self, *args, **kwds): """Freeze the distribution for the given arguments. From 56c4521e9f9f40dae4dc826af49c6c3fe0405d1d Mon Sep 17 00:00:00 2001 From: Eric Charles Date: Thu, 30 Nov 2023 15:41:25 -0800 Subject: [PATCH 4/6] finish resolving conflicts --- src/qp/mixmod_pdf.py | 44 +------------------------------------------- src/qp/pdf_gen.py | 5 ----- 2 files changed, 1 insertion(+), 48 deletions(-) diff --git a/src/qp/mixmod_pdf.py b/src/qp/mixmod_pdf.py index a285dc4..649ca66 100644 --- a/src/qp/mixmod_pdf.py +++ b/src/qp/mixmod_pdf.py @@ -36,15 +36,9 @@ class mixmod_gen(Pdf_rows_gen): # pylint: disable=protected-access -<<<<<<< HEAD - name = "mixmod" - version = 0 - -======= ->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) _support_mask = rv_continuous._support_mask - name = 'gen_mixmod' + name = 'mixmod' version = 0 def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs): @@ -58,12 +52,8 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs): stds: array_like The standard deviations of the basis functions weights : array_like -<<<<<<< HEAD The weights to attach to the Gaussians. Weights should sum up to one. If not, the weights are interpreted as relative weights. -======= - The weights to attach to the basis functions. Weights should sum up to one. If not, the weights are interpreted as relative weights. ->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) """ self._gen_func = gen_func self._frozen = self._gen_func(**data) @@ -73,17 +63,6 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs): self._scipy_version_warning() self._weights = reshape_to_pdf_size(weights, -1) -<<<<<<< HEAD - kwargs["shape"] = means.shape[:-1] - self._ncomps = means.shape[-1] - super().__init__(*args, **kwargs) - if np.any(self._weights < 0): - raise ValueError("All weights need to be larger than zero") - self._weights = self._weights / self._weights.sum(axis=1)[:, None] - self._addobjdata("weights", self._weights) - self._addobjdata("stds", self._stds) - self._addobjdata("means", self._means) -======= for key in self._data.keys(): self._data[key] = reshape_to_pdf_size(self._data[key],-1) kwargs['shape'] = weights.shape[:-1] @@ -93,7 +72,6 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs): raise ValueError('All weights need to be larger than zero') self._weights = self._weights/self._weights.sum(axis=1)[:,None] self._addobjdata('weights', self._weights) ->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) def _scipy_version_warning(self): import scipy # pylint: disable=import-outside-toplevel @@ -115,41 +93,21 @@ def _pdf(self, x, row): # pylint: disable=arguments-differ if np.ndim(x) > 1: # pragma: no cover x = np.expand_dims(x, -2) -<<<<<<< HEAD - return ( - self.weights[row].swapaxes(-2, -1) - * sps.norm( - loc=self._means[row].swapaxes(-2, -1), - scale=self._stds[row].swapaxes(-2, -1), - ).pdf(x) - ).sum(axis=0) -======= data_swap=dict() for key in self._data.keys(): data_swap[key] = self._data[key][row].swapaxes(-2,-1) return (self.weights[row].swapaxes(-2,-1) * self._gen_func(**data_swap).pdf(x)).sum(axis=0) ->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) def _cdf(self, x, row): # pylint: disable=arguments-differ if np.ndim(x) > 1: # pragma: no cover x = np.expand_dims(x, -2) -<<<<<<< HEAD - return ( - self.weights[row].swapaxes(-2, -1) - * sps.norm( - loc=self._means[row].swapaxes(-2, -1), - scale=self._stds[row].swapaxes(-2, -1), - ).cdf(x) - ).sum(axis=0) -======= data_swap=dict() for key in self._data.keys(): data_swap[key] = self._data[key][row].swapaxes(-2,-1) return (self.weights[row].swapaxes(-2,-1) * self._gen_func(**data_swap).cdf(x)).sum(axis=0) ->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.) def _ppf(self, x, row): # pylint: disable=arguments-differ diff --git a/src/qp/pdf_gen.py b/src/qp/pdf_gen.py index 84c66f0..caaa332 100644 --- a/src/qp/pdf_gen.py +++ b/src/qp/pdf_gen.py @@ -124,13 +124,8 @@ def create_gen(cls, **kwds): """Create and return a `scipy.stats.rv_continuous` object using the keyword arguemntets provided""" kwds_copy = kwds.copy() -<<<<<<< HEAD name = kwds_copy.pop("name", "dist") return (cls(name=name), kwds_copy) -======= - name = kwds_copy.pop('name', 'dist') - return (cls(), kwds_copy) ->>>>>>> 71047e2 (fixed mixture model implementation with scipy basis functions that require additional parameters) @classmethod def create(cls, **kwds): From e6b4da890eab182e8fa253cb3da7e5d4f0566365 Mon Sep 17 00:00:00 2001 From: Eric Charles Date: Mon, 4 Dec 2023 12:15:12 -0800 Subject: [PATCH 5/6] put _ctor_param back in and do a couple of fixes for unit tests --- src/qp/factory.py | 1 + src/qp/pdf_gen.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/qp/factory.py b/src/qp/factory.py index e80742f..9f4b581 100644 --- a/src/qp/factory.py +++ b/src/qp/factory.py @@ -62,6 +62,7 @@ def _make_scipy_wrapped_class(self, class_name, scipy_class, ctor_param): version=0, freeze=Pdf_gen_wrap._my_freeze, _other_init=scipy_class.__init__, + _ctor_param=ctor_param, ) the_class = type(class_name, (Pdf_gen_wrap, scipy_class), override_dict) self.add_class(the_class) diff --git a/src/qp/pdf_gen.py b/src/qp/pdf_gen.py index caaa332..f2052eb 100644 --- a/src/qp/pdf_gen.py +++ b/src/qp/pdf_gen.py @@ -395,6 +395,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) if kwargs==self._ctor_param: kwargs=dict() + kwargs.pop('name', None) self._other_init(*args, **kwargs, **self._ctor_param) def _my_freeze(self, *args, **kwds): From 64932d6fc2dd92df9acb1bfe9b8c56a66c2d78ac Mon Sep 17 00:00:00 2001 From: Eric Charles Date: Mon, 4 Dec 2023 12:15:25 -0800 Subject: [PATCH 6/6] put _ctor_param back in and do a couple of fixes for unit tests --- tests/qp/test_auto.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/qp/test_auto.py b/tests/qp/test_auto.py index c4aedc0..ce41dff 100644 --- a/tests/qp/test_auto.py +++ b/tests/qp/test_auto.py @@ -80,7 +80,8 @@ def auto_add(cls, class_list, ens_orig): ENS_MULTI = test_funcs.build_ensemble( qp.stats.norm_gen.test_data["norm"] # pylint: disable=no-member ) -TEST_CLASSES = qp.instance().values() +TEST_CLASSES = list(qp.instance().values()) +TEST_CLASSES.remove(qp.mixmod_pdf.mixmod_gen) PDFTestCase.auto_add(TEST_CLASSES, [ENS_ORIG, ENS_MULTI])