From 1bb5ca40bc91f6d46958a795d933b12473643d0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20St=C3=B6lzner?= <stoelzner@astro.rub.de>
Date: Thu, 3 Aug 2023 16:33:54 -0700
Subject: [PATCH 1/6] Started implementing mixture model implementation with
 generic scipy base function. So far only the ones using loc and scale
 parameters work. PPFs need to be implemented.

---
 src/qp/mixmod_pdf.py | 88 +++++++++++++++++++++++++++++++-------------
 1 file changed, 63 insertions(+), 25 deletions(-)

diff --git a/src/qp/mixmod_pdf.py b/src/qp/mixmod_pdf.py
index e9cd405..602d32a 100644
--- a/src/qp/mixmod_pdf.py
+++ b/src/qp/mixmod_pdf.py
@@ -17,16 +17,18 @@ class mixmod_gen(Pdf_rows_gen):
 
     Notes
     -----
-    This implements a PDF using a Gaussian Mixture model
+    This is a base class for implementing PDFs using a mixture model. 
+    Classes implementing mixture models with specific basis functions
+    need to define the pdf and cdf.
 
     The relevant data members are:
 
-    means:  (npdf, ncomp) means of the Gaussians
-    stds:  (npdf, ncomp) standard deviations of the Gaussians
-    weights: (npdf, ncomp) weights for the Gaussians
+    means:  (npdf, ncomp) means of the basis functions
+    stds:  (npdf, ncomp) standard deviations of the basis functions
+    weights: (npdf, ncomp) weights for the basis functions
 
     The pdf() and cdf() are exact, and are computed as a weighted sum of
-    the pdf() and cdf() of the component Gaussians.
+    the pdf() and cdf() of the component basis functions.
 
     The ppf() is computed by computing the cdf() values on a fixed
     grid and interpolating the inverse function.
@@ -34,29 +36,45 @@ class mixmod_gen(Pdf_rows_gen):
 
     # pylint: disable=protected-access
 
+<<<<<<< HEAD
     name = "mixmod"
     version = 0
 
+=======
+>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
     _support_mask = rv_continuous._support_mask
 
-    def __init__(self, means, stds, weights, *args, **kwargs):
+    name = 'gen_mixmod'
+    version = 0
+
+    def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs):
         """
         Create a new distribution using the given histogram
 
         Parameters
         ----------
         means : array_like
-            The means of the Gaussians
+            The means of the basis functions
         stds:  array_like
-            The standard deviations of the Gaussians
+            The standard deviations of the basis functions
         weights : array_like
+<<<<<<< HEAD
             The weights to attach to the Gaussians. Weights should sum up to one.
             If not, the weights are interpreted as relative weights.
+=======
+            The weights to attach to the basis functions. Weights should sum up to one. If not, the weights are interpreted as relative weights.
+>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
         """
+        self._gen_func = gen_func
+        print(data)
+        self._frozen = self._gen_func(**data)
+        self._gen_obj = self._frozen.dist
+        self._gen_class = type(self._gen_obj)
+        self._data = data
+
         self._scipy_version_warning()
-        self._means = reshape_to_pdf_size(means, -1)
-        self._stds = reshape_to_pdf_size(stds, -1)
         self._weights = reshape_to_pdf_size(weights, -1)
+<<<<<<< HEAD
         kwargs["shape"] = means.shape[:-1]
         self._ncomps = means.shape[-1]
         super().__init__(*args, **kwargs)
@@ -66,6 +84,18 @@ def __init__(self, means, stds, weights, *args, **kwargs):
         self._addobjdata("weights", self._weights)
         self._addobjdata("stds", self._stds)
         self._addobjdata("means", self._means)
+=======
+        for key in self._data.keys():
+            self._data[key] = reshape_to_pdf_size(self._data[key],-1)
+        print(self._data)
+        kwargs['shape'] = weights.shape[:-1]
+        self._ncomps = weights.shape[-1]
+        super().__init__(*args, **kwargs)
+        if np.any(self._weights<0):
+            raise ValueError('All weights need to be larger than zero')
+        self._weights = self._weights/self._weights.sum(axis=1)[:,None]
+        self._addobjdata('weights', self._weights)
+>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
 
     def _scipy_version_warning(self):
         import scipy  # pylint: disable=import-outside-toplevel
@@ -80,23 +110,14 @@ def _scipy_version_warning(self):
 
     @property
     def weights(self):
-        """Return weights to attach to the Gaussians"""
+        """Return weights to attach to the basis functions"""
         return self._weights
 
-    @property
-    def means(self):
-        """Return means of the Gaussians"""
-        return self._means
-
-    @property
-    def stds(self):
-        """Return standard deviations of the Gaussians"""
-        return self._stds
-
     def _pdf(self, x, row):
         # pylint: disable=arguments-differ
         if np.ndim(x) > 1:  # pragma: no cover
             x = np.expand_dims(x, -2)
+<<<<<<< HEAD
         return (
             self.weights[row].swapaxes(-2, -1)
             * sps.norm(
@@ -104,11 +125,19 @@ def _pdf(self, x, row):
                 scale=self._stds[row].swapaxes(-2, -1),
             ).pdf(x)
         ).sum(axis=0)
+=======
+        data_swap=dict()
+        for key in self._data.keys():
+            data_swap[key] = self._data[key][row].swapaxes(-2,-1)
+        return (self.weights[row].swapaxes(-2,-1) *
+                self._gen_func(**data_swap).pdf(x)).sum(axis=0)
+>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
 
     def _cdf(self, x, row):
         # pylint: disable=arguments-differ
         if np.ndim(x) > 1:  # pragma: no cover
             x = np.expand_dims(x, -2)
+<<<<<<< HEAD
         return (
             self.weights[row].swapaxes(-2, -1)
             * sps.norm(
@@ -116,6 +145,13 @@ def _cdf(self, x, row):
                 scale=self._stds[row].swapaxes(-2, -1),
             ).cdf(x)
         ).sum(axis=0)
+=======
+        data_swap=dict()
+        for key in self._data.keys():
+            data_swap[key] = self._data[key][row].swapaxes(-2,-1)
+        return (self.weights[row].swapaxes(-2,-1) *
+                self._gen_func(**data_swap).cdf(x)).sum(axis=0)
+>>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
 
     def _ppf(self, x, row):
         # pylint: disable=arguments-differ
@@ -140,9 +176,11 @@ def _updated_ctor_param(self):
         Set the bins as additional constructor argument
         """
         dct = super()._updated_ctor_param()
-        dct["means"] = self._means
-        dct["stds"] = self._stds
-        dct["weights"] = self._weights
+        # for key in self._data.keys():
+        #     dct[key] = self._data[key]
+        dct['weights'] = self._weights
+        dct['data'] = self._data
+        dct['gen_func'] = self._gen_func
         return dct
 
     @classmethod
@@ -191,7 +229,7 @@ def make_test_data(cls):
             )
         )
 
-
 mixmod = mixmod_gen.create
 
 add_class(mixmod_gen)
+

From 6a2952e8772b965693bcc775d06af4ec49b4d4a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20St=C3=B6lzner?= <stoelzner@astro.rub.de>
Date: Mon, 18 Sep 2023 16:43:35 +0200
Subject: [PATCH 2/6] add unit test

---
 tests/qp/test_ensemble.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/qp/test_ensemble.py b/tests/qp/test_ensemble.py
index 46a0cbe..6fc5951 100644
--- a/tests/qp/test_ensemble.py
+++ b/tests/qp/test_ensemble.py
@@ -235,6 +235,13 @@ def test_mixmod_with_negative_weights(self):
         with self.assertRaises(ValueError):
             _ = qp.mixmod(weights=weights, means=means, stds=sigmas)
 
+    def test_mixmod_with_negative_weights(self):
+        """Verify that an exception is raised when setting up a mixture model with negative weights"""
+        means = np.array([0.5,1.1, 2.9])
+        sigmas = np.array([0.15,0.13,0.14])
+        weights = np.array([1,0.5,-0.25])
+        with self.assertRaises(ValueError):
+            _ = qp.mixmod(gen_func=qp.stats.norm, weights=weights, data = dict(loc=means, scale=sigmas))
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     unittest.main()

From 6b51f1166db034aba49608d470cc59b6702bbbe8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20St=C3=B6lzner?= <benjamin@MarSara.fritz.box>
Date: Mon, 9 Oct 2023 11:39:48 +0200
Subject: [PATCH 3/6] fixed mixture model implementation with scipy basis
 functions that require additional parameters

---
 src/qp/factory.py    | 4 ++--
 src/qp/mixmod_pdf.py | 2 --
 src/qp/pdf_gen.py    | 9 ++++++++-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/qp/factory.py b/src/qp/factory.py
index a8ef2c0..e80742f 100644
--- a/src/qp/factory.py
+++ b/src/qp/factory.py
@@ -54,7 +54,7 @@ def _build_data_dict(md_table, data_table):
                 data_dict[col] = col_data
         return data_dict
 
-    def _make_scipy_wrapped_class(self, class_name, scipy_class):
+    def _make_scipy_wrapped_class(self, class_name, scipy_class, ctor_param):
         """Build a qp class from a scipy class"""
         # pylint: disable=protected-access
         override_dict = dict(
@@ -72,7 +72,7 @@ def _load_scipy_classes(self):
         for name in names:
             attr = getattr(sps, name)
             if isinstance(attr, sps.rv_continuous):
-                self._make_scipy_wrapped_class(name, type(attr))
+                self._make_scipy_wrapped_class(name, type(attr), attr._updated_ctor_param())
 
     def add_class(self, the_class):
         """Add a class to the factory
diff --git a/src/qp/mixmod_pdf.py b/src/qp/mixmod_pdf.py
index 602d32a..a285dc4 100644
--- a/src/qp/mixmod_pdf.py
+++ b/src/qp/mixmod_pdf.py
@@ -66,7 +66,6 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs):
 >>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
         """
         self._gen_func = gen_func
-        print(data)
         self._frozen = self._gen_func(**data)
         self._gen_obj = self._frozen.dist
         self._gen_class = type(self._gen_obj)
@@ -87,7 +86,6 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs):
 =======
         for key in self._data.keys():
             self._data[key] = reshape_to_pdf_size(self._data[key],-1)
-        print(self._data)
         kwargs['shape'] = weights.shape[:-1]
         self._ncomps = weights.shape[-1]
         super().__init__(*args, **kwargs)
diff --git a/src/qp/pdf_gen.py b/src/qp/pdf_gen.py
index 36fefac..84c66f0 100644
--- a/src/qp/pdf_gen.py
+++ b/src/qp/pdf_gen.py
@@ -124,8 +124,13 @@ def create_gen(cls, **kwds):
         """Create and return a `scipy.stats.rv_continuous` object using the
         keyword arguemntets provided"""
         kwds_copy = kwds.copy()
+<<<<<<< HEAD
         name = kwds_copy.pop("name", "dist")
         return (cls(name=name), kwds_copy)
+=======
+        name = kwds_copy.pop('name', 'dist')
+        return (cls(), kwds_copy)
+>>>>>>> 71047e2 (fixed mixture model implementation with scipy basis functions that require additional parameters)
 
     @classmethod
     def create(cls, **kwds):
@@ -393,7 +398,9 @@ def __init__(self, *args, **kwargs):
         """C'tor"""
         # pylint: disable=no-member,protected-access
         super().__init__(*args, **kwargs)
-        self._other_init(*args, **kwargs)
+        if kwargs==self._ctor_param:
+            kwargs=dict()
+        self._other_init(*args, **kwargs, **self._ctor_param)
 
     def _my_freeze(self, *args, **kwds):
         """Freeze the distribution for the given arguments.

From 56c4521e9f9f40dae4dc826af49c6c3fe0405d1d Mon Sep 17 00:00:00 2001
From: Eric Charles <badass@stanford.edu>
Date: Thu, 30 Nov 2023 15:41:25 -0800
Subject: [PATCH 4/6] finish resolving conflicts

---
 src/qp/mixmod_pdf.py | 44 +-------------------------------------------
 src/qp/pdf_gen.py    |  5 -----
 2 files changed, 1 insertion(+), 48 deletions(-)

diff --git a/src/qp/mixmod_pdf.py b/src/qp/mixmod_pdf.py
index a285dc4..649ca66 100644
--- a/src/qp/mixmod_pdf.py
+++ b/src/qp/mixmod_pdf.py
@@ -36,15 +36,9 @@ class mixmod_gen(Pdf_rows_gen):
 
     # pylint: disable=protected-access
 
-<<<<<<< HEAD
-    name = "mixmod"
-    version = 0
-
-=======
->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
     _support_mask = rv_continuous._support_mask
 
-    name = 'gen_mixmod'
+    name = 'mixmod'
     version = 0
 
     def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs):
@@ -58,12 +52,8 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs):
         stds:  array_like
             The standard deviations of the basis functions
         weights : array_like
-<<<<<<< HEAD
             The weights to attach to the Gaussians. Weights should sum up to one.
             If not, the weights are interpreted as relative weights.
-=======
-            The weights to attach to the basis functions. Weights should sum up to one. If not, the weights are interpreted as relative weights.
->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
         """
         self._gen_func = gen_func
         self._frozen = self._gen_func(**data)
@@ -73,17 +63,6 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs):
 
         self._scipy_version_warning()
         self._weights = reshape_to_pdf_size(weights, -1)
-<<<<<<< HEAD
-        kwargs["shape"] = means.shape[:-1]
-        self._ncomps = means.shape[-1]
-        super().__init__(*args, **kwargs)
-        if np.any(self._weights < 0):
-            raise ValueError("All weights need to be larger than zero")
-        self._weights = self._weights / self._weights.sum(axis=1)[:, None]
-        self._addobjdata("weights", self._weights)
-        self._addobjdata("stds", self._stds)
-        self._addobjdata("means", self._means)
-=======
         for key in self._data.keys():
             self._data[key] = reshape_to_pdf_size(self._data[key],-1)
         kwargs['shape'] = weights.shape[:-1]
@@ -93,7 +72,6 @@ def __init__(self, gen_func, weights, data, ancil=None, *args, **kwargs):
             raise ValueError('All weights need to be larger than zero')
         self._weights = self._weights/self._weights.sum(axis=1)[:,None]
         self._addobjdata('weights', self._weights)
->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
 
     def _scipy_version_warning(self):
         import scipy  # pylint: disable=import-outside-toplevel
@@ -115,41 +93,21 @@ def _pdf(self, x, row):
         # pylint: disable=arguments-differ
         if np.ndim(x) > 1:  # pragma: no cover
             x = np.expand_dims(x, -2)
-<<<<<<< HEAD
-        return (
-            self.weights[row].swapaxes(-2, -1)
-            * sps.norm(
-                loc=self._means[row].swapaxes(-2, -1),
-                scale=self._stds[row].swapaxes(-2, -1),
-            ).pdf(x)
-        ).sum(axis=0)
-=======
         data_swap=dict()
         for key in self._data.keys():
             data_swap[key] = self._data[key][row].swapaxes(-2,-1)
         return (self.weights[row].swapaxes(-2,-1) *
                 self._gen_func(**data_swap).pdf(x)).sum(axis=0)
->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
 
     def _cdf(self, x, row):
         # pylint: disable=arguments-differ
         if np.ndim(x) > 1:  # pragma: no cover
             x = np.expand_dims(x, -2)
-<<<<<<< HEAD
-        return (
-            self.weights[row].swapaxes(-2, -1)
-            * sps.norm(
-                loc=self._means[row].swapaxes(-2, -1),
-                scale=self._stds[row].swapaxes(-2, -1),
-            ).cdf(x)
-        ).sum(axis=0)
-=======
         data_swap=dict()
         for key in self._data.keys():
             data_swap[key] = self._data[key][row].swapaxes(-2,-1)
         return (self.weights[row].swapaxes(-2,-1) *
                 self._gen_func(**data_swap).cdf(x)).sum(axis=0)
->>>>>>> 9b08a50 (Started implementing mixture model implementation with generic scipy base function. So far only the ones using loc and scale parameters work. PPFs need to be implemented.)
 
     def _ppf(self, x, row):
         # pylint: disable=arguments-differ
diff --git a/src/qp/pdf_gen.py b/src/qp/pdf_gen.py
index 84c66f0..caaa332 100644
--- a/src/qp/pdf_gen.py
+++ b/src/qp/pdf_gen.py
@@ -124,13 +124,8 @@ def create_gen(cls, **kwds):
         """Create and return a `scipy.stats.rv_continuous` object using the
         keyword arguemntets provided"""
         kwds_copy = kwds.copy()
-<<<<<<< HEAD
         name = kwds_copy.pop("name", "dist")
         return (cls(name=name), kwds_copy)
-=======
-        name = kwds_copy.pop('name', 'dist')
-        return (cls(), kwds_copy)
->>>>>>> 71047e2 (fixed mixture model implementation with scipy basis functions that require additional parameters)
 
     @classmethod
     def create(cls, **kwds):

From e6b4da890eab182e8fa253cb3da7e5d4f0566365 Mon Sep 17 00:00:00 2001
From: Eric Charles <badass@stanford.edu>
Date: Mon, 4 Dec 2023 12:15:12 -0800
Subject: [PATCH 5/6] put _ctor_param back in and do a couple of fixes for unit
 tests

---
 src/qp/factory.py | 1 +
 src/qp/pdf_gen.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/qp/factory.py b/src/qp/factory.py
index e80742f..9f4b581 100644
--- a/src/qp/factory.py
+++ b/src/qp/factory.py
@@ -62,6 +62,7 @@ def _make_scipy_wrapped_class(self, class_name, scipy_class, ctor_param):
             version=0,
             freeze=Pdf_gen_wrap._my_freeze,
             _other_init=scipy_class.__init__,
+            _ctor_param=ctor_param,
         )
         the_class = type(class_name, (Pdf_gen_wrap, scipy_class), override_dict)
         self.add_class(the_class)
diff --git a/src/qp/pdf_gen.py b/src/qp/pdf_gen.py
index caaa332..f2052eb 100644
--- a/src/qp/pdf_gen.py
+++ b/src/qp/pdf_gen.py
@@ -395,6 +395,7 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if kwargs==self._ctor_param:
             kwargs=dict()
+        kwargs.pop('name', None)
         self._other_init(*args, **kwargs, **self._ctor_param)
 
     def _my_freeze(self, *args, **kwds):

From 64932d6fc2dd92df9acb1bfe9b8c56a66c2d78ac Mon Sep 17 00:00:00 2001
From: Eric Charles <badass@stanford.edu>
Date: Mon, 4 Dec 2023 12:15:25 -0800
Subject: [PATCH 6/6] put _ctor_param back in and do a couple of fixes for unit
 tests

---
 tests/qp/test_auto.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/qp/test_auto.py b/tests/qp/test_auto.py
index c4aedc0..ce41dff 100644
--- a/tests/qp/test_auto.py
+++ b/tests/qp/test_auto.py
@@ -80,7 +80,8 @@ def auto_add(cls, class_list, ens_orig):
 ENS_MULTI = test_funcs.build_ensemble(
     qp.stats.norm_gen.test_data["norm"]  # pylint: disable=no-member
 )
-TEST_CLASSES = qp.instance().values()
+TEST_CLASSES = list(qp.instance().values())
+TEST_CLASSES.remove(qp.mixmod_pdf.mixmod_gen)
 
 PDFTestCase.auto_add(TEST_CLASSES, [ENS_ORIG, ENS_MULTI])