fix: solve test run error on GitHub

SF-Tec · Nov 7, 2024 · 3c30cd9
1 parent 669e52f
commit 3c30cd9
Show file tree

Hide file tree

Showing 4 changed files with 39 additions and 15 deletions.
diff --git a/openmodels/serializers/sklearn_serializer.py b/openmodels/serializers/sklearn_serializer.py
@@ -195,19 +195,25 @@ def _convert_to_sklearn_types(value: Any, attr_type: str = "none") -> Any:
         ----------
         value : Any
             The JSON-deserialized value.
+        attr_type : str
+            The target type to convert to.
 
         Returns
         -------
         Any
             The scikit-learn type of the value.
         """
-
         # Base case: if attr_type is not a list, convert value based on attr_type
         if isinstance(attr_type, str):
             if attr_type == "csr_matrix":
+                # Ensure all sparse matrix components are of correct dtype
                 return csr_matrix(
-                    (value["data"], value["indices"], value["indptr"]),
-                    shape=value["shape"],
+                    (
+                        np.array(value["data"], dtype=np.float64),
+                        np.array(value["indices"], dtype=np.int32),
+                        np.array(value["indptr"], dtype=np.int32),
+                    ),
+                    shape=tuple(value["shape"]),
                 )
             elif attr_type == "ndarray":
                 return np.array(value)

diff --git a/openmodels/test_helpers.py b/openmodels/test_helpers.py
@@ -25,7 +25,9 @@ def transform(self, X: Union[np.ndarray, csr_matrix]) -> np.ndarray: ...
 
 @runtime_checkable
 class FittableModel(Protocol):
-    def fit(self, X: np.ndarray, y: np.ndarray) -> "FittableModel": ...
+    def fit(
+        self, X: Union[np.ndarray, csr_matrix], y: np.ndarray
+    ) -> "FittableModel": ...
 
 
 ModelType = Union[PredictorModel, TransformerModel, FittableModel]
@@ -51,16 +53,19 @@ def ensure_correct_sparse_format(
 
 
 def fit_model(
-    model: FittableModel, x: np.ndarray, y: np.ndarray, abs: bool = False
+    model: FittableModel,
+    x: Union[np.ndarray, csr_matrix],
+    y: np.ndarray,
+    abs: bool = False,
 ) -> FittableModel:
     """
     Fits a model to the provided data.
 
     Parameters
     ----------
-    model : T
+    model : FittableModel
         The scikit-learn model to fit.
-    x : np.ndarray
+    x : Union[np.ndarray, csr_matrix]
         The training input samples.
     y : np.ndarray
         The target values (class labels in classification, real numbers in regression).
@@ -69,14 +74,21 @@ def fit_model(
 
     Returns
     -------
-    T
+    FittableModel
         The fitted scikit-learn model.
     """
     if not isinstance(model, FittableModel):
         raise TypeError("Model must have a 'fit' method")
 
     if abs:
-        model.fit(np.absolute(x), y)
+        if isinstance(x, csr_matrix):
+            # Handle absolute value for sparse matrix
+            x_abs = csr_matrix(
+                (np.absolute(x.data), x.indices, x.indptr), shape=x.shape
+            )
+            model.fit(x_abs, y)
+        else:
+            model.fit(np.absolute(x), y)
     else:
         model.fit(x, y)
     return model
@@ -143,7 +155,7 @@ def run_test_model(
     model: FittableModel,
     x: np.ndarray,
     y: np.ndarray,
-    x_sparse: Optional[np.ndarray],
+    x_sparse: Optional[Union[np.ndarray, csr_matrix]],
     y_sparse: Optional[np.ndarray],
     model_name: str,
     abs: bool = False,
@@ -159,7 +171,7 @@ def run_test_model(
         The training input samples.
     y : np.ndarray
         The target values (class labels in classification, real numbers in regression).
-    x_sparse : np.ndarray or None
+    x_sparse : Optional[Union[np.ndarray, csr_matrix]]
         The sparse training input samples.
     y_sparse : np.ndarray or None
         The sparse target values.

diff --git a/test/test_classification.py b/test/test_classification.py
@@ -13,7 +13,7 @@
 from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB, ComplementNB
 from sklearn.neural_network import MLPClassifier
 from sklearn.tree import DecisionTreeClassifier
-from openmodels.test_helpers import run_test_model
+from openmodels.test_helpers import run_test_model, ensure_correct_sparse_format
 
 # Define constants
 N_SAMPLES = 50
@@ -109,6 +109,9 @@ def test_qda(data):
 
 def test_svm(data):
     x, y, x_sparse, y_sparse = data
+    # Ensure sparse data is properly formatted before testing
+    x_sparse = ensure_correct_sparse_format(x_sparse)
+
     run_test_model(
         svm.SVC(gamma=0.001, C=100.0, kernel="linear"),
         x,

diff --git a/test/test_regression.py b/test/test_regression.py
@@ -8,7 +8,7 @@
 from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
 from sklearn.neural_network import MLPRegressor
 from sklearn.svm import SVR
-from openmodels.test_helpers import run_test_model
+from openmodels.test_helpers import run_test_model, ensure_correct_sparse_format
 
 
 @pytest.fixture(scope="module")
@@ -32,7 +32,7 @@ def data():
             }
         )
     y_sparse = [random.random() for i in range(0, 100)]
-    x_sparse = feature_hasher.transform(features)
+    x_sparse = feature_hasher.transform(iter(features))
 
     return x, y, x_sparse, y_sparse
 
@@ -57,6 +57,9 @@ def test_ridge_regression(data):
 
 def test_svr(data):
     x, y, x_sparse, y_sparse = data
+    # Ensure sparse data is properly formatted before testing
+    x_sparse = ensure_correct_sparse_format(x_sparse)
+
     run_test_model(
         SVR(gamma="scale", C=1.0, epsilon=0.2), x, y, x_sparse, y_sparse, "svr.json"
     )
@@ -109,5 +112,5 @@ def test_mlp_regression(data):
 def test_pls_regression(data):
     x, y, _, _ = data
     run_test_model(
-        PLSRegression(n_components=2), x, y, None, None, "pls-regression.json"
+        PLSRegression(n_components=2), x, y, None, None, "pls-regression.json"  # type: ignore
     )