pre-commit
MaxHalford committed Jan 3, 2025
1 parent 29f9cec commit 4064549
Showing 13 changed files with 52 additions and 45 deletions.
20 changes: 9 additions & 11 deletions .pre-commit-config.yaml
@@ -7,16 +7,14 @@ repos:
- id: trailing-whitespace
- id: mixed-line-ending

- repo: local
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.8.0
hooks:
- id: black
name: black
language: python
types: [python]
entry: black

# Run the linter.
- id: ruff
name: ruff
language: python
types: [python]
entry: ruff
types_or: [python, pyi]
args: [--fix]
# Run the formatter.
- id: ruff-format
types_or: [python, pyi]
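
Note: this swaps the local black and ruff hooks for the official astral-sh/ruff-pre-commit hooks, with ruff-format taking over formatting from black. A minimal sketch of how to exercise the updated hooks locally, assuming pre-commit is already installed:

    pre-commit install            # register the git hook once
    pre-commit run --all-files    # run the ruff lint and format hooks over the whole repo
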
4 changes: 3 additions & 1 deletion prince/ca.py
@@ -1,4 +1,5 @@
"""Correspondence Analysis (CA)"""

from __future__ import annotations

import functools
@@ -277,7 +278,8 @@ def plot(
row_coords = self.row_coordinates(X)
row_coords.columns = [f"component {i}" for i in row_coords.columns]
row_coords = row_coords.assign(
variable=row_coords.index.name or "row", value=row_coords.index.astype(str)
variable=row_coords.index.name or "row",
value=row_coords.index.astype(str),
)
row_labels = pd.Series(row_coords.index, index=row_coords.index)
row_chart = alt.Chart(row_coords.assign(label=row_labels)).encode(
1 change: 1 addition & 0 deletions prince/famd.py
@@ -1,4 +1,5 @@
"""Factor Analysis of Mixed Data (FAMD)"""

from __future__ import annotations

import numpy as np
1 change: 1 addition & 0 deletions prince/gpa.py
@@ -1,4 +1,5 @@
"""Generalized Procrustes Analysis (GPA)"""

from __future__ import annotations

import numpy as np
1 change: 1 addition & 0 deletions prince/mca.py
@@ -1,4 +1,5 @@
"""Multiple Correspondence Analysis (MCA)"""

from __future__ import annotations

import numpy as np
30 changes: 22 additions & 8 deletions prince/pca.py
@@ -1,4 +1,5 @@
"""Principal Component Analysis (PCA)"""

from __future__ import annotations

import functools
@@ -71,7 +72,14 @@ def get_feature_names_out(self, input_features=None):
return np.arange(self.n_components_)

@utils.check_is_dataframe_input
def fit(self, X, y=None, sample_weight=None, column_weight=None, supplementary_columns=None):
def fit(
self,
X,
y=None,
sample_weight=None,
column_weight=None,
supplementary_columns=None,
):
self._check_input(X)

# Massage input
@@ -106,7 +114,8 @@ def fit(self, X, y=None, sample_weight=None, column_weight=None, supplementary_c
).fit_transform(X_sup)

self._column_dist = pd.Series(
(X_active**2 * sample_weight[:, np.newaxis]).sum(axis=0), index=active_variables
(X_active**2 * sample_weight[:, np.newaxis]).sum(axis=0),
index=active_variables,
)
if supplementary_columns:
self._column_dist = pd.concat(
@@ -126,10 +135,12 @@ def fit(self, X, y=None, sample_weight=None, column_weight=None, supplementary_c
random_state=self.random_state,
engine=self.engine,
row_weights=sample_weight,
column_weights=column_weight
column_weights=column_weight,
)

self.total_inertia_ = np.sum(np.square(X_active) * column_weight * sample_weight[:, np.newaxis])
self.total_inertia_ = np.sum(
np.square(X_active) * column_weight * sample_weight[:, np.newaxis]
)

self.column_coordinates_ = pd.DataFrame(
data=self.svd_.V.T * self.eigenvalues_**0.5,
@@ -154,7 +165,9 @@ def fit(self, X, y=None, sample_weight=None, column_weight=None, supplementary_c
index=self.row_contributions_.index if hasattr(self, "row_contributions_") else None,
)
row_coords.columns.name = "component"
self.row_contributions_ = (row_coords**2 * sample_weight[:, np.newaxis]).div(self.eigenvalues_, axis=1)
self.row_contributions_ = (row_coords**2 * sample_weight[:, np.newaxis]).div(
self.eigenvalues_, axis=1
)
self.row_contributions_.index = X.index

return self
@@ -311,9 +324,10 @@ def column_cosine_similarities_(self):
@property
@utils.check_is_fitted
def column_contributions_(self):
return (((self.column_coordinates_.loc[self.feature_names_in_]) ** 2) * self.column_weight_[:, np.newaxis]).div(
self.eigenvalues_, axis=1
)
return (
((self.column_coordinates_.loc[self.feature_names_in_]) ** 2)
* self.column_weight_[:, np.newaxis]
).div(self.eigenvalues_, axis=1)

@utils.check_is_dataframe_input
@utils.check_is_fitted
1 change: 0 additions & 1 deletion prince/plot.py
@@ -9,7 +9,6 @@


def stylize_axis(ax, grid=True):

if grid:
ax.grid()

3 changes: 2 additions & 1 deletion prince/svd.py
@@ -1,4 +1,5 @@
"""Singular Value Decomposition (SVD)"""

from __future__ import annotations

import dataclasses
@@ -55,7 +56,7 @@ def compute_svd(
else:
raise ValueError("engine has to be one of ('fbpca', 'scipy', 'sklearn')")

#U, V = extmath.svd_flip(U, V)
# U, V = extmath.svd_flip(U, V)

if row_weights is not None:
U = U / np.sqrt(row_weights)[:, np.newaxis] # row-wise scaling
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -22,12 +22,12 @@ pre-commit = "^4.0.1"
xarray = "^2023.11.0"

[tool.ruff]
select = ["E", "F", "I", "UP"] # https://beta.ruff.rs/docs/rules/
lint.select = ["E", "F", "I", "UP"] # https://beta.ruff.rs/docs/rules/
line-length = 100
target-version = 'py310'
ignore = ["E501"]
lint.ignore = ["E501"]

[tool.ruff.isort]
[tool.ruff.lint.isort]
required-imports = ["from __future__ import annotations"]

[build-system]
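
Note: the ruff settings in pyproject.toml move under the lint namespace (lint.select, lint.ignore, [tool.ruff.lint.isort]), which is where newer ruff releases expect them; required-imports keeps enforcing from __future__ import annotations in each module, as before. A rough sketch of the command-line calls the new hooks are equivalent to, assuming ruff is installed:

    ruff check --fix .    # linting with autofixes (the "ruff" hook)
    ruff format .         # formatting, replacing black (the "ruff-format" hook)
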
1 change: 0 additions & 1 deletion tests/test_famd.py
@@ -32,7 +32,6 @@ class TestFAMD:

@pytest.fixture(autouse=True)
def _prepare(self, sup_rows, sup_cols):

self.sup_rows = sup_rows
self.sup_cols = sup_cols

1 change: 0 additions & 1 deletion tests/test_gpa.py
@@ -86,7 +86,6 @@ def test_copy(self):
self.assertRaises(AssertionError, np.testing.assert_array_equal, self.shapes, shapes_copy)

def test_xarray(self):

points = pd.DataFrame(
data=[
[0, 0, 0, 0],
12 changes: 6 additions & 6 deletions tests/test_pca.py
@@ -6,10 +6,11 @@
import pandas as pd
import pytest
import rpy2.robjects as robjects
from rpy2.robjects import numpy2ri
import sklearn.utils.estimator_checks
import sklearn.utils.validation
from rpy2.robjects import numpy2ri
from sklearn import decomposition, pipeline, preprocessing

import prince
from tests import load_df_from_R

@@ -35,7 +36,6 @@
class TestPCA:
@pytest.fixture(autouse=True)
def _prepare(self, sup_rows, sup_cols, scale, sample_weights, column_weights):

self.sup_rows = sup_rows
self.sup_cols = sup_cols
self.scale = scale
@@ -48,13 +48,13 @@ def _prepare(self, sup_rows, sup_cols, scale, sample_weights, column_weights):
if self.sup_rows:
self.active = self.active.query('competition == "Decastar"')
self.sample_weights = (
np.random.default_rng().dirichlet([1] * len(self.active))
if sample_weights
else None
np.random.default_rng().dirichlet([1] * len(self.active)) if sample_weights else None
)
supplementary_columns = ["rank", "points"] if self.sup_cols else []
self.column_weights = (
np.random.default_rng().random(len(self.active.columns.difference(supplementary_columns)))
np.random.default_rng().random(
len(self.active.columns.difference(supplementary_columns))
)
if column_weights
else None
)
16 changes: 4 additions & 12 deletions tests/test_svd.py
@@ -1,18 +1,11 @@
from __future__ import annotations

import math

import numpy as np
import pandas as pd
import pytest
import rpy2.robjects as robjects
from rpy2.robjects import numpy2ri
import sklearn.utils.estimator_checks
import sklearn.utils.validation
from sklearn import decomposition, pipeline, preprocessing

import prince
from prince import svd, utils
from prince import svd
from tests import load_df_from_R


@@ -23,7 +16,7 @@
n_components,
are_rows_weighted,
are_columns_weighted,
id=f"{n_components=}:{are_rows_weighted=}:{are_columns_weighted=}"
id=f"{n_components=}:{are_rows_weighted=}:{are_columns_weighted=}",
)
for n_components in [1, 3, 10]
for are_rows_weighted in [False, True]
@@ -33,7 +26,6 @@
class TestSVD:
@pytest.fixture(autouse=True)
def _prepare(self, n_components, are_rows_weighted, are_columns_weighted):

self.n_components = n_components
self.are_rows_weighted = are_rows_weighted
self.are_columns_weighted = are_columns_weighted
@@ -51,7 +43,7 @@ def _prepare(self, n_components, are_rows_weighted, are_columns_weighted):
n_components=n_components,
n_iter=3,
random_state=42,
engine="scipy"
engine="scipy",
)

# Fit FactoMineR
Expand Down Expand Up @@ -79,7 +71,7 @@ def test_s(self):
assert self.svd.s.shape == (self.n_components,)
if self.are_rows_weighted:
P = self.svd.s
F = robjects.r("svd$vs")[:self.n_components]
F = robjects.r("svd$vs")[: self.n_components]
np.testing.assert_allclose(np.abs(F), np.abs(P))

def test_V(self):
