pre-commit
MaxHalford committed Jan 3, 2025
1 parent 29f9cec commit 4064549
Showing 13 changed files with 52 additions and 45 deletions.
20 changes: 9 additions & 11 deletions .pre-commit-config.yaml
@@ -7,16 +7,14 @@ repos:
- id: trailing-whitespace
- id: mixed-line-ending

- repo: local
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.8.0
hooks:
- id: black
name: black
language: python
types: [python]
entry: black

# Run the linter.
- id: ruff
name: ruff
language: python
types: [python]
entry: ruff
types_or: [python, pyi]
args: [--fix]
# Run the formatter.
- id: ruff-format
types_or: [python, pyi]
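
Note: this swaps the local black and ruff hooks for the official astral-sh/ruff-pre-commit hooks, with ruff-format taking over formatting from black. A minimal sketch of how to exercise the updated hooks locally, assuming pre-commit is already installed:

    pre-commit install            # register the git hook once
    pre-commit run --all-files    # run the ruff lint and format hooks over the whole repo
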
4 changes: 3 additions & 1 deletion prince/ca.py
@@ -1,4 +1,5 @@
"""Correspondence Analysis (CA)"""

from __future__ import annotations

import functools
@@ -277,7 +278,8 @@ def plot(
row_coords = self.row_coordinates(X)
row_coords.columns = [f"component {i}" for i in row_coords.columns]
row_coords = row_coords.assign(
variable=row_coords.index.name or "row", value=row_coords.index.astype(str)
variable=row_coords.index.name or "row",
value=row_coords.index.astype(str),
)
row_labels = pd.Series(row_coords.index, index=row_coords.index)
row_chart = alt.Chart(row_coords.assign(label=row_labels)).encode(
1 change: 1 addition & 0 deletions prince/famd.py
@@ -1,4 +1,5 @@
"""Factor Analysis of Mixed Data (FAMD)"""

from __future__ import annotations

import numpy as np
1 change: 1 addition & 0 deletions prince/gpa.py
@@ -1,4 +1,5 @@
"""Generalized Procrustes Analysis (GPA)"""

from __future__ import annotations

import numpy as np
1 change: 1 addition & 0 deletions prince/mca.py
@@ -1,4 +1,5 @@
"""Multiple Correspondence Analysis (MCA)"""

from __future__ import annotations

import numpy as np
30 changes: 22 additions & 8 deletions prince/pca.py
@@ -1,4 +1,5 @@
"""Principal Component Analysis (PCA)"""

from __future__ import annotations

import functools
@@ -71,7 +72,14 @@ def get_feature_names_out(self, input_features=None):
return np.arange(self.n_components_)

@utils.check_is_dataframe_input
def fit(self, X, y=None, sample_weight=None, column_weight=None, supplementary_columns=None):
def fit(
self,
X,
y=None,
sample_weight=None,
column_weight=None,
supplementary_columns=None,
):
self._check_input(X)

# Massage input
@@ -106,7 +114,8 @@ def fit(self, X, y=None, sample_weight=None, column_weight=None, supplementary_c
).fit_transform(X_sup)

self._column_dist = pd.Series(
(X_active**2 * sample_weight[:, np.newaxis]).sum(axis=0), index=active_variables
(X_active**2 * sample_weight[:, np.newaxis]).sum(axis=0),
index=active_variables,
)
if supplementary_columns:
self._column_dist = pd.concat(
@@ -126,10 +135,12 @@ def fit(self, X, y=None, sample_weight=None, column_weight=None, supplementary_c
random_state=self.random_state,
engine=self.engine,
row_weights=sample_weight,
column_weights=column_weight
column_weights=column_weight,
)

self.total_inertia_ = np.sum(np.square(X_active) * column_weight * sample_weight[:, np.newaxis])
self.total_inertia_ = np.sum(
np.square(X_active) * column_weight * sample_weight[:, np.newaxis]
)

self.column_coordinates_ = pd.DataFrame(
data=self.svd_.V.T * self.eigenvalues_**0.5,
@@ -154,7 +165,9 @@ def fit(self, X, y=None, sample_weight=None, column_weight=None, supplementary_c
index=self.row_contributions_.index if hasattr(self, "row_contributions_") else None,
)
row_coords.columns.name = "component"
self.row_contributions_ = (row_coords**2 * sample_weight[:, np.newaxis]).div(self.eigenvalues_, axis=1)
self.row_contributions_ = (row_coords**2 * sample_weight[:, np.newaxis]).div(
self.eigenvalues_, axis=1
)
self.row_contributions_.index = X.index

return self
@@ -311,9 +324,10 @@ def column_cosine_similarities_(self):
@property
@utils.check_is_fitted
def column_contributions_(self):
return (((self.column_coordinates_.loc[self.feature_names_in_]) ** 2) * self.column_weight_[:, np.newaxis]).div(
self.eigenvalues_, axis=1
)
return (
((self.column_coordinates_.loc[self.feature_names_in_]) ** 2)
* self.column_weight_[:, np.newaxis]
).div(self.eigenvalues_, axis=1)

@utils.check_is_dataframe_input
@utils.check_is_fitted
1 change: 0 additions & 1 deletion prince/plot.py
@@ -9,7 +9,6 @@


def stylize_axis(ax, grid=True):

if grid:
ax.grid()

3 changes: 2 additions & 1 deletion prince/svd.py
@@ -1,4 +1,5 @@
"""Singular Value Decomposition (SVD)"""

from __future__ import annotations

import dataclasses
@@ -55,7 +56,7 @@ def compute_svd(
else:
raise ValueError("engine has to be one of ('fbpca', 'scipy', 'sklearn')")

#U, V = extmath.svd_flip(U, V)
# U, V = extmath.svd_flip(U, V)

if row_weights is not None:
U = U / np.sqrt(row_weights)[:, np.newaxis] # row-wise scaling
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -22,12 +22,12 @@ pre-commit = "^4.0.1"
xarray = "^2023.11.0"

[tool.ruff]
select = ["E", "F", "I", "UP"] # https://beta.ruff.rs/docs/rules/
lint.select = ["E", "F", "I", "UP"] # https://beta.ruff.rs/docs/rules/
line-length = 100
target-version = 'py310'
ignore = ["E501"]
lint.ignore = ["E501"]

[tool.ruff.isort]
[tool.ruff.lint.isort]
required-imports = ["from __future__ import annotations"]

[build-system]
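
Note: the ruff settings in pyproject.toml move under the lint namespace (lint.select, lint.ignore, [tool.ruff.lint.isort]), which is where newer ruff releases expect them; required-imports keeps enforcing from __future__ import annotations in each module, as before. A rough sketch of the command-line calls the new hooks are equivalent to, assuming ruff is installed:

    ruff check --fix .    # linting with autofixes (the "ruff" hook)
    ruff format .         # formatting, replacing black (the "ruff-format" hook)
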
1 change: 0 additions & 1 deletion tests/test_famd.py
@@ -32,7 +32,6 @@ class TestFAMD:

@pytest.fixture(autouse=True)
def _prepare(self, sup_rows, sup_cols):

self.sup_rows = sup_rows
self.sup_cols = sup_cols

1 change: 0 additions & 1 deletion tests/test_gpa.py
@@ -86,7 +86,6 @@ def test_copy(self):
self.assertRaises(AssertionError, np.testing.assert_array_equal, self.shapes, shapes_copy)

def test_xarray(self):

points = pd.DataFrame(
data=[
[0, 0, 0, 0],
12 changes: 6 additions & 6 deletions tests/test_pca.py
@@ -6,10 +6,11 @@
import pandas as pd
import pytest
import rpy2.robjects as robjects
from rpy2.robjects import numpy2ri
import sklearn.utils.estimator_checks
import sklearn.utils.validation
from rpy2.robjects import numpy2ri
from sklearn import decomposition, pipeline, preprocessing

import prince
from tests import load_df_from_R

@@ -35,7 +36,6 @@
class TestPCA:
@pytest.fixture(autouse=True)
def _prepare(self, sup_rows, sup_cols, scale, sample_weights, column_weights):

self.sup_rows = sup_rows
self.sup_cols = sup_cols
self.scale = scale
@@ -48,13 +48,13 @@ def _prepare(self, sup_rows, sup_cols, scale, sample_weights, column_weights):
if self.sup_rows:
self.active = self.active.query('competition == "Decastar"')
self.sample_weights = (
np.random.default_rng().dirichlet([1] * len(self.active))
if sample_weights
else None
np.random.default_rng().dirichlet([1] * len(self.active)) if sample_weights else None
)
supplementary_columns = ["rank", "points"] if self.sup_cols else []
self.column_weights = (
np.random.default_rng().random(len(self.active.columns.difference(supplementary_columns)))
np.random.default_rng().random(
len(self.active.columns.difference(supplementary_columns))
)
if column_weights
else None
)
16 changes: 4 additions & 12 deletions tests/test_svd.py
@@ -1,18 +1,11 @@
from __future__ import annotations

import math

import numpy as np
import pandas as pd
import pytest
import rpy2.robjects as robjects
from rpy2.robjects import numpy2ri
import sklearn.utils.estimator_checks
import sklearn.utils.validation
from sklearn import decomposition, pipeline, preprocessing

import prince
from prince import svd, utils
from prince import svd
from tests import load_df_from_R


@@ -23,7 +16,7 @@
n_components,
are_rows_weighted,
are_columns_weighted,
id=f"{n_components=}:{are_rows_weighted=}:{are_columns_weighted=}"
id=f"{n_components=}:{are_rows_weighted=}:{are_columns_weighted=}",
)
for n_components in [1, 3, 10]
for are_rows_weighted in [False, True]
@@ -33,7 +26,6 @@
class TestSVD:
@pytest.fixture(autouse=True)
def _prepare(self, n_components, are_rows_weighted, are_columns_weighted):

self.n_components = n_components
self.are_rows_weighted = are_rows_weighted
self.are_columns_weighted = are_columns_weighted
@@ -51,7 +43,7 @@ def _prepare(self, n_components, are_rows_weighted, are_columns_weighted):
n_components=n_components,
n_iter=3,
random_state=42,
engine="scipy"
engine="scipy",
)

# Fit FactoMineR
Expand Down Expand Up @@ -79,7 +71,7 @@ def test_s(self):
assert self.svd.s.shape == (self.n_components,)
if self.are_rows_weighted:
P = self.svd.s
F = robjects.r("svd$vs")[:self.n_components]
F = robjects.r("svd$vs")[: self.n_components]
np.testing.assert_allclose(np.abs(F), np.abs(P))

def test_V(self):
