diff --git a/docs/source/libraries/sklearn.rst b/docs/source/libraries/sklearn.rst index d05be37a..fbcf7915 100644 --- a/docs/source/libraries/sklearn.rst +++ b/docs/source/libraries/sklearn.rst @@ -261,6 +261,8 @@ Currently the following transformers are supported out of the box: * SelectorMixin-based transformers: SelectPercentile_, SelectKBest_, GenericUnivariateSelect_, VarianceThreshold_, RFE_, RFECV_, SelectFromModel_, RandomizedLogisticRegression_; +* stability selection-based transformers: RandomizedLogisticRegression_, + RandomizedLasso_, StabilitySelection_; * scalers from sklearn.preprocessing: MinMaxScaler_, StandardScaler_, MaxAbsScaler_, RobustScaler_. @@ -276,6 +278,8 @@ Currently the following transformers are supported out of the box: .. _RFECV: http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFECV.html .. _VarianceThreshold: http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html .. _RandomizedLogisticRegression: http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLogisticRegression.html +.. _RandomizedLasso: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLasso.html +.. _StabilitySelection: https://github.com/scikit-learn-contrib/stability-selection .. _Pipeline: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html#sklearn.pipeline.Pipeline .. 
_singledispatch: https://pypi.python.org/pypi/singledispatch diff --git a/eli5/sklearn/transform.py b/eli5/sklearn/transform.py index 2d431f8c..6337318c 100644 --- a/eli5/sklearn/transform.py +++ b/eli5/sklearn/transform.py @@ -4,6 +4,21 @@ import numpy as np # type: ignore from sklearn.pipeline import Pipeline, FeatureUnion # type: ignore from sklearn.feature_selection.base import SelectorMixin # type: ignore +try: + from sklearn.linear_model import ( # type: ignore + RandomizedLogisticRegression, + RandomizedLasso, + ) +except ImportError: + # randomized_l1 feature selectors are not available (removed in scikit-learn 0.21) + RandomizedLogisticRegression = None + RandomizedLasso = None +try: + from stability_selection import StabilitySelection # type: ignore + # TODO: add support for stability_selection.RandomizedLogisticRegression and stability_selection.RandomizedLasso ? +except ImportError: + # scikit-learn-contrib/stability-selection is not available + StabilitySelection = None from sklearn.preprocessing import ( # type: ignore MinMaxScaler, @@ -16,25 +31,33 @@ from eli5.sklearn.utils import get_feature_names as _get_feature_names +def register_notnone(generic_func, cls): + """ + Register an implementation of a generic function + if the supplied type is not None. 
+ """ + def inner_register(func): + if cls is None: + # do nothing + return func + else: + # register a new implementation + return generic_func.register(cls)(func) + return inner_register + + # Feature selection: @transform_feature_names.register(SelectorMixin) +@register_notnone(transform_feature_names, RandomizedLogisticRegression) +@register_notnone(transform_feature_names, RandomizedLasso) +@register_notnone(transform_feature_names, StabilitySelection) def _select_names(est, in_names=None): mask = est.get_support(indices=False) in_names = _get_feature_names(est, feature_names=in_names, num_features=len(mask)) return [in_names[i] for i in np.flatnonzero(mask)] -try: - from sklearn.linear_model import ( # type: ignore - RandomizedLogisticRegression, - RandomizedLasso, - ) - _select_names = transform_feature_names.register(RandomizedLasso)(_select_names) - _select_names = transform_feature_names.register(RandomizedLogisticRegression)(_select_names) -except ImportError: # Removed in scikit-learn 0.21 - pass - # Scaling diff --git a/tests/test_sklearn_transform.py b/tests/test_sklearn_transform.py index eda63812..10fe1c65 100644 --- a/tests/test_sklearn_transform.py +++ b/tests/test_sklearn_transform.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, print_function import re import pytest @@ -15,11 +17,21 @@ RFECV, SelectFromModel, ) -from sklearn.linear_model import ( - LogisticRegression, - RandomizedLogisticRegression, - RandomizedLasso, # TODO: add tests and document -) +from sklearn.linear_model import LogisticRegression +try: + from sklearn.linear_model import ( + RandomizedLogisticRegression, + RandomizedLasso, + ) +except ImportError: + # randomized_l1 feature selectors are not available (removed in scikit-learn 0.21) + RandomizedLogisticRegression = None + RandomizedLasso = None +try: + from stability_selection import StabilitySelection +except ImportError: + # scikit-learn-contrib/stability-selection is not available + 
StabilitySelection = None from sklearn.preprocessing import ( MinMaxScaler, StandardScaler, @@ -28,6 +40,7 @@ ) from sklearn.pipeline import FeatureUnion, make_pipeline +from .utils import sklearn_version from eli5 import transform_feature_names from eli5.sklearn import PermutationImportance @@ -47,6 +60,10 @@ def selection_score_func(X, y): return np.array([1, 2, 3, 4]) +def instantiate_notnone(cls, *args, **kwargs): + return cls(*args, **kwargs) if cls is not None else None + + @pytest.mark.parametrize('transformer,expected', [ (MyFeatureExtractor(), ['f1', 'f2', 'f3']), @@ -88,8 +105,37 @@ def selection_score_func(X, y): ['', '']), (RFECV(LogisticRegression(random_state=42)), ['', '', '', '']), - (RandomizedLogisticRegression(random_state=42), - ['', '', '']), + + pytest.param( + instantiate_notnone(RandomizedLogisticRegression, random_state=42), + ['', '', ''], + marks=pytest.mark.skipif(RandomizedLogisticRegression is None, + reason='scikit-learn RandomizedLogisticRegression is not available') + ), + pytest.param( + instantiate_notnone(RandomizedLasso, random_state=42), + ['', '', ''], + marks=[ + pytest.mark.skipif(RandomizedLasso is None, + reason='RandomizedLasso is not available'), + pytest.mark.skipif(sklearn_version() < '0.19', + reason='scikit-learn < 0.19')] + ), + pytest.param( + instantiate_notnone(RandomizedLasso, random_state=42), + ['', '', '', ''], + marks=[ + pytest.mark.skipif(RandomizedLasso is None, + reason='RandomizedLasso is not available'), + pytest.mark.skipif('0.19' <= sklearn_version(), + reason='scikit-learn >= 0.19')] + ), + pytest.param( + instantiate_notnone(StabilitySelection, random_state=42), + [''], + marks=pytest.mark.skipif(StabilitySelection is None, + reason='scikit-learn-contrib/stability-selection is not available') + ), ]) def test_transform_feature_names_iris(transformer, expected, iris_train): X, y, _, _ = iris_train @@ -102,4 +148,4 @@ def test_transform_feature_names_iris(transformer, expected, iris_train): # Test 
in_names being None expected_default_names = [re.sub('<NAME([0-9]+)>', r'x\1', name) for name in expected] - assert transform_feature_names(transformer, None) == expected_default_names + assert transform_feature_names(transformer, None) == expected_default_names \ No newline at end of file