From 0e0b05dd185ec4becc221f6e614b81ac4a73eecc Mon Sep 17 00:00:00 2001 From: teabolt Date: Sun, 28 Apr 2019 16:58:19 +0100 Subject: [PATCH 1/7] Accept StabilitySelection for the _select_names feature name transformer (if scikit-learn-contrib/stability-selection available) --- eli5/sklearn/transform.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/eli5/sklearn/transform.py b/eli5/sklearn/transform.py index 2d431f8c..81e4e8de 100644 --- a/eli5/sklearn/transform.py +++ b/eli5/sklearn/transform.py @@ -32,7 +32,16 @@ def _select_names(est, in_names=None): ) _select_names = transform_feature_names.register(RandomizedLasso)(_select_names) _select_names = transform_feature_names.register(RandomizedLogisticRegression)(_select_names) -except ImportError: # Removed in scikit-learn 0.21 +except ImportError: + # randomized_l1 was removed in scikit-learn 0.21 + pass + +try: + from stability_selection import StabilitySelection + _select_names = transform_feature_names.register(StabilitySelection)(_select_names) + # TODO: add support for stability_selection.RandomizedLogisticRegression and stability_selection.RandomizedLasso ? +except ImportError: + # scikit-learn-contrib/stability-selection is not available pass From 679fb2fd70d0b8022c9da47ccfd94907c69d0769 Mon Sep 17 00:00:00 2001 From: teabolt Date: Sun, 28 Apr 2019 23:55:10 +0100 Subject: [PATCH 2/7] Conditionally test transform_feature_names on randomized_l1 (scikit-learn) and stability_selection (scikit-learn-contrib) --- tests/test_sklearn_transform.py | 52 ++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/tests/test_sklearn_transform.py b/tests/test_sklearn_transform.py index eda63812..40dfb26f 100644 --- a/tests/test_sklearn_transform.py +++ b/tests/test_sklearn_transform.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, print_function import re import pytest @@ -15,11 +17,21 @@ RFECV, SelectFromModel, ) -from sklearn.linear_model import ( - LogisticRegression, - RandomizedLogisticRegression, - RandomizedLasso, # TODO: add tests and document -) +from sklearn.linear_model import LogisticRegression +try: + from sklearn.linear_model import ( + RandomizedLogisticRegression, + RandomizedLasso, + ) +except ImportError: + # randomized_l1 feature selectors are not available (removed in scikit-learn 0.21) + RandomizedLogisticRegression = None + RandomizedLasso = None +try: + from stability_selection import StabilitySelection +except ImportError: + # scikit-learn-contrib/stability-selection is not available + StabilitySelection = None from sklearn.preprocessing import ( MinMaxScaler, StandardScaler, @@ -47,6 +59,10 @@ def selection_score_func(X, y): return np.array([1, 2, 3, 4]) +def instantiate_notnone(cls, *args, **kwargs): + return cls(*args, **kwargs) if cls is not None else None + + @pytest.mark.parametrize('transformer,expected', [ (MyFeatureExtractor(), ['f1', 'f2', 'f3']), @@ -88,8 +104,28 @@ def selection_score_func(X, y): ['', '']), (RFECV(LogisticRegression(random_state=42)), ['', '', '', '']), - (RandomizedLogisticRegression(random_state=42), - ['', '', '']), + + pytest.param( + instantiate_notnone(RandomizedLogisticRegression, random_state=42), + ['', '', ''], + marks=pytest.mark.skipif( + RandomizedLogisticRegression is None, + reason='scikit-learn RandomizedLogisticRegression is not available') + ), + pytest.param( + instantiate_notnone(RandomizedLasso, random_state=42), + ['', '', ''], + marks=pytest.mark.skipif( + RandomizedLasso is None, + reason='scikit-learn RandomizedLasso is not available') + ), + pytest.param( + instantiate_notnone(StabilitySelection, random_state=42), + [''], + marks=pytest.mark.skipif( + StabilitySelection is None, + reason='scikit-learn-contrib/stability-selection is not available') + ), ]) def test_transform_feature_names_iris(transformer, expected, iris_train): X, y, _, _ = iris_train @@ -102,4 +138,4 @@ def test_transform_feature_names_iris(transformer, expected, iris_train): # Test in_names being None expected_default_names = [re.sub('', r'x\1', name) for name in expected] - assert transform_feature_names(transformer, None) == expected_default_names + assert transform_feature_names(transformer, None) == expected_default_names \ No newline at end of file From ce4f96bdf8a6f2025bbe0f5622e0c7de207af5d2 Mon Sep 17 00:00:00 2001 From: teabolt Date: Mon, 29 Apr 2019 00:18:06 +0100 Subject: [PATCH 3/7] Mention stability selection under Transformation Pipelines --- docs/source/libraries/sklearn.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/libraries/sklearn.rst b/docs/source/libraries/sklearn.rst index d05be37a..fbcf7915 100644 --- a/docs/source/libraries/sklearn.rst +++ b/docs/source/libraries/sklearn.rst @@ -261,6 +261,8 @@ Currently the following transformers are supported out of the box: * SelectorMixin-based transformers: SelectPercentile_, SelectKBest_, GenericUnivariateSelect_, VarianceThreshold_, RFE_, RFECV_, SelectFromModel_, RandomizedLogisticRegression_; +* stability selection-based transformers: RandomizedLogisticRegression_, + RandomizedLasso_, StabilitySelection_; * scalers from sklearn.preprocessing: MinMaxScaler_, StandardScaler_, MaxAbsScaler_, RobustScaler_. @@ -276,6 +278,8 @@ Currently the following transformers are supported out of the box: .. _RFECV: http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFECV.html .. _VarianceThreshold: http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html .. _RandomizedLogisticRegression: http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLogisticRegression.html +.. _RandomizedLasso: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLasso.html +.. _StabilitySelection: https://github.com/scikit-learn-contrib/stability-selection .. _Pipeline: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html#sklearn.pipeline.Pipeline .. _singledispatch: https://pypi.python.org/pypi/singledispatch From bb9c80d5c1fee3bd96f998239902b71fa714e8e2 Mon Sep 17 00:00:00 2001 From: teabolt Date: Mon, 29 Apr 2019 01:10:18 +0100 Subject: [PATCH 4/7] Add a wrapper for registering types that may fail to import. Refactor sklearn/transform.py --- eli5/sklearn/transform.py | 52 +++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/eli5/sklearn/transform.py b/eli5/sklearn/transform.py index 81e4e8de..4192b165 100644 --- a/eli5/sklearn/transform.py +++ b/eli5/sklearn/transform.py @@ -4,6 +4,21 @@ import numpy as np # type: ignore from sklearn.pipeline import Pipeline, FeatureUnion # type: ignore from sklearn.feature_selection.base import SelectorMixin # type: ignore +try: + from sklearn.linear_model import ( # type: ignore + RandomizedLogisticRegression, + RandomizedLasso, + ) +except ImportError: + # randomized_l1 feature selectors are not available (removed in scikit-learn 0.21) + RandomizedLogisticRegression = None + RandomizedLasso = None +try: + from stability_selection import StabilitySelection + # TODO: add support for stability_selection.RandomizedLogisticRegression and stability_selection.RandomizedLasso ? +except ImportError: + # scikit-learn-contrib/stability-selection is not available + StabilitySelection = None from sklearn.preprocessing import ( # type: ignore MinMaxScaler, @@ -16,34 +31,33 @@ from eli5.sklearn.utils import get_feature_names as _get_feature_names +def register_notnone(generic_func, cls): + """ + Register an implementation of a generic function + if the supplied type is not None. + """ + def inner_register(func): + if cls is None: + # do nothing + return func + else: + # register a new implementation + return generic_func.register(cls)(func) + return inner_register + + # Feature selection: @transform_feature_names.register(SelectorMixin) +@register_notnone(transform_feature_names, RandomizedLogisticRegression) +@register_notnone(transform_feature_names, RandomizedLasso) +@register_notnone(transform_feature_names, StabilitySelection) def _select_names(est, in_names=None): mask = est.get_support(indices=False) in_names = _get_feature_names(est, feature_names=in_names, num_features=len(mask)) return [in_names[i] for i in np.flatnonzero(mask)] -try: - from sklearn.linear_model import ( # type: ignore - RandomizedLogisticRegression, - RandomizedLasso, - ) - _select_names = transform_feature_names.register(RandomizedLasso)(_select_names) - _select_names = transform_feature_names.register(RandomizedLogisticRegression)(_select_names) -except ImportError: - # randomized_l1 was removed in scikit-learn 0.21 - pass - -try: - from stability_selection import StabilitySelection - _select_names = transform_feature_names.register(StabilitySelection)(_select_names) - # TODO: add support for stability_selection.RandomizedLogisticRegression and stability_selection.RandomizedLasso ? -except ImportError: - # scikit-learn-contrib/stability-selection is not available - pass - # Scaling From c946e45c4bc35972189c63c05469e418162b2424 Mon Sep 17 00:00:00 2001 From: teabolt Date: Mon, 29 Apr 2019 20:07:55 +0100 Subject: [PATCH 5/7] Ignore type for StabilitySelection import, add conditional test for RandomizedLasso of scikit-learn < 0.19 (fix failed CI build) --- eli5/sklearn/transform.py | 2 +- tests/test_sklearn_transform.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/eli5/sklearn/transform.py b/eli5/sklearn/transform.py index 4192b165..6337318c 100644 --- a/eli5/sklearn/transform.py +++ b/eli5/sklearn/transform.py @@ -14,7 +14,7 @@ RandomizedLogisticRegression = None RandomizedLasso = None try: - from stability_selection import StabilitySelection + from stability_selection import StabilitySelection # type: ignore # TODO: add support for stability_selection.RandomizedLogisticRegression and stability_selection.RandomizedLasso ? except ImportError: # scikit-learn-contrib/stability-selection is not available diff --git a/tests/test_sklearn_transform.py b/tests/test_sklearn_transform.py index 40dfb26f..73077de3 100644 --- a/tests/test_sklearn_transform.py +++ b/tests/test_sklearn_transform.py @@ -4,6 +4,7 @@ import pytest import numpy as np +import sklearn from sklearn.base import BaseEstimator, TransformerMixin from sklearn.feature_selection import ( SelectPercentile, @@ -63,6 +64,12 @@ def instantiate_notnone(cls, *args, **kwargs): return cls(*args, **kwargs) if cls is not None else None +def parse_version(mod): + v = mod.__version__ + vint = list(map(int, v.split('.'))) + return vint + + @pytest.mark.parametrize('transformer,expected', [ (MyFeatureExtractor(), ['f1', 'f2', 'f3']), @@ -116,8 +123,15 @@ def instantiate_notnone(cls, *args, **kwargs): instantiate_notnone(RandomizedLasso, random_state=42), ['', '', ''], marks=pytest.mark.skipif( - RandomizedLasso is None, - reason='scikit-learn RandomizedLasso is not available') + parse_version(sklearn)[1] < 19 or RandomizedLasso is None, + reason='scikit-learn < 0.19 or RandomizedLasso is not available') + ), + pytest.param( + instantiate_notnone(RandomizedLasso, random_state=42), + ['', '', '', ''], + marks=pytest.mark.skipif( + 19 <= parse_version(sklearn)[1] or RandomizedLasso is None, + reason='scikit-learn >= 0.19 or RandomizedLasso is not available') ), pytest.param( instantiate_notnone(StabilitySelection, random_state=42), From f51946911c6fbe61f59a78d9eb7ff1af34cf8b53 Mon Sep 17 00:00:00 2001 From: teabolt Date: Mon, 29 Apr 2019 21:25:38 +0100 Subject: [PATCH 6/7] Prefer utils.sklearn_version --- tests/test_sklearn_transform.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/test_sklearn_transform.py b/tests/test_sklearn_transform.py index 73077de3..5f45fae7 100644 --- a/tests/test_sklearn_transform.py +++ b/tests/test_sklearn_transform.py @@ -4,7 +4,6 @@ import pytest import numpy as np -import sklearn from sklearn.base import BaseEstimator, TransformerMixin from sklearn.feature_selection import ( SelectPercentile, @@ -41,6 +40,7 @@ ) from sklearn.pipeline import FeatureUnion, make_pipeline +from .utils import sklearn_version from eli5 import transform_feature_names from eli5.sklearn import PermutationImportance @@ -64,12 +64,6 @@ def instantiate_notnone(cls, *args, **kwargs): return cls(*args, **kwargs) if cls is not None else None -def parse_version(mod): - v = mod.__version__ - vint = list(map(int, v.split('.'))) - return vint - - @pytest.mark.parametrize('transformer,expected', [ (MyFeatureExtractor(), ['f1', 'f2', 'f3']), @@ -123,14 +117,14 @@ def parse_version(mod): instantiate_notnone(RandomizedLasso, random_state=42), ['', '', ''], marks=pytest.mark.skipif( - parse_version(sklearn)[1] < 19 or RandomizedLasso is None, + sklearn_version() < '0.19' or RandomizedLasso is None, reason='scikit-learn < 0.19 or RandomizedLasso is not available') ), pytest.param( instantiate_notnone(RandomizedLasso, random_state=42), ['', '', '', ''], marks=pytest.mark.skipif( - 19 <= parse_version(sklearn)[1] or RandomizedLasso is None, + '0.19' <= sklearn_version() or RandomizedLasso is None, reason='scikit-learn >= 0.19 or RandomizedLasso is not available') ), pytest.param( From c21a50c28dae7be75ec6cc8b32f0de2fa6d00c16 Mon Sep 17 00:00:00 2001 From: teabolt Date: Mon, 29 Apr 2019 21:55:56 +0100 Subject: [PATCH 7/7] Pass a list to marks when skipping for multiple reasons --- tests/test_sklearn_transform.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/test_sklearn_transform.py b/tests/test_sklearn_transform.py index 5f45fae7..10fe1c65 100644 --- a/tests/test_sklearn_transform.py +++ b/tests/test_sklearn_transform.py @@ -109,29 +109,31 @@ def instantiate_notnone(cls, *args, **kwargs): pytest.param( instantiate_notnone(RandomizedLogisticRegression, random_state=42), ['', '', ''], - marks=pytest.mark.skipif( - RandomizedLogisticRegression is None, + marks=pytest.mark.skipif(RandomizedLogisticRegression is None, reason='scikit-learn RandomizedLogisticRegression is not available') ), pytest.param( instantiate_notnone(RandomizedLasso, random_state=42), ['', '', ''], - marks=pytest.mark.skipif( - sklearn_version() < '0.19' or RandomizedLasso is None, - reason='scikit-learn < 0.19 or RandomizedLasso is not available') + marks=[ + pytest.mark.skipif(RandomizedLasso is None, + reason='RandomizedLasso is not available'), + pytest.mark.skipif(sklearn_version() < '0.19', + reason='scikit-learn < 0.19')] ), pytest.param( instantiate_notnone(RandomizedLasso, random_state=42), ['', '', '', ''], - marks=pytest.mark.skipif( - '0.19' <= sklearn_version() or RandomizedLasso is None, - reason='scikit-learn >= 0.19 or RandomizedLasso is not available') + marks=[ + pytest.mark.skipif(RandomizedLasso is None, + reason='RandomizedLasso is not available'), + pytest.mark.skipif('0.19' <= sklearn_version(), + reason='scikit-learn >= 0.19')] ), pytest.param( instantiate_notnone(StabilitySelection, random_state=42), [''], - marks=pytest.mark.skipif( - StabilitySelection is None, + marks=pytest.mark.skipif(StabilitySelection is None, reason='scikit-learn-contrib/stability-selection is not available') ), ])