Skip to content

Commit

Permalink
Merge pull request #55 from goeckslab/v0.9.0
Browse files Browse the repository at this point in the history
V0.9.0 - update sklearn to v0.24.*, tensorflow to v2.* etc.
  • Loading branch information
qiagu authored May 11, 2021
2 parents a8561b4 + 39b2605 commit d662fc5
Show file tree
Hide file tree
Showing 2,330 changed files with 19,427 additions and 823,772 deletions.
6 changes: 3 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: 2
jobs:
build:
docker:
- image: circleci/python:3.6.8
- image: circleci/python:3.7.10
steps:
- checkout
- run:
Expand All @@ -25,7 +25,7 @@ jobs:
path: test-reports
tool_lint:
docker:
- image: circleci/python:3.6.8
- image: circleci/python:3.7.10
steps:
- checkout
- run:
Expand All @@ -39,7 +39,7 @@ jobs:
planemo lint
tool_test:
docker:
- image: circleci/python:3.6.8
- image: circleci/python:3.7.10
parallelism: 4
steps:
- checkout
Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
include LICENSE
include README.md
include requirements.txt
include galaxy_ml/pk_whitelist.json
include galaxy_ml/model_persist/pk_whitelist.json
include galaxy_ml/externals/selene_sdk/sequences/_sequence.pyx
include galaxy_ml/externals/selene_sdk/targets/_genomic_features.pyx
recursive-include galaxy_ml/externals/selene_sdk/sequences/data *.bed*
15 changes: 15 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
### Version 0.9.0

#### Changes

- Updates scikit-learn to v0.24.x, tensorflow to v2.4.x, xgboost to v1.3.x, mlxtend to v0.17.x, skrebate to v0.62, imbalanced-learn to v0.8.x, among others.
- Makes `load_model` and `save_model` utility methods in the `keras_galaxy_models` module.
- Refactors `_SafePickler` and moves it from `utils` to `model_persist`.
- Refactors `dump_model_to_h5` and `load_model_from_h5` to dynamically save and load xgboost and tensorflow models.
- Replaces pickled models with `h5mlm` models in all tools.

#### Bug Fixes

-


### Version 0.8.3

#### Changes
Expand Down
14 changes: 13 additions & 1 deletion galaxy_ml/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,13 @@
__version__ = '0.8.3'
__version__ = '0.9.0'


__all__ = (
'keras_galaxy_models',
'feature_selectors',
'preprocessors',
'iraps_classifier',
'model_validations',
'binarize_target',
'metrics',
'model_persist'
)
7 changes: 6 additions & 1 deletion galaxy_ml/binarize_target/_binarize_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def fit(self, X, y, sample_weight=None, **fit_params):
self.n_outputs_ = self.classifier_.n_outputs_
if hasattr(self.classifier_, 'n_features_'):
self.n_features_ = self.classifier_.n_features_
if hasattr(self.classifier_, 'classes_'):
self.classes_ = self.classifier_.classes_

return self

Expand All @@ -108,7 +110,10 @@ def predict(self, X):
def decision_function(self, X):
"""Predict using a fitted estimator
"""
return self.classifier_.decision_function(X)
try:
return self.classifier_.decision_function(X)
except Exception:
raise

def predict_proba(self, X):
"""Predict using a fitted estimator
Expand Down
2 changes: 1 addition & 1 deletion galaxy_ml/binarize_target/_iraps_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from joblib import Parallel, delayed
from scipy.stats import ttest_ind
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.feature_selection.univariate_selection import _BaseFilter
from sklearn.feature_selection._univariate_selection import _BaseFilter
from sklearn.utils import as_float_array, check_X_y, check_random_state
from sklearn.utils.validation import check_is_fitted, check_memory

Expand Down
54 changes: 29 additions & 25 deletions galaxy_ml/binarize_target/_scorers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
from ..utils import get_main_estimator
from sklearn import metrics
from sklearn.utils.multiclass import type_of_target
from sklearn.metrics.scorer import _BaseScorer
from sklearn.metrics._scorer import _BaseScorer


class _BinarizeTargetThresholdScorer(_BaseScorer):
"""
Base class to make binarized target specific scorer.
Class to make binarized target specific scorer to evaluate decision
function output.
"""
def __call__(self, clf, X, y, sample_weight=None):
def _score(self, method_caller, clf, X, y, sample_weight=None):
main_estimator = get_main_estimator(clf)
discretize_value = main_estimator.discretize_value
less_is_positive = main_estimator.less_is_positive
Expand All @@ -24,38 +25,40 @@ def __call__(self, clf, X, y, sample_weight=None):
raise ValueError("{0} format is not supported".format(y_type))

try:
y_score = clf.decision_function(X)
y_pred = method_caller(clf, "decision_function", X)

# For multi-output multi-class estimator
if isinstance(y_score, list):
y_score = np.vstack([p for p in y_score]).T
if isinstance(y_pred, list):
y_pred = np.vstack([p for p in y_pred]).T
elif y_type == "binary" and "pos_label" in self._kwargs:
self._check_pos_label(
self._kwargs["pos_label"], clf.classes_
)
if self._kwargs["pos_label"] == clf.classes_[0]:
# The implicit positive class of the binary classifier
# does not match `pos_label`: we need to invert the
# predictions
y_pred *= -1

except (NotImplementedError, AttributeError):
y_score = clf.predict_proba(X)
y_pred = method_caller(clf, "predict_proba", X)

if y_type == "binary":
if y_score.shape[1] == 2:
y_score = y_score[:, 1]
else:
raise ValueError('got predict_proba of shape {},'
' but need classifier with two'
' classes for {} scoring'.format(
y_score.shape,
self._score_func.__name__))
elif isinstance(y_score, list):
y_score = np.vstack([p[:, -1] for p in y_score]).T
y_pred = self._select_proba_binary(y_pred, clf.classes_)
elif isinstance(y_pred, list):
y_pred = np.vstack([p[:, -1] for p in y_pred]).T

if sample_weight is not None:
return self._sign * self._score_func(y_trans, y_score,
return self._sign * self._score_func(y_trans, y_pred,
sample_weight=sample_weight,
**self._kwargs)
else:
return self._sign * self._score_func(y_trans, y_score,
return self._sign * self._score_func(y_trans, y_pred,
**self._kwargs)

def _factory_args(self):
return ", needs_threshold=True"

# TODO deprecate in next major version
_BinarizeTargetProbaScorer = _BinarizeTargetThresholdScorer

# roc_auc
binarize_auc_scorer =\
Expand All @@ -80,10 +83,11 @@ def __call__(self, clf, X, y, sample_weight=None):

class _BinarizeTargetPredictScorer(_BaseScorer):
"""
Base class to make binarized target specific scorer.
Class to make binarized target specific scorer to evaluate predicted
target values.
"""
def __call__(self, clf, X, y, sample_weight=None):
main_estimator = get_main_estimator(clf)
def _score(self, method_caller, estimator, X, y, sample_weight=None):
main_estimator = get_main_estimator(estimator)
discretize_value = main_estimator.discretize_value
less_is_positive = main_estimator.less_is_positive

Expand All @@ -92,7 +96,7 @@ def __call__(self, clf, X, y, sample_weight=None):
else:
y_trans = y > discretize_value

y_pred = clf.predict(X)
y_pred = method_caller(estimator, "predict", X)
if sample_weight is not None:
return self._sign * self._score_func(y_trans, y_pred,
sample_weight=sample_weight,
Expand Down
6 changes: 3 additions & 3 deletions galaxy_ml/feature_selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

from sklearn.base import BaseEstimator
from sklearn.base import MetaEstimatorMixin, clone, is_classifier
from sklearn.feature_selection.rfe import _rfe_single_fit, RFE, RFECV
from sklearn.feature_selection._rfe import _rfe_single_fit, RFE, RFECV
from sklearn.model_selection import check_cv
from sklearn.metrics.scorer import check_scoring
from sklearn.utils import check_X_y, safe_indexing, safe_sqr
from sklearn.metrics._scorer import check_scoring
from sklearn.utils import check_X_y, safe_sqr


__all__ = ('DyRFE', 'DyRFECV', '_MyPipeline', '_MyimbPipeline',
Expand Down
6 changes: 1 addition & 5 deletions galaxy_ml/iraps_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@
binarize_auc_scorer,
binarize_average_precision_scorer)

from .binarize_target import _BinarizeTargetThresholdScorer \
as _BinarizeTargetProbaScorer


__all__ = ('IRAPSCore', 'IRAPSClassifier', 'binarize_auc_scorer',
'binarize_average_precision_scorer', 'BinarizeTargetClassifier',
'BinarizeTargetRegressor', 'BinarizeTargetTransformer',
'_BinarizeTargetProbaScorer')
'BinarizeTargetRegressor', 'BinarizeTargetTransformer')
Loading

0 comments on commit d662fc5

Please sign in to comment.