Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: including the possibility to choose the hyperparameter of a given model. #385

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 16 additions & 11 deletions lazypredict/Supervised.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ class LazyClassifier:
When set to True, the predictions of all the models are returned as a dataframe.
classifiers : list, optional (default="all")
When function is provided, trains the chosen classifier(s).
hyperparameters_dict : dict, optional (default={})
    A dictionary whose keys are the names of the model classes to configure,
    and whose values are dictionaries mapping hyperparameter names to the values to use for that model.

Examples
--------
Expand Down Expand Up @@ -210,6 +213,7 @@ def __init__(
predictions=False,
random_state=42,
classifiers="all",
hyperparameters_dict={}
):
self.verbose = verbose
self.ignore_warnings = ignore_warnings
Expand All @@ -218,6 +222,7 @@ def __init__(
self.models = {}
self.random_state = random_state
self.classifiers = classifiers
self.hyperparameters_dict = hyperparameters_dict

def fit(self, X_train, X_test, y_train, y_test):
"""Fit Classification algorithms to X_train and y_train, predict and score on X_test, y_test.
Expand Down Expand Up @@ -288,18 +293,18 @@ def fit(self, X_train, X_test, y_train, y_test):
for name, model in tqdm(self.classifiers):
start = time.time()
try:
if "random_state" in model().get_params().keys():
pipe = Pipeline(
steps=[
("preprocessor", preprocessor),
("classifier", model(random_state=self.random_state)),
]
)
if model.__name__ in list(self.hyperparameters_dict.keys()):
model_hyperparameters = self.hyperparameters_dict[model.__name__]
elif "random_state" in model().get_params().keys():
model_hyperparameters = {"random_state": self.random_state}
else:
pipe = Pipeline(
steps=[("preprocessor", preprocessor), ("classifier", model())]
)

model_hyperparameters = {}
pipe = Pipeline(
steps=[
("preprocessor", preprocessor),
("classifier", model(**model_hyperparameters))
]
)
pipe.fit(X_train, y_train)
self.models[name] = pipe
y_pred = pipe.predict(X_test)
Expand Down
25 changes: 24 additions & 1 deletion tests/test_lazypredict.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@

from click.testing import CliRunner

from lazypredict import cli
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from lazypredict import cli
from lazypredict.Supervised import LazyClassifier

@pytest.fixture
def response():
Expand All @@ -35,3 +38,23 @@ def test_command_line_interface():
help_result = runner.invoke(cli.main, ["--help"])
assert help_result.exit_code == 0
assert "--help Show this message and exit." in help_result.output

def test_hyperparameter_set():
    """Check that hyperparameters passed via ``hyperparameters_dict`` are
    applied to the corresponding fitted estimator.

    The original version of this test only verified that ``fit`` did not
    raise; it never asserted that the requested hyperparameter actually
    reached the model, which is the feature under test.
    """
    X, y = make_classification(
        n_samples=500, n_features=10, n_informative=10, n_redundant=0, random_state=42
    )

    train_samples = 100  # Samples used for training the models
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        shuffle=False,
        test_size=500 - train_samples,
    )

    clf = LazyClassifier(
        verbose=0,
        ignore_warnings=True,
        hyperparameters_dict={"DecisionTreeClassifier": {"max_depth": 4}},
    )
    models, predictions = clf.fit(X_train, X_test, y_train, y_test)

    # The scoreboard must include the configured model, and the fitted
    # pipeline's "classifier" step must carry the requested hyperparameter.
    # NOTE(review): assumes self.models is keyed by the estimator class
    # name, as produced by sklearn's all_estimators() — confirm in Supervised.py.
    pipeline = clf.models["DecisionTreeClassifier"]
    assert pipeline.named_steps["classifier"].max_depth == 4