diff --git a/lazypredict/Supervised.py b/lazypredict/Supervised.py
index bf61208..d7993b8 100644
--- a/lazypredict/Supervised.py
+++ b/lazypredict/Supervised.py
@@ -154,6 +154,9 @@ class LazyClassifier:
         When set to True, the predictions of all the models models are returned as dataframe.
     classifiers : list, optional (default="all")
         When function is provided, trains the chosen classifier(s).
+    hyperparameters_dict : dict, optional (default=None)
+        Maps a model class name (e.g. "DecisionTreeClassifier") to a dict of keyword
+        arguments for that model's constructor. None means no overrides.
 
     Examples
     --------
@@ -210,6 +213,7 @@ def __init__(
         predictions=False,
         random_state=42,
         classifiers="all",
+        hyperparameters_dict=None,
     ):
         self.verbose = verbose
         self.ignore_warnings = ignore_warnings
@@ -218,6 +222,7 @@ def __init__(
         self.models = {}
         self.random_state = random_state
         self.classifiers = classifiers
+        self.hyperparameters_dict = dict(hyperparameters_dict or {})  # fresh copy per instance
 
     def fit(self, X_train, X_test, y_train, y_test):
         """Fit Classification algorithms to X_train and y_train, predict and score on X_test, y_test.
@@ -288,18 +293,19 @@ def fit(self, X_train, X_test, y_train, y_test):
         for name, model in tqdm(self.classifiers):
             start = time.time()
             try:
-                if "random_state" in model().get_params().keys():
-                    pipe = Pipeline(
-                        steps=[
-                            ("preprocessor", preprocessor),
-                            ("classifier", model(random_state=self.random_state)),
-                        ]
-                    )
-                else:
-                    pipe = Pipeline(
-                        steps=[("preprocessor", preprocessor), ("classifier", model())]
-                    )
-
+                model_hyperparameters = {}
+                if "random_state" in model().get_params():
+                    model_hyperparameters["random_state"] = self.random_state
+                # User-supplied values are applied last so they override the default seed.
+                model_hyperparameters.update(
+                    (self.hyperparameters_dict or {}).get(model.__name__, {})
+                )
+                pipe = Pipeline(
+                    steps=[
+                        ("preprocessor", preprocessor),
+                        ("classifier", model(**model_hyperparameters)),
+                    ]
+                )
                 pipe.fit(X_train, y_train)
                 self.models[name] = pipe
                 y_pred = pipe.predict(X_test)
diff --git a/tests/test_lazypredict.py b/tests/test_lazypredict.py
index 2ae91ff..30caa9a 100644
--- a/tests/test_lazypredict.py
+++ b/tests/test_lazypredict.py
@@ -7,8 +7,11 @@
 
 from click.testing import CliRunner
 
-from lazypredict import cli
+from sklearn.datasets import make_classification
+from sklearn.model_selection import train_test_split
 
+from lazypredict import cli
+from lazypredict.Supervised import LazyClassifier
 
 @pytest.fixture
 def response():
@@ -35,3 +38,30 @@ def test_command_line_interface():
     help_result = runner.invoke(cli.main, ["--help"])
     assert help_result.exit_code == 0
     assert "--help Show this message and exit." in help_result.output
+
+
+def test_hyperparameter_set():
+    """User-supplied hyperparameters must reach the fitted estimator."""
+    X, y = make_classification(
+        n_samples=500, n_features=10, n_informative=10, n_redundant=0, random_state=42
+    )
+
+    train_samples = 100  # Samples used for training the models
+    X_train, X_test, y_train, y_test = train_test_split(
+        X,
+        y,
+        shuffle=False,
+        test_size=500 - train_samples,
+    )
+
+    clf = LazyClassifier(
+        verbose=0,
+        ignore_warnings=True,
+        hyperparameters_dict={"DecisionTreeClassifier": {"max_depth": 4}},
+    )
+    clf.fit(X_train, X_test, y_train, y_test)
+
+    tree = clf.models["DecisionTreeClassifier"].named_steps["classifier"]
+    assert tree.max_depth == 4
+    # The default seed is still applied alongside the custom hyperparameters.
+    assert tree.random_state == 42