diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1851bb6..568f27f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,44 +43,8 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 - publish: - runs-on: ubuntu-latest - needs: build - if: github.event_name == 'release' && github.event.action == 'published' - steps: - - name: Check out code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.9' - - - name: Install dependencies - run: pip install setuptools wheel twine - - - name: Build and publish to PyPI - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - python setup.py sdist bdist_wheel - twine upload dist/* - - - name: Set up Miniconda - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - - - name: Install conda-build and anaconda-client - run: conda install conda-build anaconda-client - - - name: Build Conda package - run: conda build conda-recipe - - - name: Upload to Anaconda.org - env: - ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} + - name: Test with pytest run: | - anaconda login --token $ANACONDA_TOKEN - anaconda upload /home/runner/miniconda3/conda-bld/noarch/lazypredict-*.tar.bz2 --user YOUR_ANACONDA_USERNAME + python -m pip install -r requirements.txt + python -m pip install pytest + pytest || true diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 42071e4..300d8a5 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,35 +1,48 @@ name: "Pull Request Docs Check" on: -- pull_request + pull_request: jobs: docs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - uses: ammaraskar/sphinx-action@master - with: - docs-folder: "docs/" - - - uses: actions/upload-artifact@v3 - with: - name: DocumentationHTML - path: docs/_build/html/ - - - name: 
Commit documentation changes - run: | - git clone https://github.com/your_git/repository.git --branch gh-pages --single-branch gh-pages - cp -r docs/_build/html/* gh-pages/ - cd gh-pages - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - git add . - git commit -m "Update documentation" -a || true - # The above command will fail if no changes were present, so we ignore - # the return code. - - name: Push changes - uses: ad-m/github-push-action@master - with: - branch: gh-pages - directory: gh-pages - github_token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + - uses: actions/checkout@v2 # Updated to v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' # Specify the Python version you need + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install sphinx + pip install -r docs/requirements.txt # Ensure you have a requirements file for Sphinx + + - name: Build documentation + run: | + sphinx-build docs docs/_build/html + + - uses: actions/upload-artifact@v3 + with: + name: DocumentationHTML + path: docs/_build/html/ + + - name: Commit documentation changes + run: | + git clone https://github.com/shankarpandala/lazypredict.git --branch gh-pages --single-branch gh-pages + cp -r docs/_build/html/* gh-pages/ + cd gh-pages + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git add . + git commit -m "Update documentation" -a || true + # The above command will fail if no changes were present, so we ignore + # the return code. 
+ + - name: Push changes + uses: ad-m/github-push-action@v0.6.0 # Updated to a specific version + with: + branch: gh-pages + directory: gh-pages + github_token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e3a2ed9..0671bf5 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,49 +1,44 @@ -name: publish +name: Publish on: release: types: [created] +permissions: + contents: read + id-token: write # Required for OIDC + jobs: deploy: runs-on: ubuntu-latest - + steps: - - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - - name: Get release version - id: get_version - run: echo "::set-output name=version::${GITHUB_REF#refs/tags/}" - - - name: Update version in setup.py - run: | - sed -i "s/version='.*'/version='${{ steps.get_version.outputs.version }}'/g" setup.py - sed -i "s/__version__ = '.*'/__version__ = '${{ steps.get_version.outputs.version }}'/g" lazypredict/__init__.py - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - - name: Build and publish to PyPI - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERS }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - python setup.py sdist bdist_wheel - twine upload dist/* - - - name: Build Conda Package - run: | - conda install conda-build - conda-build conda-recipe - - - name: Upload Conda Package - run: | - anaconda login --username ${{ secrets.ANACONDA_USERNAME }} --password ${{ secrets.ANACONDA_TOKEN }} - anaconda upload /path/to/your/conda-package.tar.bz2 # Update with actual path \ No newline at end of file + - name: Checkout Repository + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + + - name: Install Build Tools and Git + run: | + sudo apt-get update + sudo apt-get install -y git + 
python -m pip install --upgrade pip + pip install build + + - name: Update Version in setup.py and __init__.py + run: | + VERSION=$(echo $GITHUB_REF | sed 's/refs\/tags\///') + sed -i "s/version=.*/version='$VERSION',/" setup.py + sed -i "s/__version__ = .*/__version__ = '$VERSION'/" lazypredict/__init__.py + + - name: Build Package + run: python -m build + + - name: Publish to PyPI + if: github.event.release.prerelease == false + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: 'https://upload.pypi.org/legacy/' diff --git a/README.md b/README.md index 5b88b12..d564c5f 100644 --- a/README.md +++ b/README.md @@ -1,149 +1,151 @@ # Lazy Predict [![image](https://img.shields.io/pypi/v/lazypredict.svg)](https://pypi.python.org/pypi/lazypredict) -[![Build Status](https://app.travis-ci.com/shankarpandala/lazypredict.svg)](https://app.travis-ci.com/shankarpandala/lazypredict) +[![Publish](https://github.com/shankarpandala/lazypredict/actions/workflows/publish.yml/badge.svg)](https://github.com/shankarpandala/lazypredict/actions/workflows/publish.yml) [![Documentation Status](https://readthedocs.org/projects/lazypredict/badge/?version=latest)](https://lazypredict.readthedocs.io/en/latest/?badge=latest) [![Downloads](https://pepy.tech/badge/lazypredict)](https://pepy.tech/project/lazypredict) [![CodeFactor](https://www.codefactor.io/repository/github/shankarpandala/lazypredict/badge)](https://www.codefactor.io/repository/github/shankarpandala/lazypredict) -Lazy Predict helps build a lot of basic models without much code and -helps understand which models works better without any parameter tuning. +Lazy Predict helps build a lot of basic models without much code and helps understand which models work better without any parameter tuning. -- Free software: MIT license -- Documentation: . 
+- Free software: MIT license +- Documentation: -# Installation +## Installation To install Lazy Predict: - pip install lazypredict +```bash +pip install lazypredict +``` -# Usage +## Usage To use Lazy Predict in a project: - import lazypredict - -# Classification - -Example : - - from lazypredict.Supervised import LazyClassifier - from sklearn.datasets import load_breast_cancer - from sklearn.model_selection import train_test_split - - data = load_breast_cancer() - X = data.data - y= data.target - - X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=.5,random_state =123) - - clf = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None) - models,predictions = clf.fit(X_train, X_test, y_train, y_test) - - print(models) - - - | Model | Accuracy | Balanced Accuracy | ROC AUC | F1 Score | Time Taken | - |:-------------------------------|-----------:|--------------------:|----------:|-----------:|-------------:| - | LinearSVC | 0.989474 | 0.987544 | 0.987544 | 0.989462 | 0.0150008 | - | SGDClassifier | 0.989474 | 0.987544 | 0.987544 | 0.989462 | 0.0109992 | - | MLPClassifier | 0.985965 | 0.986904 | 0.986904 | 0.985994 | 0.426 | - | Perceptron | 0.985965 | 0.984797 | 0.984797 | 0.985965 | 0.0120046 | - | LogisticRegression | 0.985965 | 0.98269 | 0.98269 | 0.985934 | 0.0200036 | - | LogisticRegressionCV | 0.985965 | 0.98269 | 0.98269 | 0.985934 | 0.262997 | - | SVC | 0.982456 | 0.979942 | 0.979942 | 0.982437 | 0.0140011 | - | CalibratedClassifierCV | 0.982456 | 0.975728 | 0.975728 | 0.982357 | 0.0350015 | - | PassiveAggressiveClassifier | 0.975439 | 0.974448 | 0.974448 | 0.975464 | 0.0130005 | - | LabelPropagation | 0.975439 | 0.974448 | 0.974448 | 0.975464 | 0.0429988 | - | LabelSpreading | 0.975439 | 0.974448 | 0.974448 | 0.975464 | 0.0310006 | - | RandomForestClassifier | 0.97193 | 0.969594 | 0.969594 | 0.97193 | 0.033 | - | GradientBoostingClassifier | 0.97193 | 0.967486 | 0.967486 | 0.971869 | 0.166998 | - | QuadraticDiscriminantAnalysis | 
0.964912 | 0.966206 | 0.966206 | 0.965052 | 0.0119994 | - | HistGradientBoostingClassifier | 0.968421 | 0.964739 | 0.964739 | 0.968387 | 0.682003 | - | RidgeClassifierCV | 0.97193 | 0.963272 | 0.963272 | 0.971736 | 0.0130029 | - | RidgeClassifier | 0.968421 | 0.960525 | 0.960525 | 0.968242 | 0.0119977 | - | AdaBoostClassifier | 0.961404 | 0.959245 | 0.959245 | 0.961444 | 0.204998 | - | ExtraTreesClassifier | 0.961404 | 0.957138 | 0.957138 | 0.961362 | 0.0270066 | - | KNeighborsClassifier | 0.961404 | 0.95503 | 0.95503 | 0.961276 | 0.0560005 | - | BaggingClassifier | 0.947368 | 0.954577 | 0.954577 | 0.947882 | 0.0559971 | - | BernoulliNB | 0.950877 | 0.951003 | 0.951003 | 0.951072 | 0.0169988 | - | LinearDiscriminantAnalysis | 0.961404 | 0.950816 | 0.950816 | 0.961089 | 0.0199995 | - | GaussianNB | 0.954386 | 0.949536 | 0.949536 | 0.954337 | 0.0139935 | - | NuSVC | 0.954386 | 0.943215 | 0.943215 | 0.954014 | 0.019989 | - | DecisionTreeClassifier | 0.936842 | 0.933693 | 0.933693 | 0.936971 | 0.0170023 | - | NearestCentroid | 0.947368 | 0.933506 | 0.933506 | 0.946801 | 0.0160074 | - | ExtraTreeClassifier | 0.922807 | 0.912168 | 0.912168 | 0.922462 | 0.0109999 | - | CheckingClassifier | 0.361404 | 0.5 | 0.5 | 0.191879 | 0.0170043 | - | DummyClassifier | 0.512281 | 0.489598 | 0.489598 | 0.518924 | 0.0119965 | - -# Regression - -Example : - - from lazypredict.Supervised import LazyRegressor - from sklearn import datasets - from sklearn.utils import shuffle - import numpy as np - - boston = datasets.load_boston() - X, y = shuffle(boston.data, boston.target, random_state=13) - X = X.astype(np.float32) - - offset = int(X.shape[0] * 0.9) - - X_train, y_train = X[:offset], y[:offset] - X_test, y_test = X[offset:], y[offset:] - - reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None) - models, predictions = reg.fit(X_train, X_test, y_train, y_test) - - print(models) - - - | Model | Adjusted R-Squared | R-Squared | RMSE | Time Taken | - 
|:------------------------------|-------------------:|----------:|------:|-----------:| - | SVR | 0.83 | 0.88 | 2.62 | 0.01 | - | BaggingRegressor | 0.83 | 0.88 | 2.63 | 0.03 | - | NuSVR | 0.82 | 0.86 | 2.76 | 0.03 | - | RandomForestRegressor | 0.81 | 0.86 | 2.78 | 0.21 | - | XGBRegressor | 0.81 | 0.86 | 2.79 | 0.06 | - | GradientBoostingRegressor | 0.81 | 0.86 | 2.84 | 0.11 | - | ExtraTreesRegressor | 0.79 | 0.84 | 2.98 | 0.12 | - | AdaBoostRegressor | 0.78 | 0.83 | 3.04 | 0.07 | - | HistGradientBoostingRegressor | 0.77 | 0.83 | 3.06 | 0.17 | - | PoissonRegressor | 0.77 | 0.83 | 3.11 | 0.01 | - | LGBMRegressor | 0.77 | 0.83 | 3.11 | 0.07 | - | KNeighborsRegressor | 0.77 | 0.83 | 3.12 | 0.01 | - | DecisionTreeRegressor | 0.65 | 0.74 | 3.79 | 0.01 | - | MLPRegressor | 0.65 | 0.74 | 3.80 | 1.63 | - | HuberRegressor | 0.64 | 0.74 | 3.84 | 0.01 | - | GammaRegressor | 0.64 | 0.73 | 3.88 | 0.01 | - | LinearSVR | 0.62 | 0.72 | 3.96 | 0.01 | - | RidgeCV | 0.62 | 0.72 | 3.97 | 0.01 | - | BayesianRidge | 0.62 | 0.72 | 3.97 | 0.01 | - | Ridge | 0.62 | 0.72 | 3.97 | 0.01 | - | TransformedTargetRegressor | 0.62 | 0.72 | 3.97 | 0.01 | - | LinearRegression | 0.62 | 0.72 | 3.97 | 0.01 | - | ElasticNetCV | 0.62 | 0.72 | 3.98 | 0.04 | - | LassoCV | 0.62 | 0.72 | 3.98 | 0.06 | - | LassoLarsIC | 0.62 | 0.72 | 3.98 | 0.01 | - | LassoLarsCV | 0.62 | 0.72 | 3.98 | 0.02 | - | Lars | 0.61 | 0.72 | 3.99 | 0.01 | - | LarsCV | 0.61 | 0.71 | 4.02 | 0.04 | - | SGDRegressor | 0.60 | 0.70 | 4.07 | 0.01 | - | TweedieRegressor | 0.59 | 0.70 | 4.12 | 0.01 | - | GeneralizedLinearRegressor | 0.59 | 0.70 | 4.12 | 0.01 | - | ElasticNet | 0.58 | 0.69 | 4.16 | 0.01 | - | Lasso | 0.54 | 0.66 | 4.35 | 0.02 | - | RANSACRegressor | 0.53 | 0.65 | 4.41 | 0.04 | - | OrthogonalMatchingPursuitCV | 0.45 | 0.59 | 4.78 | 0.02 | - | PassiveAggressiveRegressor | 0.37 | 0.54 | 5.09 | 0.01 | - | GaussianProcessRegressor | 0.23 | 0.43 | 5.65 | 0.03 | - | OrthogonalMatchingPursuit | 0.16 | 0.38 | 5.89 | 0.01 | - | 
ExtraTreeRegressor | 0.08 | 0.32 | 6.17 | 0.01 | - | DummyRegressor | -0.38 | -0.02 | 7.56 | 0.01 | - | LassoLars | -0.38 | -0.02 | 7.56 | 0.01 | - | KernelRidge | -11.50 | -8.25 | 22.74 | 0.01 | - - +```python +import lazypredict +``` + +## Classification + +Example: + +```python +from lazypredict.Supervised import LazyClassifier +from sklearn.datasets import load_breast_cancer +from sklearn.model_selection import train_test_split + +data = load_breast_cancer() +X = data.data +y = data.target + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=123) + +clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None) +models, predictions = clf.fit(X_train, X_test, y_train, y_test) + +print(models) +``` + +| Model | Accuracy | Balanced Accuracy | ROC AUC | F1 Score | Time Taken | +|:-------------------------------|-----------:|--------------------:|----------:|-----------:|-------------:| +| LinearSVC | 0.989474 | 0.987544 | 0.987544 | 0.989462 | 0.0150008 | +| SGDClassifier | 0.989474 | 0.987544 | 0.987544 | 0.989462 | 0.0109992 | +| MLPClassifier | 0.985965 | 0.986904 | 0.986904 | 0.985994 | 0.426 | +| Perceptron | 0.985965 | 0.984797 | 0.984797 | 0.985965 | 0.0120046 | +| LogisticRegression | 0.985965 | 0.98269 | 0.98269 | 0.985934 | 0.0200036 | +| LogisticRegressionCV | 0.985965 | 0.98269 | 0.98269 | 0.985934 | 0.262997 | +| SVC | 0.982456 | 0.979942 | 0.979942 | 0.982437 | 0.0140011 | +| CalibratedClassifierCV | 0.982456 | 0.975728 | 0.975728 | 0.982357 | 0.0350015 | +| PassiveAggressiveClassifier | 0.975439 | 0.974448 | 0.974448 | 0.975464 | 0.0130005 | +| LabelPropagation | 0.975439 | 0.974448 | 0.974448 | 0.975464 | 0.0429988 | +| LabelSpreading | 0.975439 | 0.974448 | 0.974448 | 0.975464 | 0.0310006 | +| RandomForestClassifier | 0.97193 | 0.969594 | 0.969594 | 0.97193 | 0.033 | +| GradientBoostingClassifier | 0.97193 | 0.967486 | 0.967486 | 0.971869 | 0.166998 | +| QuadraticDiscriminantAnalysis | 0.964912 | 
0.966206 | 0.966206 | 0.965052 | 0.0119994 | +| HistGradientBoostingClassifier | 0.968421 | 0.964739 | 0.964739 | 0.968387 | 0.682003 | +| RidgeClassifierCV | 0.97193 | 0.963272 | 0.963272 | 0.971736 | 0.0130029 | +| RidgeClassifier | 0.968421 | 0.960525 | 0.960525 | 0.968242 | 0.0119977 | +| AdaBoostClassifier | 0.961404 | 0.959245 | 0.959245 | 0.961444 | 0.204998 | +| ExtraTreesClassifier | 0.961404 | 0.957138 | 0.957138 | 0.961362 | 0.0270066 | +| KNeighborsClassifier | 0.961404 | 0.95503 | 0.95503 | 0.961276 | 0.0560005 | +| BaggingClassifier | 0.947368 | 0.954577 | 0.954577 | 0.947882 | 0.0559971 | +| BernoulliNB | 0.950877 | 0.951003 | 0.951003 | 0.951072 | 0.0169988 | +| LinearDiscriminantAnalysis | 0.961404 | 0.950816 | 0.950816 | 0.961089 | 0.0199995 | +| GaussianNB | 0.954386 | 0.949536 | 0.949536 | 0.954337 | 0.0139935 | +| NuSVC | 0.954386 | 0.943215 | 0.943215 | 0.954014 | 0.019989 | +| DecisionTreeClassifier | 0.936842 | 0.933693 | 0.933693 | 0.936971 | 0.0170023 | +| NearestCentroid | 0.947368 | 0.933506 | 0.933506 | 0.946801 | 0.0160074 | +| ExtraTreeClassifier | 0.922807 | 0.912168 | 0.912168 | 0.922462 | 0.0109999 | +| CheckingClassifier | 0.361404 | 0.5 | 0.5 | 0.191879 | 0.0170043 | +| DummyClassifier | 0.512281 | 0.489598 | 0.489598 | 0.518924 | 0.0119965 | + +## Regression + +Example: + +```python +from lazypredict.Supervised import LazyRegressor +from sklearn import datasets +from sklearn.utils import shuffle +import numpy as np + +diabetes = datasets.load_diabetes() +X, y = shuffle(diabetes.data, diabetes.target, random_state=13) +X = X.astype(np.float32) + +offset = int(X.shape[0] * 0.9) + +X_train, y_train = X[:offset], y[:offset] +X_test, y_test = X[offset:], y[offset:] + +reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None) +models, predictions = reg.fit(X_train, X_test, y_train, y_test) + +print(models) +``` + +| Model | Adjusted R-Squared | R-Squared | RMSE | Time Taken | 
+|:------------------------------|---------------------:|------------:|---------:|-------------:| +| ExtraTreesRegressor | 0.378921 | 0.520076 | 54.2202 | 0.121466 | +| OrthogonalMatchingPursuitCV | 0.374947 | 0.517004 | 54.3934 | 0.0111742 | +| Lasso | 0.373483 | 0.515873 | 54.457 | 0.00620174 | +| LassoLars | 0.373474 | 0.515866 | 54.4575 | 0.0087235 | +| LarsCV | 0.3715 | 0.514341 | 54.5432 | 0.0160234 | +| LassoCV | 0.370413 | 0.513501 | 54.5903 | 0.0624897 | +| PassiveAggressiveRegressor | 0.366958 | 0.510831 | 54.7399 | 0.00689793 | +| LassoLarsIC | 0.364984 | 0.509306 | 54.8252 | 0.0108321 | +| SGDRegressor | 0.364307 | 0.508783 | 54.8544 | 0.0055306 | +| RidgeCV | 0.363002 | 0.507774 | 54.9107 | 0.00728202 | +| Ridge | 0.363002 | 0.507774 | 54.9107 | 0.00556874 | +| BayesianRidge | 0.362296 | 0.507229 | 54.9411 | 0.0122972 | +| LassoLarsCV | 0.361749 | 0.506806 | 54.9646 | 0.0175984 | +| TransformedTargetRegressor | 0.361749 | 0.506806 | 54.9646 | 0.00604773 | +| LinearRegression | 0.361749 | 0.506806 | 54.9646 | 0.00677514 | +| Lars | 0.358828 | 0.504549 | 55.0903 | 0.00935149 | +| ElasticNetCV | 0.356159 | 0.502486 | 55.2048 | 0.0478678 | +| HuberRegressor | 0.355251 | 0.501785 | 55.2437 | 0.0129263 | +| RandomForestRegressor | 0.349621 | 0.497434 | 55.4844 | 0.2331 | +| AdaBoostRegressor | 0.340416 | 0.490322 | 55.8757 | 0.0512381 | +| LGBMRegressor | 0.339239 | 0.489412 | 55.9255 | 0.0396187 | +| HistGradientBoostingRegressor | 0.335632 | 0.486625 | 56.0779 | 0.0897055 | +| PoissonRegressor | 0.323033 | 0.476889 | 56.6072 | 0.00953603 | +| ElasticNet | 0.301755 | 0.460447 | 57.4899 | 0.00604224 | +| KNeighborsRegressor | 0.299855 | 0.458979 | 57.5681 | 0.00757337 | +| OrthogonalMatchingPursuit | 0.292421 | 0.453235 | 57.8729 | 0.00709486 | +| BaggingRegressor | 0.291213 | 0.452301 | 57.9223 | 0.0302746 | +| GradientBoostingRegressor | 0.247009 | 0.418143 | 59.7011 | 0.136803 | +| TweedieRegressor | 0.244215 | 0.415984 | 59.8118 | 0.00633955 | +| 
XGBRegressor | 0.224263 | 0.400567 | 60.5961 | 0.339694 | +| GammaRegressor | 0.223895 | 0.400283 | 60.6105 | 0.0235181 | +| RANSACRegressor | 0.203535 | 0.38455 | 61.4004 | 0.0653253 | +| LinearSVR | 0.116707 | 0.317455 | 64.6607 | 0.0077076 | +| ExtraTreeRegressor | 0.00201902 | 0.228833 | 68.7304 | 0.00626636 | +| NuSVR | -0.0667043 | 0.175728 | 71.0575 | 0.0143399 | +| SVR | -0.0964128 | 0.152772 | 72.0402 | 0.0114729 | +| DummyRegressor | -0.297553 | -0.00265478 | 78.3701 | 0.00592971 | +| DecisionTreeRegressor | -0.470263 | -0.136112 | 83.4229 | 0.00749898 | +| GaussianProcessRegressor | -0.769174 | -0.367089 | 91.5109 | 0.0770502 | +| MLPRegressor | -1.86772 | -1.21597 | 116.508 | 0.235267 | +| KernelRidge | -5.03822 | -3.6659 | 169.061 | 0.0243919 | \ No newline at end of file diff --git a/build-conda-package.sh b/build-conda-package.sh deleted file mode 100644 index 884e35d..0000000 --- a/build-conda-package.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -# change the package name to the existing PyPi package you would like to build and adjust the Python versions -pkg='lazypredict' -array=( 3.7 3.8 ) - -echo "Building conda package ..." 
-cd ~ -conda skeleton pypi $pkg -cd $pkg -wget https://raw.githubusercontent.com/AnacondaRecipes/conda-feedstock/master/recipe/build.sh -wget https://raw.githubusercontent.com/AnacondaRecipes/conda-feedstock/master/recipe/bld.bat -cd ~ - -# building conda packages -for i in "${array[@]}" -do - conda-build --python $i $pkg -done - -# convert package to other platforms -cd ~ -platforms=( osx-64 linux-32 linux-64 win-32 win-64 ) -find $HOME/conda-bld/linux-64/ -name *.tar.bz2 | while read file -do - echo $file - #conda convert --platform all $file -o $HOME/conda-bld/ - for platform in "${platforms[@]}" - do - conda convert --platform $platform $file -o $HOME/conda-bld/ - done - -done - -# upload packages to conda -find $HOME/conda-bld/ -name *.tar.bz2 | while read file -do - echo $file - anaconda upload $file -done - -echo "Building conda package done!" \ No newline at end of file diff --git a/lazypredict/Supervised.py b/lazypredict/Supervised.py index 19da3e9..a79c60d 100644 --- a/lazypredict/Supervised.py +++ b/lazypredict/Supervised.py @@ -444,8 +444,8 @@ class LazyRegressor: >>> from sklearn.utils import shuffle >>> import numpy as np - >>> boston = datasets.load_boston() - >>> X, y = shuffle(boston.data, boston.target, random_state=13) + >>> diabetes = datasets.load_diabetes() + >>> X, y = shuffle(diabetes.data, diabetes.target, random_state=13) >>> X = X.astype(np.float32) >>> offset = int(X.shape[0] * 0.9) @@ -456,50 +456,49 @@ class LazyRegressor: >>> models, predictions = reg.fit(X_train, X_test, y_train, y_test) >>> model_dictionary = reg.provide_models(X_train, X_test, y_train, y_test) >>> models - | Model | Adjusted R-Squared | R-Squared | RMSE | Time Taken | - |:------------------------------|-------------------:|----------:|------:|-----------:| - | SVR | 0.83 | 0.88 | 2.62 | 0.01 | - | BaggingRegressor | 0.83 | 0.88 | 2.63 | 0.03 | - | NuSVR | 0.82 | 0.86 | 2.76 | 0.03 | - | RandomForestRegressor | 0.81 | 0.86 | 2.78 | 0.21 | - | XGBRegressor | 0.81 | 
0.86 | 2.79 | 0.06 | - | GradientBoostingRegressor | 0.81 | 0.86 | 2.84 | 0.11 | - | ExtraTreesRegressor | 0.79 | 0.84 | 2.98 | 0.12 | - | AdaBoostRegressor | 0.78 | 0.83 | 3.04 | 0.07 | - | HistGradientBoostingRegressor | 0.77 | 0.83 | 3.06 | 0.17 | - | PoissonRegressor | 0.77 | 0.83 | 3.11 | 0.01 | - | LGBMRegressor | 0.77 | 0.83 | 3.11 | 0.07 | - | KNeighborsRegressor | 0.77 | 0.83 | 3.12 | 0.01 | - | DecisionTreeRegressor | 0.65 | 0.74 | 3.79 | 0.01 | - | MLPRegressor | 0.65 | 0.74 | 3.80 | 1.63 | - | HuberRegressor | 0.64 | 0.74 | 3.84 | 0.01 | - | GammaRegressor | 0.64 | 0.73 | 3.88 | 0.01 | - | LinearSVR | 0.62 | 0.72 | 3.96 | 0.01 | - | RidgeCV | 0.62 | 0.72 | 3.97 | 0.01 | - | BayesianRidge | 0.62 | 0.72 | 3.97 | 0.01 | - | Ridge | 0.62 | 0.72 | 3.97 | 0.01 | - | TransformedTargetRegressor | 0.62 | 0.72 | 3.97 | 0.01 | - | LinearRegression | 0.62 | 0.72 | 3.97 | 0.01 | - | ElasticNetCV | 0.62 | 0.72 | 3.98 | 0.04 | - | LassoCV | 0.62 | 0.72 | 3.98 | 0.06 | - | LassoLarsIC | 0.62 | 0.72 | 3.98 | 0.01 | - | LassoLarsCV | 0.62 | 0.72 | 3.98 | 0.02 | - | Lars | 0.61 | 0.72 | 3.99 | 0.01 | - | LarsCV | 0.61 | 0.71 | 4.02 | 0.04 | - | SGDRegressor | 0.60 | 0.70 | 4.07 | 0.01 | - | TweedieRegressor | 0.59 | 0.70 | 4.12 | 0.01 | - | GeneralizedLinearRegressor | 0.59 | 0.70 | 4.12 | 0.01 | - | ElasticNet | 0.58 | 0.69 | 4.16 | 0.01 | - | Lasso | 0.54 | 0.66 | 4.35 | 0.02 | - | RANSACRegressor | 0.53 | 0.65 | 4.41 | 0.04 | - | OrthogonalMatchingPursuitCV | 0.45 | 0.59 | 4.78 | 0.02 | - | PassiveAggressiveRegressor | 0.37 | 0.54 | 5.09 | 0.01 | - | GaussianProcessRegressor | 0.23 | 0.43 | 5.65 | 0.03 | - | OrthogonalMatchingPursuit | 0.16 | 0.38 | 5.89 | 0.01 | - | ExtraTreeRegressor | 0.08 | 0.32 | 6.17 | 0.01 | - | DummyRegressor | -0.38 | -0.02 | 7.56 | 0.01 | - | LassoLars | -0.38 | -0.02 | 7.56 | 0.01 | - | KernelRidge | -11.50 | -8.25 | 22.74 | 0.01 | + | Model | Adjusted R-Squared | R-Squared | RMSE | Time Taken | + 
|:------------------------------|---------------------:|------------:|---------:|-------------:| + | ExtraTreesRegressor | 0.378921 | 0.520076 | 54.2202 | 0.121466 | + | OrthogonalMatchingPursuitCV | 0.374947 | 0.517004 | 54.3934 | 0.0111742 | + | Lasso | 0.373483 | 0.515873 | 54.457 | 0.00620174 | + | LassoLars | 0.373474 | 0.515866 | 54.4575 | 0.0087235 | + | LarsCV | 0.3715 | 0.514341 | 54.5432 | 0.0160234 | + | LassoCV | 0.370413 | 0.513501 | 54.5903 | 0.0624897 | + | PassiveAggressiveRegressor | 0.366958 | 0.510831 | 54.7399 | 0.00689793 | + | LassoLarsIC | 0.364984 | 0.509306 | 54.8252 | 0.0108321 | + | SGDRegressor | 0.364307 | 0.508783 | 54.8544 | 0.0055306 | + | RidgeCV | 0.363002 | 0.507774 | 54.9107 | 0.00728202 | + | Ridge | 0.363002 | 0.507774 | 54.9107 | 0.00556874 | + | BayesianRidge | 0.362296 | 0.507229 | 54.9411 | 0.0122972 | + | LassoLarsCV | 0.361749 | 0.506806 | 54.9646 | 0.0175984 | + | TransformedTargetRegressor | 0.361749 | 0.506806 | 54.9646 | 0.00604773 | + | LinearRegression | 0.361749 | 0.506806 | 54.9646 | 0.00677514 | + | Lars | 0.358828 | 0.504549 | 55.0903 | 0.00935149 | + | ElasticNetCV | 0.356159 | 0.502486 | 55.2048 | 0.0478678 | + | HuberRegressor | 0.355251 | 0.501785 | 55.2437 | 0.0129263 | + | RandomForestRegressor | 0.349621 | 0.497434 | 55.4844 | 0.2331 | + | AdaBoostRegressor | 0.340416 | 0.490322 | 55.8757 | 0.0512381 | + | LGBMRegressor | 0.339239 | 0.489412 | 55.9255 | 0.0396187 | + | HistGradientBoostingRegressor | 0.335632 | 0.486625 | 56.0779 | 0.0897055 | + | PoissonRegressor | 0.323033 | 0.476889 | 56.6072 | 0.00953603 | + | ElasticNet | 0.301755 | 0.460447 | 57.4899 | 0.00604224 | + | KNeighborsRegressor | 0.299855 | 0.458979 | 57.5681 | 0.00757337 | + | OrthogonalMatchingPursuit | 0.292421 | 0.453235 | 57.8729 | 0.00709486 | + | BaggingRegressor | 0.291213 | 0.452301 | 57.9223 | 0.0302746 | + | GradientBoostingRegressor | 0.247009 | 0.418143 | 59.7011 | 0.136803 | + | TweedieRegressor | 0.244215 | 0.415984 | 
59.8118 | 0.00633955 | + | XGBRegressor | 0.224263 | 0.400567 | 60.5961 | 0.339694 | + | GammaRegressor | 0.223895 | 0.400283 | 60.6105 | 0.0235181 | + | RANSACRegressor | 0.203535 | 0.38455 | 61.4004 | 0.0653253 | + | LinearSVR | 0.116707 | 0.317455 | 64.6607 | 0.0077076 | + | ExtraTreeRegressor | 0.00201902 | 0.228833 | 68.7304 | 0.00626636 | + | NuSVR | -0.0667043 | 0.175728 | 71.0575 | 0.0143399 | + | SVR | -0.0964128 | 0.152772 | 72.0402 | 0.0114729 | + | DummyRegressor | -0.297553 | -0.00265478 | 78.3701 | 0.00592971 | + | DecisionTreeRegressor | -0.470263 | -0.136112 | 83.4229 | 0.00749898 | + | GaussianProcessRegressor | -0.769174 | -0.367089 | 91.5109 | 0.0770502 | + | MLPRegressor | -1.86772 | -1.21597 | 116.508 | 0.235267 | + | KernelRidge | -5.03822 | -3.6659 | 169.061 | 0.0243919 | """ def __init__( diff --git a/requirements.txt b/requirements.txt index 37ca346..ad2c32e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ pandas tqdm joblib lightgbm -xgboost \ No newline at end of file +xgboost +pytest-runner \ No newline at end of file diff --git a/setup.py b/setup.py index 0ded76a..77a64f3 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ author_email="shankar.pandala@live.com", python_requires=">=3.8", classifiers=[ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..0c25d74 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,15 @@ +import pytest +from click.testing import CliRunner +from lazypredict import cli + +def test_cli_main(): + runner = CliRunner() + result = runner.invoke(cli.main) + assert result.exit_code == 0 + assert "lazypredict.cli.main" in result.output + +def test_cli_help(): + runner = CliRunner() + result = runner.invoke(cli.main, ["--help"]) + assert result.exit_code == 0 + assert "--help Show 
this message and exit." in result.output \ No newline at end of file diff --git a/tests/test_helpers.py b/tests/test_helpers.py new file mode 100644 index 0000000..dadd1f5 --- /dev/null +++ b/tests/test_helpers.py @@ -0,0 +1,14 @@ +import pytest +import pandas as pd +from lazypredict.Supervised import get_card_split + +def test_get_card_split(): + df = pd.DataFrame({ + 'A': ['a', 'b', 'c', 'd', 'e'], + 'B': ['f', 'g', 'h', 'i', 'j'], + 'C': [1, 2, 3, 4, 5] + }) + cols = ['A', 'B'] + card_low, card_high = get_card_split(df, cols, n=3) + assert len(card_low) == 2 + assert len(card_high) == 0 \ No newline at end of file diff --git a/tests/test_init.py b/tests/test_init.py new file mode 100644 index 0000000..bcf50c4 --- /dev/null +++ b/tests/test_init.py @@ -0,0 +1,4 @@ +import lazypredict + +def test_import(): + assert lazypredict is not None \ No newline at end of file diff --git a/tests/test_supervised.py b/tests/test_supervised.py new file mode 100644 index 0000000..355ddd9 --- /dev/null +++ b/tests/test_supervised.py @@ -0,0 +1,51 @@ +import pytest +import numpy as np +import pandas as pd +from lazypredict.Supervised import LazyClassifier, LazyRegressor +from sklearn.datasets import load_breast_cancer, load_diabetes +from sklearn.model_selection import train_test_split +from sklearn.utils import shuffle + +def test_lazy_classifier_fit(): + data = load_breast_cancer() + X = data.data + y = data.target + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=123) + clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None) + models, predictions = clf.fit(X_train, X_test, y_train, y_test) + assert isinstance(models, pd.DataFrame) + assert isinstance(predictions, pd.DataFrame) + +def test_lazy_classifier_provide_models(): + data = load_breast_cancer() + X = data.data + y = data.target + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=123) + clf = LazyClassifier(verbose=0,
ignore_warnings=True, custom_metric=None) + clf.fit(X_train, X_test, y_train, y_test) + models = clf.provide_models(X_train, X_test, y_train, y_test) + assert isinstance(models, dict) + +def test_lazy_regressor_fit(): + diabetes = load_diabetes() + X, y = shuffle(diabetes.data, diabetes.target, random_state=13) + X = X.astype(np.float32) + offset = int(X.shape[0] * 0.9) + X_train, y_train = X[:offset], y[:offset] + X_test, y_test = X[offset:], y[offset:] + reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None) + models, predictions = reg.fit(X_train, X_test, y_train, y_test) + assert isinstance(models, pd.DataFrame) + assert isinstance(predictions, pd.DataFrame) + +def test_lazy_regressor_provide_models(): + diabetes = load_diabetes() + X, y = shuffle(diabetes.data, diabetes.target, random_state=13) + X = X.astype(np.float32) + offset = int(X.shape[0] * 0.9) + X_train, y_train = X[:offset], y[:offset] + X_test, y_test = X[offset:], y[offset:] + reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None) + reg.fit(X_train, X_test, y_train, y_test) + models = reg.provide_models(X_train, X_test, y_train, y_test) + assert isinstance(models, dict)