Skip to content

Commit

Permalink
Update RandomForest documentation (#418)
Browse files Browse the repository at this point in the history
* Update in documentation

Authored-by: FernandoVN98 <[email protected]>
  • Loading branch information
FernandoVN98 authored Nov 10, 2022
1 parent 4eb3fc6 commit 7579891
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 7 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,27 @@ This work has received funding from the European Union’s Horizon 2020 research

This work has also received funding from the collaboration project between the Barcelona Supercomputing Center (BSC) and Fujitsu Ltd.

In addition, the development of this software has also been supported by the following institutions:

* Spanish Government under contracts SEV2015-0493, TIN2015-65316 and PID2019-107255G.

* Generalitat de Catalunya under contract 2017-SGR-01414 and the CECH project, 50% co-funded by the European Regional Development Fund under the
framework of the ERDF Operational Programme for Catalunya 2014-2020.

* European Commission through the following R&D projects:
- H2020 I-BiDaaS project (Contract 780787)
- H2020 BioExcel Center of Excellence (Contracts 823830 and 675728)
- H2020 EuroHPC Joint Undertaking MEEP Project (Contract 946002)
- H2020 EuroHPC Joint Undertaking eFlows4HPC Project (Contract 955558)
- H2020 AI-Sprint project (Contract 101016577)
- H2020 PerMedCoE Center of Excellence (Contract 951773)
- Horizon Europe CAELESTIS project (Contract 101056886)
- Horizon Europe DT-Geo project (Contract 101058129)





## License

Apache License Version 2.0, see [LICENSE](LICENSE)
150 changes: 143 additions & 7 deletions dislib/trees/forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,20 +131,22 @@ def save_model(self, filepath, overwrite=True, save_format="json"):
Format used to save the models.
Examples
--------
>>> from dislib.cluster import DecisionTreeClassifier
>>> from dislib.trees import RandomForestClassifier
>>> import numpy as np
>>> import dislib as ds
>>> x = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
>>> y = np.array([1, 1, 2, 2, 2, 1])
>>> x_train = ds.array(x, (2, 2))
>>> model = DecisionTreeClassifier(n_clusters=2, random_state=0)
>>> model.fit(x_train)
>>> y_train = ds.array(y, (2, 1))
>>> model = RandomForestClassifier(n_estimators=2, random_state=0)
>>> model.fit(x_train, y_train)
>>> save_model(model, '/tmp/model')
>>> loaded_model = load_model('/tmp/model')
>>> x_test = ds.array(np.array([[0, 0], [4, 4]]), (2, 2))
>>> model_pred = model.predict(x_test)
>>> loaded_model_pred = loaded_model.predict(x_test)
>>> assert np.allclose(model_pred.collect(),
loaded_model_pred.collect())
>>> loaded_model_pred.collect())
"""

# Check overwrite
Expand Down Expand Up @@ -186,13 +188,15 @@ def load_model(self, filepath, load_format="json"):
Format used to load the model.
Examples
--------
>>> from dislib.cluster import DecisionTreeClassifier
>>> from dislib.trees import RandomForestClassifier
>>> import numpy as np
>>> import dislib as ds
>>> x = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
>>> y = np.array([1, 1, 2, 2, 2, 1])
>>> x_train = ds.array(x, (2, 2))
>>> model = DecisionTreeClassifier(n_clusters=2, random_state=0)
>>> model.fit(x_train)
>>> y_train = ds.array(y, (2, 1))
>>> model = RandomForestClassifier(n_estimators=2, random_state=0)
>>> model.fit(x_train, y_train)
>>> save_model(model, '/tmp/model')
>>> loaded_model = load_model('/tmp/model')
>>> x_test = ds.array(np.array([[0, 0], [4, 4]]), (2, 2))
Expand Down Expand Up @@ -415,6 +419,72 @@ def score(self, x, y, collect=False):

return compss_wait_on(score) if collect else score

def load_model(self, filepath, load_format="json"):
    """Loads a model from a file.

    The model is reinstantiated in the exact same state in which it
    was saved, without any of the code used for model definition or
    fitting. The call is forwarded unchanged to the parent class
    implementation; this override does not return a value.

    Parameters
    ----------
    filepath : str
        Path of the saved model.
    load_format : str, optional (default='json')
        Format used to load the model.

    Examples
    --------
    >>> from dislib.trees import RandomForestClassifier
    >>> import numpy as np
    >>> import dislib as ds
    >>> x = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
    >>> y = np.array([1, 1, 2, 2, 2, 1])
    >>> x_train = ds.array(x, (2, 2))
    >>> y_train = ds.array(y, (2, 1))
    >>> model = RandomForestClassifier(n_estimators=2, random_state=0)
    >>> model.fit(x_train, y_train)
    >>> model.save_model('/tmp/model')
    >>> loaded_model = RandomForestClassifier(n_estimators=2, random_state=0)
    >>> loaded_model.load_model('/tmp/model')
    >>> x_test = ds.array(np.array([[0, 0], [4, 4]]), (2, 2))
    >>> model_pred = model.predict(x_test)
    >>> loaded_model_pred = loaded_model.predict(x_test)
    >>> assert np.allclose(model_pred.collect(),
    ...                    loaded_model_pred.collect())
    """
    # Thin override: exists to attach classifier-specific documentation;
    # the actual loading is done by the parent class.
    super().load_model(filepath, load_format=load_format)

def save_model(self, filepath, overwrite=True, save_format="json"):
    """Saves a model to a file.

    The model is synchronized before saving and can be reinstantiated in
    the exact same state, without any of the code used for model
    definition or fitting. The call is forwarded unchanged to the parent
    class implementation; this override does not return a value.

    Parameters
    ----------
    filepath : str
        Path where to save the model.
    overwrite : bool, optional (default=True)
        Whether any existing model at the target
        location should be overwritten.
    save_format : str, optional (default='json')
        Format used to save the models.

    Examples
    --------
    >>> from dislib.trees import RandomForestClassifier
    >>> import numpy as np
    >>> import dislib as ds
    >>> x = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
    >>> y = np.array([1, 1, 2, 2, 2, 1])
    >>> x_train = ds.array(x, (2, 2))
    >>> y_train = ds.array(y, (2, 1))
    >>> model = RandomForestClassifier(n_estimators=2, random_state=0)
    >>> model.fit(x_train, y_train)
    >>> model.save_model('/tmp/model')
    >>> loaded_model = RandomForestClassifier(n_estimators=2, random_state=0)
    >>> loaded_model.load_model('/tmp/model')
    >>> x_test = ds.array(np.array([[0, 0], [4, 4]]), (2, 2))
    >>> model_pred = model.predict(x_test)
    >>> loaded_model_pred = loaded_model.predict(x_test)
    >>> assert np.allclose(model_pred.collect(),
    ...                    loaded_model_pred.collect())
    """
    # Thin override: exists to attach classifier-specific documentation;
    # the actual saving is done by the parent class.
    super().save_model(filepath, overwrite=overwrite, save_format=save_format)


class RandomForestRegressor(BaseRandomForest):
"""A distributed random forest regressor.
Expand Down Expand Up @@ -554,6 +624,72 @@ def score(self, x, y, collect=False):

return compss_wait_on(score) if collect else score

def load_model(self, filepath, load_format="json"):
    """Loads a model from a file.

    The model is reinstantiated in the exact same state in which it
    was saved, without any of the code used for model definition or
    fitting. The call is forwarded unchanged to the parent class
    implementation; this override does not return a value.

    Parameters
    ----------
    filepath : str
        Path of the saved model.
    load_format : str, optional (default='json')
        Format used to load the model.

    Examples
    --------
    >>> from dislib.trees import RandomForestRegressor
    >>> import numpy as np
    >>> import dislib as ds
    >>> x = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
    >>> y = np.array([1.5, 1.2, 2.7, 2.1, 0.2, 0.6])
    >>> x_train = ds.array(x, (2, 2))
    >>> y_train = ds.array(y, (2, 1))
    >>> model = RandomForestRegressor(n_estimators=2, random_state=0)
    >>> model.fit(x_train, y_train)
    >>> model.save_model('/tmp/model')
    >>> loaded_model = RandomForestRegressor(n_estimators=2, random_state=0)
    >>> loaded_model.load_model('/tmp/model')
    >>> x_test = ds.array(np.array([[0, 0], [4, 4]]), (2, 2))
    >>> model_pred = model.predict(x_test)
    >>> loaded_model_pred = loaded_model.predict(x_test)
    >>> assert np.allclose(model_pred.collect(),
    ...                    loaded_model_pred.collect())
    """
    # Thin override: exists to attach regressor-specific documentation;
    # the actual loading is done by the parent class.
    super().load_model(filepath, load_format=load_format)

def save_model(self, filepath, overwrite=True, save_format="json"):
    """Saves a model to a file.

    The model is synchronized before saving and can be reinstantiated in
    the exact same state, without any of the code used for model
    definition or fitting. The call is forwarded unchanged to the parent
    class implementation; this override does not return a value.

    Parameters
    ----------
    filepath : str
        Path where to save the model.
    overwrite : bool, optional (default=True)
        Whether any existing model at the target
        location should be overwritten.
    save_format : str, optional (default='json')
        Format used to save the models.

    Examples
    --------
    >>> from dislib.trees import RandomForestRegressor
    >>> import numpy as np
    >>> import dislib as ds
    >>> x = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
    >>> y = np.array([1.5, 1.2, 2.7, 2.1, 0.2, 0.6])
    >>> x_train = ds.array(x, (2, 2))
    >>> y_train = ds.array(y, (2, 1))
    >>> model = RandomForestRegressor(n_estimators=2, random_state=0)
    >>> model.fit(x_train, y_train)
    >>> model.save_model('/tmp/model')
    >>> loaded_model = RandomForestRegressor(n_estimators=2, random_state=0)
    >>> loaded_model.load_model('/tmp/model')
    >>> x_test = ds.array(np.array([[0, 0], [4, 4]]), (2, 2))
    >>> model_pred = model.predict(x_test)
    >>> loaded_model_pred = loaded_model.predict(x_test)
    >>> assert np.allclose(model_pred.collect(),
    ...                    loaded_model_pred.collect())
    """
    # Thin override: exists to attach regressor-specific documentation;
    # the actual saving is done by the parent class.
    super().save_model(filepath, overwrite=overwrite, save_format=save_format)


def _base_soft_vote(classes, *predictions):
aggregate = predictions[0]
Expand Down
3 changes: 3 additions & 0 deletions docs/source/api-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ dislib.decomposition: Matrix Decomposition
:meth:`decomposition.qr <dislib.decomposition.qr.base.qr>` -
QR decomposition.

:meth:`decomposition.tsqr <dislib.decomposition.tsqr.base.tsqr>` -
Tall-Skinny QR decomposition.

:class:`decomposition.PCA <dislib.decomposition.pca.base.PCA>` -
Principal
Component Analysis (PCA).
Expand Down
5 changes: 5 additions & 0 deletions docs/source/dislib.decomposition.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,8 @@ dislib.decomposition
:members:
:undoc-members:
:show-inheritance:

.. automodule:: dislib.decomposition.tsqr.base
:members:
:undoc-members:
:show-inheritance:

0 comments on commit 7579891

Please sign in to comment.