diff --git a/verticapy/machine_learning/memmodel/decomposition.py b/verticapy/machine_learning/memmodel/decomposition.py
index ac0df275d..bb434da97 100755
--- a/verticapy/machine_learning/memmodel/decomposition.py
+++ b/verticapy/machine_learning/memmodel/decomposition.py
@@ -100,9 +100,10 @@ def transform_sql(self, X: ArrayLike) -> list[str]:
"of the vector 'mean'."
)
sql = []
- for i in range(len(X)):
+ m, n = self.principal_components_.shape
+ for i in range(n):
sql_tmp = []
- for j in range(len(X)):
+ for j in range(m):
sql_tmp += [
f"({X[j]} - {self.mean_[j]}) * {self.principal_components_[:, i][j]}"
]
@@ -232,15 +233,16 @@ def transform_sql(self, X: ArrayLike) -> list[str]:
list
SQL code.
"""
- if len(X) != len(self.values_):
+ if len(X) != len(self.vectors_):
raise ValueError(
"The length of parameter 'X' must be equal to the length "
"of the vector 'values'."
)
sql = []
- for i in range(len(X)):
+ m, n = self.vectors_.shape
+ for i in range(n):
sql_tmp = []
- for j in range(len(X)):
+ for j in range(m):
sql_tmp += [f"{X[j]} * {self.vectors_[:, i][j]} / {self.values_[i]}"]
sql += [" + ".join(sql_tmp)]
return sql
diff --git a/verticapy/machine_learning/vertica/decomposition.py b/verticapy/machine_learning/vertica/decomposition.py
index 9abfeef1a..0593fb661 100755
--- a/verticapy/machine_learning/vertica/decomposition.py
+++ b/verticapy/machine_learning/vertica/decomposition.py
@@ -421,7 +421,278 @@ class PCA(Decomposition):
step.
method: str, optional
The method used to calculate PCA.
- lapack: Lapack definition.
+
+ - lapack:
+ Lapack definition.
+
+ Examples
+ ---------
+
+ The following examples provide a basic understanding of usage.
+ For more detailed examples, please refer to the
+ :ref:`user_guide.machine_learning` or the
+ `Examples <https://www.vertica.com/python/examples/>`_
+ section on the website.
+
+ Load data for machine learning
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ We import ``verticapy``:
+
+ .. ipython:: python
+
+ import verticapy as vp
+
+ .. hint::
+
+ By assigning an alias to ``verticapy``, we mitigate the risk of code
+ collisions with other libraries. This precaution is necessary
+ because verticapy uses commonly known function names like "average"
+ and "median", which can potentially lead to naming conflicts.
+ The use of an alias ensures that the functions from verticapy are
+ used as intended without interfering with functions from other
+ libraries.
+
+ For this example, we will use the winequality dataset.
+
+ .. code-block:: python
+
+ import verticapy.datasets as vpd
+
+ data = vpd.load_winequality()
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_winequality.html
+
+ .. note::
+
+ VerticaPy offers a wide range of sample datasets that are
+ ideal for training and testing purposes. You can explore
+ the full list of available datasets in the :ref:`api.datasets`,
+ which provides detailed information on each dataset
+ and how to use them effectively. These datasets are invaluable
+ resources for honing your data analysis and machine learning
+ skills within the VerticaPy environment.
+
+ We can drop the "color" column because it is of type varchar.
+
+ .. code-block::
+
+ data.drop("color")
+
+ .. ipython:: python
+ :suppress:
+
+ import verticapy.datasets as vpd
+ data = vpd.load_winequality()
+ data.drop("color")
+
+ Model Initialization
+ ^^^^^^^^^^^^^^^^^^^^^
+
+ First we import the ``PCA`` model:
+
+ .. code-block::
+
+ from verticapy.machine_learning.vertica import PCA
+
+ .. ipython:: python
+ :suppress:
+
+ from verticapy.machine_learning.vertica import PCA
+
+ Then we can create the model:
+
+ .. ipython:: python
+ :okwarning:
+
+ model = PCA(
+ n_components = 3,
+ )
+
+ You can select the number of components by the ``n_components``
+ parameter. If it is not provided, then all are considered.
+
+ .. hint::
+
+ In ``verticapy`` 1.0.x and higher, you do not need to specify the
+ model name, as the name is automatically assigned. If you need to
+ re-use the model, you can fetch the model name from the model's
+ attributes.
+
+ .. important::
+
+ The model name is crucial for the model management system and
+ versioning. It's highly recommended to provide a name if you
+ plan to reuse the model later.
+
+ Model Training
+ ^^^^^^^^^^^^^^^
+
+ We can now fit the model:
+
+ .. ipython:: python
+ :okwarning:
+
+ model.fit(data)
+
+ .. important::
+
+ To train a model, you can directly use the ``vDataFrame`` or the
+ name of the relation stored in the database.
+
+ Scores
+ ^^^^^^
+
+ The decomposition score on the dataset for each
+ transformed column can be calculated by:
+
+ .. ipython:: python
+
+ model.score()
+
+ For more details on the function, check out
+ :py:mod:`verticapy.machine_learning.vertica.decomposition.PCA.score`.
+
+ You can also fetch the explained variance by:
+
+ .. ipython:: python
+
+ model.explained_variance_
+
+ Principal Components
+ ^^^^^^^^^^^^^^^^^^^^^^
+
+ To get the transformed dataset in the form of principal
+ components:
+
+ .. ipython:: python
+
+ model.transform(data)
+
+ Please refer to :py:mod:`verticapy.machine_learning.vertica.decomposition.PCA.transform`
+ for more details on transforming a ``vDataFrame``.
+
+ Similarly, you can perform the inverse transform to get
+ the original features using:
+
+ .. code-block:: python
+
+ model.inverse_transform(data_transformed)
+
+ The variable ``data_transformed`` includes the PCA components.
+
+ Plots - PCA
+ ^^^^^^^^^^^^
+
+ You can plot the first two components conveniently using:
+
+ .. code-block:: python
+
+ model.plot()
+
+ .. ipython:: python
+ :suppress:
+
+ vp.set_option("plotting_lib", "plotly")
+ fig = model.plot()
+ fig.write_html("figures/machine_learning_vertica_pca_plot.html")
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_pca_plot.html
+
+ Plots - Scree
+ ^^^^^^^^^^^^^^
+
+ You can also plot the Scree plot:
+
+ .. code-block:: python
+
+ model.plot_scree()
+
+ .. ipython:: python
+ :suppress:
+
+ vp.set_option("plotting_lib", "highcharts")
+ fig = model.plot_scree()
+ html_text = fig.htmlcontent.replace("container", "ml_vertica_PCA_scree")
+ with open("figures/machine_learning_vertica_pca_plot_scree.html", "w") as file:
+ file.write(html_text)
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_pca_plot_scree.html
+
+ Parameter Modification
+ ^^^^^^^^^^^^^^^^^^^^^^^
+
+ In order to see the parameters:
+
+ .. ipython:: python
+
+ model.get_params()
+
+ And to manually change some of the parameters:
+
+ .. ipython:: python
+
+ model.set_params({'n_components': 3})
+
+ Model Register
+ ^^^^^^^^^^^^^^
+
+ In order to register the model for tracking and versioning:
+
+ .. code-block:: python
+
+ model.register("model_v1")
+
+ Please refer to :ref:`notebooks/ml/model_tracking_versioning/index.html`
+ for more details on model tracking and versioning.
+
+ Model Exporting
+ ^^^^^^^^^^^^^^^^
+
+ **To Memmodel**
+
+ .. code-block:: python
+
+ model.to_memmodel()
+
+ .. note::
+
+ ``MemModel`` objects serve as in-memory representations of machine
+ learning models. They can be used for both in-database and in-memory
+ prediction tasks. These objects can be pickled in the same way that
+ you would pickle a ``scikit-learn`` model.
+
+ The preceding methods for exporting the model use ``MemModel``, and it
+ is recommended to use ``MemModel`` directly.
+
+ **SQL**
+
+ To get the SQL query, use the following:
+
+ .. ipython:: python
+
+ model.to_sql()
+
+ **To Python**
+
+ To obtain the prediction function in Python syntax, use the following code:
+
+ .. ipython:: python
+
+ X = [[3.8, 0.3, 0.02, 11, 0.03, 20, 113, 0.99, 3, 0.4, 12, 6, 0]]
+ model.to_python()(X)
+
+ .. hint::
+
+ The
+ :py:mod:`verticapy.machine_learning.vertica.decomposition.PCA.to_python`
+ method is used to retrieve the Principal Component values.
+ For specific details on how to
+ use this method for different model types, refer to the relevant
+ documentation for each model.
"""
# Properties.
@@ -778,7 +1049,278 @@ class SVD(Decomposition):
number of rows).
method: str, optional
The method used to calculate SVD.
- lapack: Lapack definition.
+
+ - lapack:
+ Lapack definition.
+
+ Examples
+ ---------
+
+ The following examples provide a basic understanding of usage.
+ For more detailed examples, please refer to the
+ :ref:`user_guide.machine_learning` or the
+ `Examples <https://www.vertica.com/python/examples/>`_
+ section on the website.
+
+ Load data for machine learning
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ We import ``verticapy``:
+
+ .. ipython:: python
+
+ import verticapy as vp
+
+ .. hint::
+
+ By assigning an alias to ``verticapy``, we mitigate the risk of code
+ collisions with other libraries. This precaution is necessary
+ because verticapy uses commonly known function names like "average"
+ and "median", which can potentially lead to naming conflicts.
+ The use of an alias ensures that the functions from verticapy are
+ used as intended without interfering with functions from other
+ libraries.
+
+ For this example, we will use the winequality dataset.
+
+ .. code-block:: python
+
+ import verticapy.datasets as vpd
+
+ data = vpd.load_winequality()
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_winequality.html
+
+ We can drop the "color" column because it is of type varchar.
+
+ .. code-block::
+
+ data.drop("color")
+
+ .. note::
+
+ VerticaPy offers a wide range of sample datasets that are
+ ideal for training and testing purposes. You can explore
+ the full list of available datasets in the :ref:`api.datasets`,
+ which provides detailed information on each dataset
+ and how to use them effectively. These datasets are invaluable
+ resources for honing your data analysis and machine learning
+ skills within the VerticaPy environment.
+
+ .. ipython:: python
+ :suppress:
+
+ import verticapy.datasets as vpd
+ data = vpd.load_winequality()
+ data.drop("color")
+
+ Model Initialization
+ ^^^^^^^^^^^^^^^^^^^^^
+
+ First we import the ``SVD`` model:
+
+ .. code-block::
+
+ from verticapy.machine_learning.vertica import SVD
+
+ .. ipython:: python
+ :suppress:
+
+ from verticapy.machine_learning.vertica import SVD
+
+ Then we can create the model:
+
+ .. ipython:: python
+ :okwarning:
+
+ model = SVD(
+ n_components = 3,
+ )
+
+ You can select the number of components by the ``n_components``
+ parameter. If it is not provided, then all are considered.
+
+ .. hint::
+
+ In ``verticapy`` 1.0.x and higher, you do not need to specify the
+ model name, as the name is automatically assigned. If you need to
+ re-use the model, you can fetch the model name from the model's
+ attributes.
+
+ .. important::
+
+ The model name is crucial for the model management system and
+ versioning. It's highly recommended to provide a name if you
+ plan to reuse the model later.
+
+ Model Training
+ ^^^^^^^^^^^^^^^
+
+ We can now fit the model:
+
+ .. ipython:: python
+ :okwarning:
+
+ model.fit(data)
+
+ .. important::
+
+ To train a model, you can directly use the ``vDataFrame`` or the
+ name of the relation stored in the database.
+
+ Scores
+ ^^^^^^
+
+ The decomposition score on the dataset for each
+ transformed column can be calculated by:
+
+ .. ipython:: python
+
+ model.score()
+
+ For more details on the function, check out
+ :py:mod:`verticapy.machine_learning.vertica.decomposition.SVD.score`.
+
+ You can also fetch the explained variance by:
+
+ .. ipython:: python
+
+ model.explained_variance_
+
+ Principal Components
+ ^^^^^^^^^^^^^^^^^^^^^^
+
+ To get the transformed dataset in the form of principal
+ components:
+
+ .. ipython:: python
+
+ model.transform(data)
+
+ Please refer to :py:mod:`verticapy.machine_learning.vertica.decomposition.SVD.transform`
+ for more details on transforming a ``vDataFrame``.
+
+ Similarly, you can perform the inverse transform to get
+ the original features using:
+
+ .. code-block:: python
+
+ model.inverse_transform(data_transformed)
+
+ The variable ``data_transformed`` includes the SVD components.
+
+ Plots - SVD
+ ^^^^^^^^^^^^
+
+ You can plot the first two dimensions conveniently using:
+
+ .. code-block:: python
+
+ model.plot()
+
+ .. ipython:: python
+ :suppress:
+
+ vp.set_option("plotting_lib", "plotly")
+ fig = model.plot()
+ fig.write_html("figures/machine_learning_vertica_svd_plot.html")
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_svd_plot.html
+
+ Plots - Scree
+ ^^^^^^^^^^^^^^
+
+ You can also plot the Scree plot:
+
+ .. code-block:: python
+
+ model.plot_scree()
+
+ .. ipython:: python
+ :suppress:
+
+ vp.set_option("plotting_lib", "highcharts")
+ fig = model.plot_scree()
+ html_text = fig.htmlcontent.replace("container", "ml_vertica_SVD_scree")
+ with open("figures/machine_learning_vertica_svd_plot_scree.html", "w") as file:
+ file.write(html_text)
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_svd_plot_scree.html
+
+ Parameter Modification
+ ^^^^^^^^^^^^^^^^^^^^^^^
+
+ In order to see the parameters:
+
+ .. ipython:: python
+
+ model.get_params()
+
+ And to manually change some of the parameters:
+
+ .. ipython:: python
+
+ model.set_params({'n_components': 3})
+
+ Model Register
+ ^^^^^^^^^^^^^^
+
+ In order to register the model for tracking and versioning:
+
+ .. code-block:: python
+
+ model.register("model_v1")
+
+ Please refer to :ref:`notebooks/ml/model_tracking_versioning/index.html`
+ for more details on model tracking and versioning.
+
+ Model Exporting
+ ^^^^^^^^^^^^^^^^
+
+ **To Memmodel**
+
+ .. code-block:: python
+
+ model.to_memmodel()
+
+ .. note::
+
+ ``MemModel`` objects serve as in-memory representations of machine
+ learning models. They can be used for both in-database and in-memory
+ prediction tasks. These objects can be pickled in the same way that
+ you would pickle a ``scikit-learn`` model.
+
+ The preceding methods for exporting the model use ``MemModel``, and it
+ is recommended to use ``MemModel`` directly.
+
+ **SQL**
+
+ To get the SQL query, use the following:
+
+ .. ipython:: python
+
+ model.to_sql()
+
+ **To Python**
+
+ To obtain the prediction function in Python syntax, use the following code:
+
+ .. ipython:: python
+
+ X = [[3.8, 0.3, 0.02, 11, 0.03, 20, 113, 0.99, 3, 0.4, 12, 6, 0]]
+ model.to_python()(X)
+
+ .. hint::
+
+ The
+ :py:mod:`verticapy.machine_learning.vertica.decomposition.SVD.to_python`
+ method is used to retrieve the Principal Component values.
+ For specific details on how to
+ use this method for different model types, refer to the relevant
+ documentation for each model.
"""
# Properties.