From 10b0eca1c9f5259301ebdd9a5540d7a9a00113e5 Mon Sep 17 00:00:00 2001
From: umar <46414488+mail4umar@users.noreply.github.com>
Date: Mon, 30 Oct 2023 23:31:42 -0500
Subject: [PATCH 1/7] Sphinx Docstring - Scaler and subclasses
---
.../machine_learning/vertica/preprocessing.py | 259 +++++++++++++++++-
1 file changed, 250 insertions(+), 9 deletions(-)
diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py
index 64404e99d..07b378b30 100755
--- a/verticapy/machine_learning/vertica/preprocessing.py
+++ b/verticapy/machine_learning/vertica/preprocessing.py
@@ -657,12 +657,226 @@ class Scaler(Preprocessing):
existing model.
method: str, optional
Method used to scale the data.
- zscore : Scaling using the Z-Score.
- (x - avg) / std
- robust_zscore : Scaling using the Robust Z-Score.
- (x - median) / (1.4826 * mad)
- minmax : Normalization using the Min & Max.
- (x - min) / (max - min)
+
+ - zscore:
+ Scaling using the Z-Score
+
+ .. math::
+
+ (x - avg) / std
+
+ - robust_zscore:
+ Scaling using the Robust Z-Score.
+
+ .. math::
+
+ (x - median) / (1.4826 * mad)
+
+ - minmax:
+ Normalization using the Min & Max.
+
+ .. math::
+
+ (x - min) / (max - min)
+
+ Examples
+ --------
+
+ The following examples provide a basic understanding of usage.
+ For more detailed examples, please refer to the
+ :ref:`user_guide.machine_learning` or the
+ `Examples `_
+ section on the website.
+
+ Load data for machine learning
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ We import ``verticapy``:
+
+ .. ipython:: python
+
+ import verticapy as vp
+
+ .. hint::
+
+ By assigning an alias to ``verticapy``, we mitigate the risk of code
+ collisions with other libraries. This precaution is necessary
+ because verticapy uses commonly known function names like "average"
+ and "median", which can potentially lead to naming conflicts.
+ The use of an alias ensures that the functions from verticapy are
+ used as intended without interfering with functions from other
+ libraries.
+
+ For this example, we will use a dummy dataset.
+
+ .. ipython:: python
+
+ data = vp.vDataFrame({"values": [1, 1.01, 1.02, 1.05, 1.024]})
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_titanic.html
+
+ .. note::
+
+ VerticaPy offers a wide range of sample datasets that are
+ ideal for training and testing purposes. You can explore
+ the full list of available datasets in the :ref:`api.datasets`,
+ which provides detailed information on each dataset
+ and how to use them effectively. These datasets are invaluable
+ resources for honing your data analysis and machine learning
+ skills within the VerticaPy environment.
+
+
+ Model Initialization
+ ^^^^^^^^^^^^^^^^^^^^^
+
+ First we import the ``Scaler`` model:
+
+ .. ipython:: python
+
+ from verticapy.machine_learning.vertica import Scaler
+
+ Then we can create the model:
+
+ .. ipython:: python
+ :okwarning:
+
+ model = Scaler(method = "zscore")
+
+ .. hint::
+
+ In ``verticapy`` 1.0.x and higher, you do not need to specify the
+ model name, as the name is automatically assigned. If you need to
+ re-use the model, you can fetch the model name from the model's
+ attributes.
+
+ .. important::
+
+ The model name is crucial for the model management system and
+ versioning. It's highly recommended to provide a name if you
+ plan to reuse the model later.
+
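+    For instance, a sketch of creating a named model (the name below is
+    hypothetical, and ``overwrite_model`` is assumed to replace any model
+    already stored under that name):
+
+    .. code-block:: python
+
+        model = Scaler(
+            name = "my_zscore_scaler",
+            method = "zscore",
+            overwrite_model = True,
+        )
+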
+ Model Fitting
+ ^^^^^^^^^^^^^^^
+
+ We can now fit the model:
+
+ .. ipython:: python
+ :okwarning:
+
+ model.fit(data)
+
+ .. important::
+
+ To fit a model, you can directly use the ``vDataFrame``
+ or the name of the relation stored in the database.
+
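+    For example, assuming the same data is stored in a table named
+    ``public.my_values`` (a hypothetical relation), the fit could also be
+    expressed as:
+
+    .. code-block:: python
+
+        model.fit("public.my_values")
+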
+ Model Parameters
+ ^^^^^^^^^^^^^^^^^
+
+    To fetch the model parameter (mean), you can use:
+
+ .. ipython:: python
+
+ model.mean_
+
+ Similarly for standard deviation:
+
+ .. ipython:: python
+
+ model.std_
+
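+    As a quick sanity check, the fitted values can be plugged back into
+    the z-score formula by hand. A sketch, assuming ``mean_`` and ``std_``
+    each hold a single entry for the ``values`` column:
+
+    .. code-block:: python
+
+        # reproduce the z-score of the first data point: (x - avg) / std
+        (1 - model.mean_[0]) / model.std_[0]
+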
+ Conversion/Transformation
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ To get the scaled dataset, we can use the ``transform`` function.
+ Let us transform the data:
+
+
+ .. ipython:: python
+ :okwarning:
+
+ model.transform(data)
+
+ Please refer to
+    :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.transform`
+ for more details on transforming a ``vDataFrame``.
+
+    Similarly, you can perform the inverse transform to get
+ the original features using:
+
+ .. code-block:: python
+
+ model.inverse_transform(data_transformed)
+
+ The variable ``data_transformed`` is the scaled dataset.
+
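+    Putting the two steps together, a minimal round-trip sketch:
+
+    .. code-block:: python
+
+        # scale the data, then recover the original values
+        data_transformed = model.transform(data)
+        model.inverse_transform(data_transformed)
+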
+ Model Register
+ ^^^^^^^^^^^^^^
+
+ In order to register the model for tracking and versioning:
+
+ .. code-block:: python
+
+ model.register("model_v1")
+
+ Please refer to
+ :ref:`notebooks/ml/model_tracking_versioning/index.html`
+ for more details on model tracking and versioning.
+
+ Model Exporting
+ ^^^^^^^^^^^^^^^^
+
+ **To Memmodel**
+
+ .. code-block:: python
+
+ model.to_memmodel()
+
+ .. note::
+
+ ``MemModel`` objects serve as in-memory representations of
+ machine learning models. They can be used for both in-database
+ and in-memory prediction tasks. These objects can be pickled
+ in the same way that you would pickle a ``scikit-learn`` model.
+
+ The preceding methods for exporting the model use ``MemModel``,
+ and it is recommended to use ``MemModel`` directly.
+
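+    For instance, a sketch of persisting the in-memory model with the
+    standard ``pickle`` module (file name hypothetical):
+
+    .. code-block:: python
+
+        import pickle
+
+        # serialize the in-memory representation, as you would a
+        # scikit-learn model
+        mem_model = model.to_memmodel()
+        with open("scaler_memmodel.pkl", "wb") as f:
+            pickle.dump(mem_model, f)
+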
+ **SQL**
+
+    To get the SQL query, use the following:
+
+ .. ipython:: python
+
+ model.to_sql()
+
+ **To Python**
+
+ To obtain the prediction function in Python syntax, use the
+ following code:
+
+ .. ipython:: python
+
+ X = [[1]]
+ model.to_python()(X)
+
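+    The returned function can also score several rows at once; a brief
+    sketch using two hypothetical input rows, each with the single fitted
+    feature:
+
+    .. code-block:: python
+
+        func = model.to_python()
+        func([[1], [1.05]])
+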
+ .. hint::
+
+ The
+ :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python`
+ method is used to scale the data. For specific details on how to use this method for
+ different model types, refer to the relevant documentation for
+ each model.
+
+ .. seealso::
+ | :py:mod:`verticapy.machine_learning.vertica.preprocessing.StandardScaler` :
+        Scaler with method set to ``zscore``.
+ | :py:mod:`verticapy.machine_learning.vertica.preprocessing.RobustScaler` :
+        Scaler with method set to ``robust_zscore``.
+ | :py:mod:`verticapy.machine_learning.vertica.preprocessing.MinMaxScaler` :
+        Scaler with method set to ``minmax``.
+
"""
# Properties.
@@ -742,7 +956,16 @@ def to_memmodel(self) -> mm.Scaler:
class StandardScaler(Scaler):
- """i.e. Scaler with param method = 'zscore'"""
+ """
+ i.e. Scaler with param method = 'zscore'
+
+ .. note::
+
+ This is a child class. See
+ :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
+ for more details and examples.
+
+ """
@property
def _attributes(self) -> list[str]:
@@ -753,7 +976,16 @@ def __init__(self, name: str = None, overwrite_model: bool = False) -> None:
class RobustScaler(Scaler):
- """i.e. Scaler with param method = 'robust_zscore'"""
+ """
+ i.e. Scaler with param method = 'robust_zscore'
+
+ .. note::
+
+ This is a child class. See
+ :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
+ for more details and examples.
+
+ """
@property
def _attributes(self) -> list[str]:
@@ -764,7 +996,16 @@ def __init__(self, name: str = None, overwrite_model: bool = False) -> None:
class MinMaxScaler(Scaler):
- """i.e. Scaler with param method = 'minmax'"""
+ """
+ i.e. Scaler with param method = 'minmax'
+
+ .. note::
+
+ This is a child class. See
+ :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
+ for more details and examples.
+
+ """
@property
def _attributes(self) -> list[str]:
From a804b53e69582b18944444fdcbd0ac194732e45b Mon Sep 17 00:00:00 2001
From: umar <46414488+mail4umar@users.noreply.github.com>
Date: Mon, 30 Oct 2023 23:58:49 -0500
Subject: [PATCH 2/7] Added Balance
---
.../machine_learning/vertica/preprocessing.py | 128 ++++++++++++++++--
1 file changed, 113 insertions(+), 15 deletions(-)
diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py
index 07b378b30..1db7231da 100755
--- a/verticapy/machine_learning/vertica/preprocessing.py
+++ b/verticapy/machine_learning/vertica/preprocessing.py
@@ -70,18 +70,25 @@ def Balance(
Response column.
method: str, optional
Method used to do the balancing.
- hybrid : Performs over-sampling and
- under-sampling on different
- classes so that each class is
- equally represented.
- over : Over-samples on all classes,
- except the most represented
- class, towards the most
- represented class's cardinality.
- under : Under-samples on all classes,
- except the least represented
- class, towards the least
- represented class's cardinality.
+
+    - hybrid:
+ Performs over-sampling and
+ under-sampling on different
+ classes so that each class is
+ equally represented.
+
+    - over:
+ Over-samples on all classes,
+ except the most represented
+ class, towards the most
+ represented class's cardinality.
+
+ - under:
+ Under-samples on all classes,
+ except the least represented
+ class, towards the least
+ represented class's cardinality.
+
ratio: float, optional
The desired ratio between the majority class
and the minority class. This value has no
@@ -92,6 +99,100 @@ def Balance(
-------
vDataFrame
vDataFrame of the created view.
+
+
+ Examples
+ --------
+
+ The following examples provide a basic understanding of usage.
+ For more detailed examples, please refer to the
+ :ref:`user_guide.machine_learning` or the
+ `Examples `_
+ section on the website.
+
+ Load data for machine learning
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ We import ``verticapy``:
+
+ .. ipython:: python
+
+ import verticapy as vp
+
+ .. hint::
+
+ By assigning an alias to ``verticapy``, we mitigate the risk of code
+ collisions with other libraries. This precaution is necessary
+ because verticapy uses commonly known function names like "average"
+ and "median", which can potentially lead to naming conflicts.
+ The use of an alias ensures that the functions from verticapy are
+ used as intended without interfering with functions from other
+ libraries.
+
+ For this example, we will use the Titanic dataset.
+
+ .. code-block:: python
+
+ import verticapy.datasets as vpd
+
+ data = vpd.load_titanic()
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_titanic.html
+
+ .. ipython:: python
+ :suppress:
+
+ import verticapy.datasets as vpd
+ data = vpd.load_titanic()
+
+ .. note::
+
+ VerticaPy offers a wide range of sample datasets that are
+ ideal for training and testing purposes. You can explore
+ the full list of available datasets in the :ref:`api.datasets`,
+ which provides detailed information on each dataset
+ and how to use them effectively. These datasets are invaluable
+ resources for honing your data analysis and machine learning
+ skills within the VerticaPy environment.
+
+
+ Model Application
+ ^^^^^^^^^^^^^^^^^^^
+
+ First we import the ``Balance`` model:
+
+ .. ipython:: python
+
+ from verticapy.machine_learning.vertica import Balance
+
+ Then we can directly apply it to the dataset:
+
+ .. ipython:: python
+ :okwarning:
+
+ @suppress
+ vp.drop("balance_model")
+
+ Balance(name = "balance_model",
+ input_relation = data,
+ y = "survived",
+ method = "under"
+ )
+
+ .. important::
+
+ The model name is crucial for the model management system and
+ versioning. It's highly recommended to provide a name if you
+ plan to reuse the model later.
+
+ The output vDataFrame can then be used or stored for later analysis.
+
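+    For example, the returned ``vDataFrame`` can be captured and inspected
+    directly; a sketch, assuming ``topk`` is available on the response
+    column to check the class counts (the view name below is hypothetical):
+
+    .. code-block:: python
+
+        balanced_data = Balance(
+            name = "balance_model_under",
+            input_relation = data,
+            y = "survived",
+            method = "under",
+        )
+        # check how balanced the response column is after under-sampling
+        balanced_data["survived"].topk()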
+
+
+ .. seealso::
+ | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` :
+ Normalizing the dataset.
"""
_executeSQL(
query=f"""
@@ -713,9 +814,6 @@ class Scaler(Preprocessing):
data = vp.vDataFrame({"values": [1, 1.01, 1.02, 1.05, 1.024]})
- .. raw:: html
- :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_titanic.html
-
.. note::
VerticaPy offers a wide range of sample datasets that are
From fa0449a41a8965b643fe38973859b194ab7c17e8 Mon Sep 17 00:00:00 2001
From: Badr
Date: Tue, 31 Oct 2023 09:09:49 -0400
Subject: [PATCH 3/7] Update preprocessing.py
---
.../machine_learning/vertica/preprocessing.py | 42 ++++++++-----------
1 file changed, 17 insertions(+), 25 deletions(-)
diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py
index 1db7231da..6c5aea26a 100755
--- a/verticapy/machine_learning/vertica/preprocessing.py
+++ b/verticapy/machine_learning/vertica/preprocessing.py
@@ -100,7 +100,6 @@ def Balance(
vDataFrame
vDataFrame of the created view.
-
Examples
--------
@@ -156,11 +155,10 @@ def Balance(
resources for honing your data analysis and machine learning
skills within the VerticaPy environment.
+ Function Application
+ ^^^^^^^^^^^^^^^^^^^^^
- Model Application
- ^^^^^^^^^^^^^^^^^^^
-
- First we import the ``Balance`` model:
+ First we import the ``Balance`` function:
.. ipython:: python
@@ -174,10 +172,11 @@ def Balance(
@suppress
vp.drop("balance_model")
- Balance(name = "balance_model",
- input_relation = data,
- y = "survived",
- method = "under"
+ Balance(
+ name = "balance_model",
+ input_relation = data,
+ y = "survived",
+ method = "under"
)
.. important::
@@ -188,8 +187,6 @@ def Balance(
The output vDataFrame can then be used or stored for later analysis.
-
-
.. seealso::
| :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` :
Normalizing the dataset.
@@ -824,7 +821,6 @@ class Scaler(Preprocessing):
resources for honing your data analysis and machine learning
skills within the VerticaPy environment.
-
Model Initialization
^^^^^^^^^^^^^^^^^^^^^
@@ -887,9 +883,8 @@ class Scaler(Preprocessing):
Conversion/Transformation
^^^^^^^^^^^^^^^^^^^^^^^^^^
- To get the scaled dataset, we can use the ``transform`` function.
- Let us transform the data:
-
+ To get the scaled dataset, we can use the ``transform``
+ function. Let us transform the data:
.. ipython:: python
:okwarning:
@@ -910,7 +905,7 @@ class Scaler(Preprocessing):
The variable ``data_transformed`` is the scaled dataset.
Model Register
- ^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^^^^
In order to register the model for tracking and versioning:
@@ -963,9 +958,9 @@ class Scaler(Preprocessing):
The
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python`
- method is used to scale the data. For specific details on how to use this method for
- different model types, refer to the relevant documentation for
- each model.
+ method is used to scale the data. For specific details on how
+ to use this method for different model types, refer to the
+ relevant documentation for each model.
.. seealso::
| :py:mod:`verticapy.machine_learning.vertica.preprocessing.StandardScaler` :
@@ -1062,7 +1057,6 @@ class StandardScaler(Scaler):
This is a child class. See
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
for more details and examples.
-
"""
@property
@@ -1082,7 +1076,6 @@ class RobustScaler(Scaler):
This is a child class. See
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
for more details and examples.
-
"""
@property
@@ -1102,7 +1095,6 @@ class MinMaxScaler(Scaler):
This is a child class. See
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
for more details and examples.
-
"""
@property
@@ -1286,8 +1278,8 @@ class OneHotEncoder(Preprocessing):
^^^^^^^^^^^^^^^^^^^^^^^^^^
To get the transformed dataset in the form that is encoded,
- we can use the ``transform`` function. Let us transform the data
- and display the first 20 datapoints.
+ we can use the ``transform`` function. Let us transform the
+ data and display the first 20 datapoints.
.. ipython:: python
:okwarning:
@@ -1309,7 +1301,7 @@ class OneHotEncoder(Preprocessing):
components.
Model Register
- ^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^^^^
In order to register the model for tracking and versioning:
From feceec48d857f3f4a97929410cff4456182b5fb8 Mon Sep 17 00:00:00 2001
From: Badr
Date: Tue, 31 Oct 2023 09:15:01 -0400
Subject: [PATCH 4/7] Update preprocessing.py
---
.../machine_learning/vertica/preprocessing.py | 21 +++++++------------
1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py
index 6c5aea26a..167cc6053 100755
--- a/verticapy/machine_learning/vertica/preprocessing.py
+++ b/verticapy/machine_learning/vertica/preprocessing.py
@@ -57,8 +57,8 @@ def Balance(
ratio: float = 0.5,
) -> vDataFrame:
"""
- Creates a view with an equal distribution of the
- input data based on the response_column.
+ Creates a view with an equal distribution of
+ the input data based on the response_column.
Parameters
----------
@@ -179,13 +179,8 @@ def Balance(
method = "under"
)
- .. important::
-
- The model name is crucial for the model management system and
- versioning. It's highly recommended to provide a name if you
- plan to reuse the model later.
-
- The output vDataFrame can then be used or stored for later analysis.
+ The output vDataFrame can then be used or stored for later
+ analysis.
.. seealso::
| :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` :
@@ -851,7 +846,7 @@ class Scaler(Preprocessing):
plan to reuse the model later.
Model Fitting
- ^^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^^^
We can now fit the model:
@@ -883,7 +878,7 @@ class Scaler(Preprocessing):
Conversion/Transformation
^^^^^^^^^^^^^^^^^^^^^^^^^^
- To get the scaled dataset, we can use the ``transform``
+ To get the scaled dataset, we can use the ``transform``
function. Let us transform the data:
.. ipython:: python
@@ -958,8 +953,8 @@ class Scaler(Preprocessing):
The
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python`
- method is used to scale the data. For specific details on how
- to use this method for different model types, refer to the
+ method is used to scale the data. For specific details on how
+ to use this method for different model types, refer to the
relevant documentation for each model.
.. seealso::
From ec6af1d7aee0207730c20778b978b757e7f40733 Mon Sep 17 00:00:00 2001
From: umar <46414488+mail4umar@users.noreply.github.com>
Date: Tue, 31 Oct 2023 09:48:32 -0500
Subject: [PATCH 5/7] resolving comments
---
.../machine_learning/vertica/preprocessing.py | 84 ++++++++++++-------
1 file changed, 56 insertions(+), 28 deletions(-)
diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py
index 167cc6053..62870a516 100755
--- a/verticapy/machine_learning/vertica/preprocessing.py
+++ b/verticapy/machine_learning/vertica/preprocessing.py
@@ -57,8 +57,8 @@ def Balance(
ratio: float = 0.5,
) -> vDataFrame:
"""
- Creates a view with an equal distribution of
- the input data based on the response_column.
+ Creates a view with an equal distribution of the
+ input data based on the response_column.
Parameters
----------
@@ -100,6 +100,7 @@ def Balance(
vDataFrame
vDataFrame of the created view.
+
Examples
--------
@@ -155,10 +156,11 @@ def Balance(
resources for honing your data analysis and machine learning
skills within the VerticaPy environment.
- Function Application
- ^^^^^^^^^^^^^^^^^^^^^
- First we import the ``Balance`` function:
+ Model Application
+ ^^^^^^^^^^^^^^^^^^^
+
+ First we import the ``Balance`` model:
.. ipython:: python
@@ -168,23 +170,44 @@ def Balance(
.. ipython:: python
:okwarning:
+ :suppress:
+
- @suppress
vp.drop("balance_model")
+ result = Balance(name = "balance_model",
+ input_relation = data,
+ y = "survived",
+ method = "under"
+ )
+ html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html", "w")
+ html_file.write(result._repr_html_())
+ html_file.close()
+
+ .. code-block:: python
- Balance(
- name = "balance_model",
- input_relation = data,
- y = "survived",
- method = "under"
+ Balance(name = "balance_model",
+ input_relation = data,
+ y = "survived",
+ method = "under"
)
- The output vDataFrame can then be used or stored for later
- analysis.
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html
+
+
+ .. important::
+
+ The model name is crucial for the model management system and
+ versioning. It's highly recommended to provide a name if you
+ plan to reuse the model later.
+
+ The output vDataFrame can then be used or stored for later analysis.
+
+
.. seealso::
- | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` :
- Normalizing the dataset.
+ | :py:mod:`verticapy.vDataFrame.sample` :
+ Sampling the dataset.
"""
_executeSQL(
query=f"""
@@ -756,21 +779,21 @@ class Scaler(Preprocessing):
.. math::
- (x - avg) / std
+            Z_{score} = (x - avg) / std
- robust_zscore:
Scaling using the Robust Z-Score.
.. math::
- (x - median) / (1.4826 * mad)
+            Z_{rscore} = (x - median) / (1.4826 * mad)
- minmax:
Normalization using the Min & Max.
.. math::
- (x - min) / (max - min)
+            Z_{minmax} = (x - min) / (max - min)
Examples
--------
@@ -816,6 +839,7 @@ class Scaler(Preprocessing):
resources for honing your data analysis and machine learning
skills within the VerticaPy environment.
+
Model Initialization
^^^^^^^^^^^^^^^^^^^^^
@@ -846,7 +870,7 @@ class Scaler(Preprocessing):
plan to reuse the model later.
Model Fitting
- ^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^^^^
We can now fit the model:
@@ -878,8 +902,9 @@ class Scaler(Preprocessing):
Conversion/Transformation
^^^^^^^^^^^^^^^^^^^^^^^^^^
- To get the scaled dataset, we can use the ``transform``
- function. Let us transform the data:
+ To get the scaled dataset, we can use the ``transform`` function.
+ Let us transform the data:
+
.. ipython:: python
:okwarning:
@@ -900,7 +925,7 @@ class Scaler(Preprocessing):
The variable ``data_transformed`` is the scaled dataset.
Model Register
- ^^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^^^
In order to register the model for tracking and versioning:
@@ -953,9 +978,9 @@ class Scaler(Preprocessing):
The
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python`
- method is used to scale the data. For specific details on how
- to use this method for different model types, refer to the
- relevant documentation for each model.
+ method is used to scale the data. For specific details on how to use this method for
+ different model types, refer to the relevant documentation for
+ each model.
.. seealso::
| :py:mod:`verticapy.machine_learning.vertica.preprocessing.StandardScaler` :
@@ -1052,6 +1077,7 @@ class StandardScaler(Scaler):
This is a child class. See
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
for more details and examples.
+
"""
@property
@@ -1071,6 +1097,7 @@ class RobustScaler(Scaler):
This is a child class. See
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
for more details and examples.
+
"""
@property
@@ -1090,6 +1117,7 @@ class MinMaxScaler(Scaler):
This is a child class. See
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
for more details and examples.
+
"""
@property
@@ -1273,8 +1301,8 @@ class OneHotEncoder(Preprocessing):
^^^^^^^^^^^^^^^^^^^^^^^^^^
To get the transformed dataset in the form that is encoded,
- we can use the ``transform`` function. Let us transform the
- data and display the first 20 datapoints.
+ we can use the ``transform`` function. Let us transform the data
+ and display the first 20 datapoints.
.. ipython:: python
:okwarning:
@@ -1296,7 +1324,7 @@ class OneHotEncoder(Preprocessing):
components.
Model Register
- ^^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^^^
In order to register the model for tracking and versioning:
From 66d1002a04c5c65197e452caa4af69085f410acc Mon Sep 17 00:00:00 2001
From: umar <46414488+mail4umar@users.noreply.github.com>
Date: Tue, 31 Oct 2023 09:56:13 -0500
Subject: [PATCH 6/7] Revert "resolving comments"
This reverts commit ec6af1d7aee0207730c20778b978b757e7f40733.
---
.../machine_learning/vertica/preprocessing.py | 84 +++++++------------
1 file changed, 28 insertions(+), 56 deletions(-)
diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py
index 62870a516..167cc6053 100755
--- a/verticapy/machine_learning/vertica/preprocessing.py
+++ b/verticapy/machine_learning/vertica/preprocessing.py
@@ -57,8 +57,8 @@ def Balance(
ratio: float = 0.5,
) -> vDataFrame:
"""
- Creates a view with an equal distribution of the
- input data based on the response_column.
+ Creates a view with an equal distribution of
+ the input data based on the response_column.
Parameters
----------
@@ -100,7 +100,6 @@ def Balance(
vDataFrame
vDataFrame of the created view.
-
Examples
--------
@@ -156,11 +155,10 @@ def Balance(
resources for honing your data analysis and machine learning
skills within the VerticaPy environment.
+ Function Application
+ ^^^^^^^^^^^^^^^^^^^^^
- Model Application
- ^^^^^^^^^^^^^^^^^^^
-
- First we import the ``Balance`` model:
+ First we import the ``Balance`` function:
.. ipython:: python
@@ -170,44 +168,23 @@ def Balance(
.. ipython:: python
:okwarning:
- :suppress:
-
+ @suppress
vp.drop("balance_model")
- result = Balance(name = "balance_model",
- input_relation = data,
- y = "survived",
- method = "under"
- )
- html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html", "w")
- html_file.write(result._repr_html_())
- html_file.close()
-
- .. code-block:: python
- Balance(name = "balance_model",
- input_relation = data,
- y = "survived",
- method = "under"
+ Balance(
+ name = "balance_model",
+ input_relation = data,
+ y = "survived",
+ method = "under"
)
- .. raw:: html
- :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html
-
-
- .. important::
-
- The model name is crucial for the model management system and
- versioning. It's highly recommended to provide a name if you
- plan to reuse the model later.
-
- The output vDataFrame can then be used or stored for later analysis.
-
-
+ The output vDataFrame can then be used or stored for later
+ analysis.
.. seealso::
- | :py:mod:`verticapy.vDataFrame.sample` :
- Sampling the dataset.
+ | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` :
+ Normalizing the dataset.
"""
_executeSQL(
query=f"""
@@ -779,21 +756,21 @@ class Scaler(Preprocessing):
.. math::
-            Z_{score} = (x - avg) / std
+ (x - avg) / std
- robust_zscore:
Scaling using the Robust Z-Score.
.. math::
-            Z_{rscore} = (x - median) / (1.4826 * mad)
+ (x - median) / (1.4826 * mad)
- minmax:
Normalization using the Min & Max.
.. math::
-            Z_{minmax} = (x - min) / (max - min)
+ (x - min) / (max - min)
Examples
--------
@@ -839,7 +816,6 @@ class Scaler(Preprocessing):
resources for honing your data analysis and machine learning
skills within the VerticaPy environment.
-
Model Initialization
^^^^^^^^^^^^^^^^^^^^^
@@ -870,7 +846,7 @@ class Scaler(Preprocessing):
plan to reuse the model later.
Model Fitting
- ^^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^^^
We can now fit the model:
@@ -902,9 +878,8 @@ class Scaler(Preprocessing):
Conversion/Transformation
^^^^^^^^^^^^^^^^^^^^^^^^^^
- To get the scaled dataset, we can use the ``transform`` function.
- Let us transform the data:
-
+ To get the scaled dataset, we can use the ``transform``
+ function. Let us transform the data:
.. ipython:: python
:okwarning:
@@ -925,7 +900,7 @@ class Scaler(Preprocessing):
The variable ``data_transformed`` is the scaled dataset.
Model Register
- ^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^^^^
In order to register the model for tracking and versioning:
@@ -978,9 +953,9 @@ class Scaler(Preprocessing):
The
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python`
- method is used to scale the data. For specific details on how to use this method for
- different model types, refer to the relevant documentation for
- each model.
+ method is used to scale the data. For specific details on how
+ to use this method for different model types, refer to the
+ relevant documentation for each model.
.. seealso::
| :py:mod:`verticapy.machine_learning.vertica.preprocessing.StandardScaler` :
@@ -1077,7 +1052,6 @@ class StandardScaler(Scaler):
This is a child class. See
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
for more details and examples.
-
"""
@property
@@ -1097,7 +1071,6 @@ class RobustScaler(Scaler):
This is a child class. See
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
for more details and examples.
-
"""
@property
@@ -1117,7 +1090,6 @@ class MinMaxScaler(Scaler):
This is a child class. See
:py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler`
for more details and examples.
-
"""
@property
@@ -1301,8 +1273,8 @@ class OneHotEncoder(Preprocessing):
^^^^^^^^^^^^^^^^^^^^^^^^^^
To get the transformed dataset in the form that is encoded,
- we can use the ``transform`` function. Let us transform the data
- and display the first 20 datapoints.
+ we can use the ``transform`` function. Let us transform the
+ data and display the first 20 datapoints.
.. ipython:: python
:okwarning:
@@ -1324,7 +1296,7 @@ class OneHotEncoder(Preprocessing):
components.
Model Register
- ^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^^^^
In order to register the model for tracking and versioning:
From 78d6117e1c3dad85be53ed4da266d6e626aefb8a Mon Sep 17 00:00:00 2001
From: umar <46414488+mail4umar@users.noreply.github.com>
Date: Tue, 31 Oct 2023 10:00:59 -0500
Subject: [PATCH 7/7] resolving comments again
---
.../machine_learning/vertica/preprocessing.py | 36 ++++++++++++-------
1 file changed, 23 insertions(+), 13 deletions(-)
diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py
index 167cc6053..535c9d746 100755
--- a/verticapy/machine_learning/vertica/preprocessing.py
+++ b/verticapy/machine_learning/vertica/preprocessing.py
@@ -168,23 +168,33 @@ def Balance(
.. ipython:: python
:okwarning:
+ :suppress:
+
- @suppress
vp.drop("balance_model")
+ result = Balance(name = "balance_model",
+ input_relation = data,
+ y = "survived",
+ method = "under"
+ )
+ html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html", "w")
+ html_file.write(result._repr_html_())
+ html_file.close()
- Balance(
- name = "balance_model",
- input_relation = data,
- y = "survived",
- method = "under"
+ .. code-block:: python
+
+ Balance(name = "balance_model",
+ input_relation = data,
+ y = "survived",
+ method = "under"
)
- The output vDataFrame can then be used or stored for later
- analysis.
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html
.. seealso::
- | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` :
- Normalizing the dataset.
+ | :py:mod:`verticapy.vDataFrame.sample` :
+ Sampling the dataset.
"""
_executeSQL(
query=f"""
@@ -756,21 +766,21 @@ class Scaler(Preprocessing):
.. math::
- (x - avg) / std
+            Z_{score} = (x - avg) / std
- robust_zscore:
Scaling using the Robust Z-Score.
.. math::
- (x - median) / (1.4826 * mad)
+            Z_{rscore} = (x - median) / (1.4826 * mad)
- minmax:
Normalization using the Min & Max.
.. math::
- (x - min) / (max - min)
+            Z_{minmax} = (x - min) / (max - min)
Examples
--------