From 10b0eca1c9f5259301ebdd9a5540d7a9a00113e5 Mon Sep 17 00:00:00 2001 From: umar <46414488+mail4umar@users.noreply.github.com> Date: Mon, 30 Oct 2023 23:31:42 -0500 Subject: [PATCH 1/7] Sphinx Docstring - Scaler and subclasses --- .../machine_learning/vertica/preprocessing.py | 259 +++++++++++++++++- 1 file changed, 250 insertions(+), 9 deletions(-) diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py index 64404e99d..07b378b30 100755 --- a/verticapy/machine_learning/vertica/preprocessing.py +++ b/verticapy/machine_learning/vertica/preprocessing.py @@ -657,12 +657,226 @@ class Scaler(Preprocessing): existing model. method: str, optional Method used to scale the data. - zscore : Scaling using the Z-Score. - (x - avg) / std - robust_zscore : Scaling using the Robust Z-Score. - (x - median) / (1.4826 * mad) - minmax : Normalization using the Min & Max. - (x - min) / (max - min) + + - zscore: + Scaling using the Z-Score + + .. math:: + + (x - avg) / std + + - robust_zscore: + Scaling using the Robust Z-Score. + + .. math:: + + (x - median) / (1.4826 * mad) + + - minmax: + Normalization using the Min & Max. + + .. math:: + + (x - min) / (max - min) + + Examples + -------- + + The following examples provide a basic understanding of usage. + For more detailed examples, please refer to the + :ref:`user_guide.machine_learning` or the + `Examples `_ + section on the website. + + Load data for machine learning + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + We import ``verticapy``: + + .. ipython:: python + + import verticapy as vp + + .. hint:: + + By assigning an alias to ``verticapy``, we mitigate the risk of code + collisions with other libraries. This precaution is necessary + because verticapy uses commonly known function names like "average" + and "median", which can potentially lead to naming conflicts. + The use of an alias ensures that the functions from verticapy are + used as intended without interfering with functions from other + libraries. + + For this example, we will use a dummy dataset. + + .. ipython:: python + + data = vp.vDataFrame({"values": [1, 1.01, 1.02, 1.05, 1.024]}) + + .. raw:: html + :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_titanic.html + + .. note:: + + VerticaPy offers a wide range of sample datasets that are + ideal for training and testing purposes. You can explore + the full list of available datasets in the :ref:`api.datasets`, + which provides detailed information on each dataset + and how to use them effectively. These datasets are invaluable + resources for honing your data analysis and machine learning + skills within the VerticaPy environment. + + + Model Initialization + ^^^^^^^^^^^^^^^^^^^^^ + + First we import the ``Scaler`` model: + + .. ipython:: python + + from verticapy.machine_learning.vertica import Scaler + + Then we can create the model: + + .. ipython:: python + :okwarning: + + model = Scaler(method = "zscore") + + .. hint:: + + In ``verticapy`` 1.0.x and higher, you do not need to specify the + model name, as the name is automatically assigned. If you need to + re-use the model, you can fetch the model name from the model's + attributes. + + .. important:: + + The model name is crucial for the model management system and + versioning. It's highly recommended to provide a name if you + plan to reuse the model later. + + Model Fitting + ^^^^^^^^^^^^^^^ + + We can now fit the model: + + .. ipython:: python + :okwarning: + + model.fit(data) + + .. 
important:: + + To fit a model, you can directly use the ``vDataFrame`` + or the name of the relation stored in the database. + + Model Parameters + ^^^^^^^^^^^^^^^^^ + + To fetch the model parameter (mean) you can use: + + .. ipython:: python + + model.mean_ + + Similarly for standard deviation: + + .. ipython:: python + + model.std_ + + Conversion/Transformation + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + + To get the scaled dataset, we can use the ``transform`` function. + Let us transform the data: + + + .. ipython:: python + :okwarning: + + model.transform(data) + + Please refer to + :py:mod:`verticapy.machine_learning.Scaler.transform` + for more details on transforming a ``vDataFrame``. + + Similarly, you can perform the inverse tranform to get + the original features using: + + .. code-block:: python + + model.inverse_transform(data_transformed) + + The variable ``data_transformed`` is the scaled dataset. + + Model Register + ^^^^^^^^^^^^^^ + + In order to register the model for tracking and versioning: + + .. code-block:: python + + model.register("model_v1") + + Please refer to + :ref:`notebooks/ml/model_tracking_versioning/index.html` + for more details on model tracking and versioning. + + Model Exporting + ^^^^^^^^^^^^^^^^ + + **To Memmodel** + + .. code-block:: python + + model.to_memmodel() + + .. note:: + + ``MemModel`` objects serve as in-memory representations of + machine learning models. They can be used for both in-database + and in-memory prediction tasks. These objects can be pickled + in the same way that you would pickle a ``scikit-learn`` model. + + The preceding methods for exporting the model use ``MemModel``, + and it is recommended to use ``MemModel`` directly. + + **SQL** + + To get the SQL query use below: + + .. ipython:: python + + model.to_sql() + + **To Python** + + To obtain the prediction function in Python syntax, use the + following code: + + .. ipython:: python + + X = [[1]] + model.to_python()(X) + + .. hint:: + + The + :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python` + method is used to scale the data. For specific details on how to use this method for + different model types, refer to the relevant documentation for + each model. + + .. seealso:: + | :py:mod:`verticapy.machine_learning.vertica.preprocessing.StandardScaler` : + Scalar with method set as ``zscore``. + | :py:mod:`verticapy.machine_learning.vertica.preprocessing.RobustScaler` : + Scalar with method set as ``robust_zscore``. + | :py:mod:`verticapy.machine_learning.vertica.preprocessing.MinMaxScaler` : + Scalar with method set as ``minmax``. + """ # Properties. @@ -742,7 +956,16 @@ def to_memmodel(self) -> mm.Scaler: class StandardScaler(Scaler): - """i.e. Scaler with param method = 'zscore'""" + """ + i.e. Scaler with param method = 'zscore' + + .. note:: + + This is a child class. See + :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` + for more details and examples. + + """ @property def _attributes(self) -> list[str]: @@ -753,7 +976,16 @@ def __init__(self, name: str = None, overwrite_model: bool = False) -> None: class RobustScaler(Scaler): - """i.e. Scaler with param method = 'robust_zscore'""" + """ + i.e. Scaler with param method = 'robust_zscore' + + .. note:: + + This is a child class. See + :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` + for more details and examples. 
+ + """ @property def _attributes(self) -> list[str]: @@ -764,7 +996,16 @@ def __init__(self, name: str = None, overwrite_model: bool = False) -> None: class MinMaxScaler(Scaler): - """i.e. Scaler with param method = 'minmax'""" + """ + i.e. Scaler with param method = 'minmax' + + .. note:: + + This is a child class. See + :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` + for more details and examples. + + """ @property def _attributes(self) -> list[str]: From a804b53e69582b18944444fdcbd0ac194732e45b Mon Sep 17 00:00:00 2001 From: umar <46414488+mail4umar@users.noreply.github.com> Date: Mon, 30 Oct 2023 23:58:49 -0500 Subject: [PATCH 2/7] Added Balance --- .../machine_learning/vertica/preprocessing.py | 128 ++++++++++++++++-- 1 file changed, 113 insertions(+), 15 deletions(-) diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py index 07b378b30..1db7231da 100755 --- a/verticapy/machine_learning/vertica/preprocessing.py +++ b/verticapy/machine_learning/vertica/preprocessing.py @@ -70,18 +70,25 @@ def Balance( Response column. method: str, optional Method used to do the balancing. - hybrid : Performs over-sampling and - under-sampling on different - classes so that each class is - equally represented. - over : Over-samples on all classes, - except the most represented - class, towards the most - represented class's cardinality. - under : Under-samples on all classes, - except the least represented - class, towards the least - represented class's cardinality. + + - hybrid : + Performs over-sampling and + under-sampling on different + classes so that each class is + equally represented. + + - over : + Over-samples on all classes, + except the most represented + class, towards the most + represented class's cardinality. + + - under: + Under-samples on all classes, + except the least represented + class, towards the least + represented class's cardinality. + ratio: float, optional The desired ratio between the majority class and the minority class. This value has no @@ -92,6 +99,100 @@ def Balance( ------- vDataFrame vDataFrame of the created view. + + + Examples + -------- + + The following examples provide a basic understanding of usage. + For more detailed examples, please refer to the + :ref:`user_guide.machine_learning` or the + `Examples `_ + section on the website. + + Load data for machine learning + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + We import ``verticapy``: + + .. ipython:: python + + import verticapy as vp + + .. hint:: + + By assigning an alias to ``verticapy``, we mitigate the risk of code + collisions with other libraries. This precaution is necessary + because verticapy uses commonly known function names like "average" + and "median", which can potentially lead to naming conflicts. + The use of an alias ensures that the functions from verticapy are + used as intended without interfering with functions from other + libraries. + + For this example, we will use the Titanic dataset. + + .. code-block:: python + + import verticapy.datasets as vpd + + data = vpd.load_titanic() + + .. raw:: html + :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_titanic.html + + .. ipython:: python + :suppress: + + import verticapy.datasets as vpd + data = vpd.load_titanic() + + .. note:: + + VerticaPy offers a wide range of sample datasets that are + ideal for training and testing purposes. 
You can explore + the full list of available datasets in the :ref:`api.datasets`, + which provides detailed information on each dataset + and how to use them effectively. These datasets are invaluable + resources for honing your data analysis and machine learning + skills within the VerticaPy environment. + + + Model Application + ^^^^^^^^^^^^^^^^^^^ + + First we import the ``Balance`` model: + + .. ipython:: python + + from verticapy.machine_learning.vertica import Balance + + Then we can directly apply it to the dataset: + + .. ipython:: python + :okwarning: + + @suppress + vp.drop("balance_model") + + Balance(name = "balance_model", + input_relation = data, + y = "survived", + method = "under" + ) + + .. important:: + + The model name is crucial for the model management system and + versioning. It's highly recommended to provide a name if you + plan to reuse the model later. + + The output vDataFrame can then be used or stored for later analysis. + + + + .. seealso:: + | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` : + Normalizing the dataset. """ _executeSQL( query=f""" @@ -713,9 +814,6 @@ class Scaler(Preprocessing): data = vp.vDataFrame({"values": [1, 1.01, 1.02, 1.05, 1.024]}) - .. raw:: html - :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_titanic.html - .. note:: VerticaPy offers a wide range of sample datasets that are From fa0449a41a8965b643fe38973859b194ab7c17e8 Mon Sep 17 00:00:00 2001 From: Badr Date: Tue, 31 Oct 2023 09:09:49 -0400 Subject: [PATCH 3/7] Update preprocessing.py --- .../machine_learning/vertica/preprocessing.py | 42 ++++++++----------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py index 1db7231da..6c5aea26a 100755 --- a/verticapy/machine_learning/vertica/preprocessing.py +++ b/verticapy/machine_learning/vertica/preprocessing.py @@ -100,7 +100,6 @@ def Balance( vDataFrame vDataFrame of the created view. - Examples -------- @@ -156,11 +155,10 @@ def Balance( resources for honing your data analysis and machine learning skills within the VerticaPy environment. + Function Application + ^^^^^^^^^^^^^^^^^^^^^ - Model Application - ^^^^^^^^^^^^^^^^^^^ - - First we import the ``Balance`` model: + First we import the ``Balance`` function: .. ipython:: python @@ -174,10 +172,11 @@ def Balance( @suppress vp.drop("balance_model") - Balance(name = "balance_model", - input_relation = data, - y = "survived", - method = "under" + Balance( + name = "balance_model", + input_relation = data, + y = "survived", + method = "under" ) .. important:: @@ -188,8 +187,6 @@ def Balance( The output vDataFrame can then be used or stored for later analysis. - - .. seealso:: | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` : Normalizing the dataset. @@ -824,7 +821,6 @@ class Scaler(Preprocessing): resources for honing your data analysis and machine learning skills within the VerticaPy environment. - Model Initialization ^^^^^^^^^^^^^^^^^^^^^ @@ -887,9 +883,8 @@ class Scaler(Preprocessing): Conversion/Transformation ^^^^^^^^^^^^^^^^^^^^^^^^^^ - To get the scaled dataset, we can use the ``transform`` function. - Let us transform the data: - + To get the scaled dataset, we can use the ``transform`` + function. Let us transform the data: .. ipython:: python :okwarning: @@ -910,7 +905,7 @@ class Scaler(Preprocessing): The variable ``data_transformed`` is the scaled dataset. 
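As an aside for the reader, the scaling round trip described above can be strung together in one place. The sketch below only reuses calls that already appear in this docstring (``vDataFrame``, ``Scaler``, ``fit``, ``transform``, ``inverse_transform``); the exact scaled values it returns are not claimed here.

.. code-block:: python

    # Hedged sketch of the fit / transform / inverse_transform round trip
    # documented above, using the same dummy dataset as the docstring.
    import verticapy as vp
    from verticapy.machine_learning.vertica import Scaler

    data = vp.vDataFrame({"values": [1, 1.01, 1.02, 1.05, 1.024]})

    model = Scaler(method = "zscore")
    model.fit(data)

    # Scale the data, then recover the original feature values.
    data_transformed = model.transform(data)
    data_original = model.inverse_transform(data_transformed)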
Model Register - ^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^^ In order to register the model for tracking and versioning: @@ -963,9 +958,9 @@ class Scaler(Preprocessing): The :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python` - method is used to scale the data. For specific details on how to use this method for - different model types, refer to the relevant documentation for - each model. + method is used to scale the data. For specific details on how + to use this method for different model types, refer to the + relevant documentation for each model. .. seealso:: | :py:mod:`verticapy.machine_learning.vertica.preprocessing.StandardScaler` : @@ -1062,7 +1057,6 @@ class StandardScaler(Scaler): This is a child class. See :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` for more details and examples. - """ @property @@ -1082,7 +1076,6 @@ class RobustScaler(Scaler): This is a child class. See :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` for more details and examples. - """ @property @@ -1102,7 +1095,6 @@ class MinMaxScaler(Scaler): This is a child class. See :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` for more details and examples. - """ @property @@ -1286,8 +1278,8 @@ class OneHotEncoder(Preprocessing): ^^^^^^^^^^^^^^^^^^^^^^^^^^ To get the transformed dataset in the form that is encoded, - we can use the ``transform`` function. Let us transform the data - and display the first 20 datapoints. + we can use the ``transform`` function. Let us transform the + data and display the first 20 datapoints. .. ipython:: python :okwarning: @@ -1309,7 +1301,7 @@ class OneHotEncoder(Preprocessing): components. Model Register - ^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^^ In order to register the model for tracking and versioning: From feceec48d857f3f4a97929410cff4456182b5fb8 Mon Sep 17 00:00:00 2001 From: Badr Date: Tue, 31 Oct 2023 09:15:01 -0400 Subject: [PATCH 4/7] Update preprocessing.py --- .../machine_learning/vertica/preprocessing.py | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py index 6c5aea26a..167cc6053 100755 --- a/verticapy/machine_learning/vertica/preprocessing.py +++ b/verticapy/machine_learning/vertica/preprocessing.py @@ -57,8 +57,8 @@ def Balance( ratio: float = 0.5, ) -> vDataFrame: """ - Creates a view with an equal distribution of the - input data based on the response_column. + Creates a view with an equal distribution of + the input data based on the response_column. Parameters ---------- @@ -179,13 +179,8 @@ def Balance( method = "under" ) - .. important:: - - The model name is crucial for the model management system and - versioning. It's highly recommended to provide a name if you - plan to reuse the model later. - - The output vDataFrame can then be used or stored for later analysis. + The output vDataFrame can then be used or stored for later + analysis. .. seealso:: | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` : @@ -851,7 +846,7 @@ class Scaler(Preprocessing): plan to reuse the model later. Model Fitting - ^^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^ We can now fit the model: @@ -883,7 +878,7 @@ class Scaler(Preprocessing): Conversion/Transformation ^^^^^^^^^^^^^^^^^^^^^^^^^^ - To get the scaled dataset, we can use the ``transform`` + To get the scaled dataset, we can use the ``transform`` function. Let us transform the data: .. 
ipython:: python @@ -958,8 +953,8 @@ class Scaler(Preprocessing): The :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python` - method is used to scale the data. For specific details on how - to use this method for different model types, refer to the + method is used to scale the data. For specific details on how + to use this method for different model types, refer to the relevant documentation for each model. .. seealso:: From ec6af1d7aee0207730c20778b978b757e7f40733 Mon Sep 17 00:00:00 2001 From: umar <46414488+mail4umar@users.noreply.github.com> Date: Tue, 31 Oct 2023 09:48:32 -0500 Subject: [PATCH 5/7] resolving comments --- .../machine_learning/vertica/preprocessing.py | 84 ++++++++++++------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py index 167cc6053..62870a516 100755 --- a/verticapy/machine_learning/vertica/preprocessing.py +++ b/verticapy/machine_learning/vertica/preprocessing.py @@ -57,8 +57,8 @@ def Balance( ratio: float = 0.5, ) -> vDataFrame: """ - Creates a view with an equal distribution of - the input data based on the response_column. + Creates a view with an equal distribution of the + input data based on the response_column. Parameters ---------- @@ -100,6 +100,7 @@ def Balance( vDataFrame vDataFrame of the created view. + Examples -------- @@ -155,10 +156,11 @@ def Balance( resources for honing your data analysis and machine learning skills within the VerticaPy environment. - Function Application - ^^^^^^^^^^^^^^^^^^^^^ - First we import the ``Balance`` function: + Model Application + ^^^^^^^^^^^^^^^^^^^ + + First we import the ``Balance`` model: .. ipython:: python @@ -168,23 +170,44 @@ def Balance( .. ipython:: python :okwarning: + :suppress: + - @suppress vp.drop("balance_model") + result = Balance(name = "balance_model", + input_relation = data, + y = "survived", + method = "under" + ) + html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html", "w") + html_file.write(result._repr_html_()) + html_file.close() + + .. code-block:: python - Balance( - name = "balance_model", - input_relation = data, - y = "survived", - method = "under" + Balance(name = "balance_model", + input_relation = data, + y = "survived", + method = "under" ) - The output vDataFrame can then be used or stored for later - analysis. + .. raw:: html + :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html + + + .. important:: + + The model name is crucial for the model management system and + versioning. It's highly recommended to provide a name if you + plan to reuse the model later. + + The output vDataFrame can then be used or stored for later analysis. + + .. seealso:: - | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` : - Normalizing the dataset. + | :py:mod:`verticapy.vDataFrame.sample` : + Sampling the dataset. """ _executeSQL( query=f""" @@ -756,21 +779,21 @@ class Scaler(Preprocessing): .. math:: - (x - avg) / std + Z_score = (x - avg) / std - robust_zscore: Scaling using the Robust Z-Score. .. math:: - (x - median) / (1.4826 * mad) + Z_rscore = (x - median) / (1.4826 * mad) - minmax: Normalization using the Min & Max. .. math:: - (x - min) / (max - min) + Z_minmax = (x - min) / (max - min) Examples -------- @@ -816,6 +839,7 @@ class Scaler(Preprocessing): resources for honing your data analysis and machine learning skills within the VerticaPy environment. 
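Before moving on to model initialization, a small pure-Python illustration of the three scaling formulas documented above may help make them concrete. It needs no database connection; the ``1.4826`` constant comes from the docstring, and the use of the sample standard deviation (``n - 1`` in the denominator) is an assumption here.

.. code-block:: python

    # Plain-Python illustration of the zscore, robust_zscore and minmax
    # formulas shown in the method description above (assumption: sample
    # standard deviation with n - 1 in the denominator).
    values = [1, 1.01, 1.02, 1.05, 1.024]
    n = len(values)

    avg = sum(values) / n
    std = (sum((x - avg) ** 2 for x in values) / (n - 1)) ** 0.5
    zscore = [(x - avg) / std for x in values]

    median = sorted(values)[n // 2]                       # n is odd here
    mad = sorted(abs(x - median) for x in values)[n // 2] # median absolute deviation
    robust_zscore = [(x - median) / (1.4826 * mad) for x in values]

    minmax = [(x - min(values)) / (max(values) - min(values)) for x in values]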
+ Model Initialization ^^^^^^^^^^^^^^^^^^^^^ @@ -846,7 +870,7 @@ class Scaler(Preprocessing): plan to reuse the model later. Model Fitting - ^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^^ We can now fit the model: @@ -878,8 +902,9 @@ class Scaler(Preprocessing): Conversion/Transformation ^^^^^^^^^^^^^^^^^^^^^^^^^^ - To get the scaled dataset, we can use the ``transform`` - function. Let us transform the data: + To get the scaled dataset, we can use the ``transform`` function. + Let us transform the data: + .. ipython:: python :okwarning: @@ -900,7 +925,7 @@ class Scaler(Preprocessing): The variable ``data_transformed`` is the scaled dataset. Model Register - ^^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^ In order to register the model for tracking and versioning: @@ -953,9 +978,9 @@ class Scaler(Preprocessing): The :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python` - method is used to scale the data. For specific details on how - to use this method for different model types, refer to the - relevant documentation for each model. + method is used to scale the data. For specific details on how to use this method for + different model types, refer to the relevant documentation for + each model. .. seealso:: | :py:mod:`verticapy.machine_learning.vertica.preprocessing.StandardScaler` : @@ -1052,6 +1077,7 @@ class StandardScaler(Scaler): This is a child class. See :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` for more details and examples. + """ @property @@ -1071,6 +1097,7 @@ class RobustScaler(Scaler): This is a child class. See :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` for more details and examples. + """ @property @@ -1090,6 +1117,7 @@ class MinMaxScaler(Scaler): This is a child class. See :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` for more details and examples. + """ @property @@ -1273,8 +1301,8 @@ class OneHotEncoder(Preprocessing): ^^^^^^^^^^^^^^^^^^^^^^^^^^ To get the transformed dataset in the form that is encoded, - we can use the ``transform`` function. Let us transform the - data and display the first 20 datapoints. + we can use the ``transform`` function. Let us transform the data + and display the first 20 datapoints. .. ipython:: python :okwarning: @@ -1296,7 +1324,7 @@ class OneHotEncoder(Preprocessing): components. Model Register - ^^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^ In order to register the model for tracking and versioning: From 66d1002a04c5c65197e452caa4af69085f410acc Mon Sep 17 00:00:00 2001 From: umar <46414488+mail4umar@users.noreply.github.com> Date: Tue, 31 Oct 2023 09:56:13 -0500 Subject: [PATCH 6/7] Revert "resolving comments" This reverts commit ec6af1d7aee0207730c20778b978b757e7f40733. --- .../machine_learning/vertica/preprocessing.py | 84 +++++++------------ 1 file changed, 28 insertions(+), 56 deletions(-) diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py index 62870a516..167cc6053 100755 --- a/verticapy/machine_learning/vertica/preprocessing.py +++ b/verticapy/machine_learning/vertica/preprocessing.py @@ -57,8 +57,8 @@ def Balance( ratio: float = 0.5, ) -> vDataFrame: """ - Creates a view with an equal distribution of the - input data based on the response_column. + Creates a view with an equal distribution of + the input data based on the response_column. Parameters ---------- @@ -100,7 +100,6 @@ def Balance( vDataFrame vDataFrame of the created view. 
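To tie the ``Balance`` signature and its return value together, here is a minimal sketch of a call; every function used below (``load_titanic``, ``vp.drop``, ``Balance``) already appears elsewhere in this patch, and the view name ``balance_model`` is the one used in the docstring example.

.. code-block:: python

    # Hedged sketch of a Balance call, mirroring the docstring example.
    import verticapy as vp
    import verticapy.datasets as vpd
    from verticapy.machine_learning.vertica import Balance

    data = vpd.load_titanic()
    vp.drop("balance_model")   # drop any previous view with this name

    balanced = Balance(
        name = "balance_model",
        input_relation = data,
        y = "survived",
        method = "under",      # under-sample toward the least represented class
    )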
- Examples -------- @@ -156,11 +155,10 @@ def Balance( resources for honing your data analysis and machine learning skills within the VerticaPy environment. + Function Application + ^^^^^^^^^^^^^^^^^^^^^ - Model Application - ^^^^^^^^^^^^^^^^^^^ - - First we import the ``Balance`` model: + First we import the ``Balance`` function: .. ipython:: python @@ -170,44 +168,23 @@ def Balance( .. ipython:: python :okwarning: - :suppress: - + @suppress vp.drop("balance_model") - result = Balance(name = "balance_model", - input_relation = data, - y = "survived", - method = "under" - ) - html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html", "w") - html_file.write(result._repr_html_()) - html_file.close() - - .. code-block:: python - Balance(name = "balance_model", - input_relation = data, - y = "survived", - method = "under" + Balance( + name = "balance_model", + input_relation = data, + y = "survived", + method = "under" ) - .. raw:: html - :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html - - - .. important:: - - The model name is crucial for the model management system and - versioning. It's highly recommended to provide a name if you - plan to reuse the model later. - - The output vDataFrame can then be used or stored for later analysis. - - + The output vDataFrame can then be used or stored for later + analysis. .. seealso:: - | :py:mod:`verticapy.vDataFrame.sample` : - Sampling the dataset. + | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` : + Normalizing the dataset. """ _executeSQL( query=f""" @@ -779,21 +756,21 @@ class Scaler(Preprocessing): .. math:: - Z_score = (x - avg) / std + (x - avg) / std - robust_zscore: Scaling using the Robust Z-Score. .. math:: - Z_rscore = (x - median) / (1.4826 * mad) + (x - median) / (1.4826 * mad) - minmax: Normalization using the Min & Max. .. math:: - Z_minmax = (x - min) / (max - min) + (x - min) / (max - min) Examples -------- @@ -839,7 +816,6 @@ class Scaler(Preprocessing): resources for honing your data analysis and machine learning skills within the VerticaPy environment. - Model Initialization ^^^^^^^^^^^^^^^^^^^^^ @@ -870,7 +846,7 @@ class Scaler(Preprocessing): plan to reuse the model later. Model Fitting - ^^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^ We can now fit the model: @@ -902,9 +878,8 @@ class Scaler(Preprocessing): Conversion/Transformation ^^^^^^^^^^^^^^^^^^^^^^^^^^ - To get the scaled dataset, we can use the ``transform`` function. - Let us transform the data: - + To get the scaled dataset, we can use the ``transform`` + function. Let us transform the data: .. ipython:: python :okwarning: @@ -925,7 +900,7 @@ class Scaler(Preprocessing): The variable ``data_transformed`` is the scaled dataset. Model Register - ^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^^ In order to register the model for tracking and versioning: @@ -978,9 +953,9 @@ class Scaler(Preprocessing): The :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler.to_python` - method is used to scale the data. For specific details on how to use this method for - different model types, refer to the relevant documentation for - each model. + method is used to scale the data. For specific details on how + to use this method for different model types, refer to the + relevant documentation for each model. .. seealso:: | :py:mod:`verticapy.machine_learning.vertica.preprocessing.StandardScaler` : @@ -1077,7 +1052,6 @@ class StandardScaler(Scaler): This is a child class. 
See :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` for more details and examples. - """ @property @@ -1097,7 +1071,6 @@ class RobustScaler(Scaler): This is a child class. See :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` for more details and examples. - """ @property @@ -1117,7 +1090,6 @@ class MinMaxScaler(Scaler): This is a child class. See :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` for more details and examples. - """ @property @@ -1301,8 +1273,8 @@ class OneHotEncoder(Preprocessing): ^^^^^^^^^^^^^^^^^^^^^^^^^^ To get the transformed dataset in the form that is encoded, - we can use the ``transform`` function. Let us transform the data - and display the first 20 datapoints. + we can use the ``transform`` function. Let us transform the + data and display the first 20 datapoints. .. ipython:: python :okwarning: @@ -1324,7 +1296,7 @@ class OneHotEncoder(Preprocessing): components. Model Register - ^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^^ In order to register the model for tracking and versioning: From 78d6117e1c3dad85be53ed4da266d6e626aefb8a Mon Sep 17 00:00:00 2001 From: umar <46414488+mail4umar@users.noreply.github.com> Date: Tue, 31 Oct 2023 10:00:59 -0500 Subject: [PATCH 7/7] resolving comments again --- .../machine_learning/vertica/preprocessing.py | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/verticapy/machine_learning/vertica/preprocessing.py b/verticapy/machine_learning/vertica/preprocessing.py index 167cc6053..535c9d746 100755 --- a/verticapy/machine_learning/vertica/preprocessing.py +++ b/verticapy/machine_learning/vertica/preprocessing.py @@ -168,23 +168,33 @@ def Balance( .. ipython:: python :okwarning: + :suppress: + - @suppress vp.drop("balance_model") + result = Balance(name = "balance_model", + input_relation = data, + y = "survived", + method = "under" + ) + html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html", "w") + html_file.write(result._repr_html_()) + html_file.close() - Balance( - name = "balance_model", - input_relation = data, - y = "survived", - method = "under" + .. code-block:: python + + Balance(name = "balance_model", + input_relation = data, + y = "survived", + method = "under" ) - The output vDataFrame can then be used or stored for later - analysis. + .. raw:: html + :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_preprocessing_balance.html .. seealso:: - | :py:mod:`verticapy.machine_learning.vertica.preprocessing.Scaler` : - Normalizing the dataset. + | :py:mod:`verticapy.vDataFrame.sample` : + Sampling the dataset. """ _executeSQL( query=f""" @@ -756,21 +766,21 @@ class Scaler(Preprocessing): .. math:: - (x - avg) / std + Z_score = (x - avg) / std - robust_zscore: Scaling using the Robust Z-Score. .. math:: - (x - median) / (1.4826 * mad) + Z_rscore = (x - median) / (1.4826 * mad) - minmax: Normalization using the Min & Max. .. math:: - (x - min) / (max - min) + Z_minmax = (x - min) / (max - min) Examples --------
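The export options documented in the ``Scaler`` docstring above (``to_memmodel``, ``to_sql``, ``to_python``) can also be chained after a single fit. The sketch below only reuses calls shown in the patch; the exact contents of the returned objects are not claimed.

.. code-block:: python

    # Hedged sketch of the Scaler export paths documented above; only
    # calls that appear in the patch are used.
    import verticapy as vp
    from verticapy.machine_learning.vertica import Scaler

    data = vp.vDataFrame({"values": [1, 1.01, 1.02, 1.05, 1.024]})
    model = Scaler(method = "zscore")
    model.fit(data)

    mem_model = model.to_memmodel()   # in-memory representation of the model
    sql_expr = model.to_sql()         # SQL expression for in-database scoring
    scale_fn = model.to_python()      # plain-Python scaling function

    X = [[1]]
    scale_fn(X)                       # scale a single value in memory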