From d2a7faf7d06de56cc9648896c9d06bffe30721e3 Mon Sep 17 00:00:00 2001 From: abhsharma2 Date: Mon, 30 Oct 2023 13:20:05 -0400 Subject: [PATCH 1/3] docstring changes for memmodel/preprocessing --- .../memmodel/preprocessing.py | 208 ++++++++++++++++-- 1 file changed, 194 insertions(+), 14 deletions(-) diff --git a/verticapy/machine_learning/memmodel/preprocessing.py b/verticapy/machine_learning/memmodel/preprocessing.py index 381a012a2..220056f02 100755 --- a/verticapy/machine_learning/memmodel/preprocessing.py +++ b/verticapy/machine_learning/memmodel/preprocessing.py @@ -28,7 +28,7 @@ class Scaler(InMemoryModel): """ - InMemoryModel implementation of scalers. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of scalers. Parameters ---------- @@ -100,7 +100,7 @@ def transform_sql(self, X: ArrayLike) -> list[str]: class StandardScaler(Scaler): """ - InMemoryModel implementation of standard scaler. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of standard scaler. Parameters ---------- @@ -108,6 +108,65 @@ class StandardScaler(Scaler): Model's features averages. std: ArrayLike Model's features standard deviations. + + .. note:: :py:mod:`verticapy.machine_learning.memmodel` are defined + entirely by their attributes. For example, 'mean', + and 'standard deviation' of feature(S) define a StandardScaler model. + + Examples + -------- + + **Initialization** + + Import the required module. + + .. ipython:: python + + from verticapy.machine_learning.memmodel.preprocessing import StandardScaler + + A StandardScaler model is defined by mean and standard deviation values. In this example, we will use the following: + + .. ipython:: python + + mean = [0.4, 0.1] + std = [0.5, 0.2] + + Let's create a :py:mod:`verticapy.machine_learning.memmodel.preprocessing.StandardScaler` model. + + .. ipython:: python + + model_sts = StandardScaler(mean, std) + + Create a dataset. + + .. 
ipython:: python + + data = [[0.45, 0.17]] + + **Making In-Memory Transformation** + + Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.StandardScaler.transform` method to do transformation + + .. ipython:: python + + model_sts.transform(data) + + **Deploy SQL Code** + + Let's use the following column names: + + .. ipython:: python + + cnames = ['col1', 'col2'] + + Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.StandardScaler.transform_sql` + method to get the SQL code needed to deploy the model using its attributes + + .. ipython:: python + + model_sts.transform_sql(cnames) + + .. hint:: This object can be pickled and used in any in-memory environment, just like `SKLEARN `_ models. """ # Properties. @@ -125,14 +184,74 @@ def __init__(self, mean: ArrayLike, std: ArrayLike) -> None: class MinMaxScaler(Scaler): """ - InMemoryModel implementation of MinMax scaler. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of MinMax scaler. Parameters ---------- - min_: ArrayLike + + min\_: ArrayLike Model's features minimums. - max_: ArrayLike + max\_: ArrayLike Model's features maximums. + + .. note:: :py:mod:`verticapy.machine_learning.memmodel` are defined + entirely by their attributes. For example, 'minimum', + and 'maximum' values of feature(S) defines a MinMaxScaler model. + + Examples + -------- + + **Initialization** + + Import the required module. + + .. ipython:: python + + from verticapy.machine_learning.memmodel.preprocessing import MinMaxScaler + + A MinMaxScaler model is defined by minimum and maximum values. In this example, we will use the following: + + .. ipython:: python + + min = [0.4, 0.1] + max = [0.5, 0.2] + + Let's create a :py:mod:`verticapy.machine_learning.memmodel.preprocessing.MinMaxScaler` model. + + .. ipython:: python + + model_mms = MinMaxScaler(min, max) + + Create a dataset. + + .. 
ipython:: python + + data = [[0.45, 0.17]] + + **Making In-Memory Transformation** + + Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.MinMaxScaler.transform` method to do transformation + + .. ipython:: python + + model_mms.transform(data) + + **Deploy SQL Code** + + Let's use the following column names: + + .. ipython:: python + + cnames = ['col1', 'col2'] + + Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.MinMaxScaler.transform_sql` + method to get the SQL code needed to deploy the model using its attributes + + .. ipython:: python + + model_mms.transform_sql(cnames) + + .. hint:: This object can be pickled and used in any in-memory environment, just like `SKLEARN `_ models. """ # Properties. @@ -150,26 +269,87 @@ def __init__(self, min_: ArrayLike, max_: ArrayLike) -> None: class OneHotEncoder(InMemoryModel): """ - InMemoryModel implementation of one-hot encoder. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of one-hot encoder. Parameters ---------- + categories: ArrayLike ArrayLike of the categories of the different features. column_naming: str, optional Appends categorical levels to column names according to the specified method: - indices : Uses integer indices to represent - categorical levels. - values/values_relaxed: Both methods use categorical level - names. If duplicate column names - occur, the function attempts to - disambiguate them by appending _n, - where n is a zero-based integer - index (_0, _1,…). + + - indices : Uses integer indices to represent categorical levels. + + - values/values_relaxed: Both methods use categorical level names. + If duplicate column names occur, the function attempts to + disambiguate them by appending _n, where n is a zero-based integer index (_0, _1,…). + drop_first: bool, optional If set to False, the first dummy of each category is dropped. + + .. note:: :py:mod:`verticapy.machine_learning.memmodel` are defined + entirely by their attributes. 
For example, 'categories' to encode + defines a OneHotEncoder model. You can optionally provide 'column naming' + criteria and a 'drop_first' flag to denote whether to drop first dummy of each category. + + Examples + -------- + + **Initialization** + + Import the required module. + + .. ipython:: python + + from verticapy.machine_learning.memmodel.preprocessing import OneHotEncoder + + A OneHotEncoder model is defined by categories, column naming criteria and drop_first flag. In this example, we will use the following: + + .. ipython:: python + + categories = [["male", "female"], [1, 2, 3]] + drop_first = False + column_naming = None + + Let's create a :py:mod:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder` model. + + .. ipython:: python + + model_ohe = OneHotEncoder(categories, drop_first, column_naming) + + Create a dataset. + + .. ipython:: python + + data = [["male", 1], ["female", 3]] + + **Making In-Memory Transformation** + + Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder.transform` method to do transformation + + .. ipython:: python + + model_ohe.transform(data) + + **Deploy SQL Code** + + Let's use the following column names: + + .. ipython:: python + + cnames = ['sex', 'pclass'] + + Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder.transform_sql` + method to get the SQL code needed to deploy the model using its attributes + + .. ipython:: python + + model_ohe.transform_sql(cnames) + + .. hint:: This object can be pickled and used in any in-memory environment, just like `SKLEARN `_ models. """ # Properties. 
From 672f6d8f87c837db5ed338e63998a16eb70efdea Mon Sep 17 00:00:00 2001 From: Badr Date: Mon, 30 Oct 2023 14:48:30 -0400 Subject: [PATCH 2/3] Updating the docstring --- .../memmodel/preprocessing.py | 109 ++++++++++++------ 1 file changed, 76 insertions(+), 33 deletions(-) diff --git a/verticapy/machine_learning/memmodel/preprocessing.py b/verticapy/machine_learning/memmodel/preprocessing.py index 220056f02..adac0eaa1 100755 --- a/verticapy/machine_learning/memmodel/preprocessing.py +++ b/verticapy/machine_learning/memmodel/preprocessing.py @@ -28,7 +28,8 @@ class Scaler(InMemoryModel): """ - :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of scalers. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` + implementation of scalers. Parameters ---------- @@ -100,7 +101,8 @@ def transform_sql(self, X: ArrayLike) -> list[str]: class StandardScaler(Scaler): """ - :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of standard scaler. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` + implementation of standard scaler. Parameters ---------- @@ -109,9 +111,12 @@ class StandardScaler(Scaler): std: ArrayLike Model's features standard deviations. - .. note:: :py:mod:`verticapy.machine_learning.memmodel` are defined - entirely by their attributes. For example, 'mean', - and 'standard deviation' of feature(S) define a StandardScaler model. + .. note:: + + :py:mod:`verticapy.machine_learning.memmodel` are defined + entirely by their attributes. For example, 'mean', and + 'standard deviation' of the input features define a StandardScaler + model. Examples -------- @@ -124,14 +129,17 @@ class StandardScaler(Scaler): from verticapy.machine_learning.memmodel.preprocessing import StandardScaler - A StandardScaler model is defined by mean and standard deviation values. In this example, we will use the following: + A StandardScaler model is defined by mean and standard deviation + values. 
In this example, we will use the following: .. ipython:: python mean = [0.4, 0.1] std = [0.5, 0.2] - Let's create a :py:mod:`verticapy.machine_learning.memmodel.preprocessing.StandardScaler` model. + Let's create a + :py:mod:`verticapy.machine_learning.memmodel.preprocessing.StandardScaler` + model. .. ipython:: python @@ -145,7 +153,9 @@ class StandardScaler(Scaler): **Making In-Memory Transformation** - Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.StandardScaler.transform` method to do transformation + Use + :py:meth:`verticapy.machine_learning.memmodel.preprocessing.StandardScaler.transform` + method to do transformation. .. ipython:: python @@ -159,14 +169,18 @@ class StandardScaler(Scaler): cnames = ['col1', 'col2'] - Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.StandardScaler.transform_sql` - method to get the SQL code needed to deploy the model using its attributes + Use + :py:meth:`verticapy.machine_learning.memmodel.preprocessing.StandardScaler.transform_sql` + method to get the SQL code needed to deploy the model using its attributes. .. ipython:: python model_sts.transform_sql(cnames) - .. hint:: This object can be pickled and used in any in-memory environment, just like `SKLEARN `_ models. + .. hint:: + + This object can be pickled and used in any in-memory environment, just + like `SKLEARN `_ models. """ # Properties. @@ -184,7 +198,8 @@ def __init__(self, mean: ArrayLike, std: ArrayLike) -> None: class MinMaxScaler(Scaler): """ - :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of MinMax scaler. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` + implementation of MinMax scaler. Parameters ---------- @@ -194,9 +209,12 @@ class MinMaxScaler(Scaler): max\_: ArrayLike Model's features maximums. - .. note:: :py:mod:`verticapy.machine_learning.memmodel` are defined + .. note:: + + :py:mod:`verticapy.machine_learning.memmodel` are defined entirely by their attributes. 
For example, 'minimum', - and 'maximum' values of feature(S) defines a MinMaxScaler model. + and 'maximum' values of the input features define a + MinMaxScaler model. Examples -------- @@ -209,14 +227,17 @@ class MinMaxScaler(Scaler): from verticapy.machine_learning.memmodel.preprocessing import MinMaxScaler - A MinMaxScaler model is defined by minimum and maximum values. In this example, we will use the following: + A MinMaxScaler model is defined by minimum and maximum values. + In this example, we will use the following: .. ipython:: python min = [0.4, 0.1] max = [0.5, 0.2] - Let's create a :py:mod:`verticapy.machine_learning.memmodel.preprocessing.MinMaxScaler` model. + Let's create a + :py:mod:`verticapy.machine_learning.memmodel.preprocessing.MinMaxScaler` + model. .. ipython:: python @@ -230,7 +251,9 @@ class MinMaxScaler(Scaler): **Making In-Memory Transformation** - Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.MinMaxScaler.transform` method to do transformation + Use + :py:meth:`verticapy.machine_learning.memmodel.preprocessing.MinMaxScaler.transform` + method to do transformation. .. ipython:: python @@ -244,14 +267,18 @@ class MinMaxScaler(Scaler): cnames = ['col1', 'col2'] - Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.MinMaxScaler.transform_sql` - method to get the SQL code needed to deploy the model using its attributes + Use + :py:meth:`verticapy.machine_learning.memmodel.preprocessing.MinMaxScaler.transform_sql` + method to get the SQL code needed to deploy the model using its attributes. .. ipython:: python model_mms.transform_sql(cnames) - .. hint:: This object can be pickled and used in any in-memory environment, just like `SKLEARN `_ models. + .. hint:: + + This object can be pickled and used in any in-memory environment, + just like `SKLEARN `_ models. """ # Properties. 
@@ -269,7 +296,8 @@ def __init__(self, min_: ArrayLike, max_: ArrayLike) -> None: class OneHotEncoder(InMemoryModel): """ - :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of one-hot encoder. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` + implementation of one-hot encoder. Parameters ---------- @@ -280,20 +308,25 @@ class OneHotEncoder(InMemoryModel): Appends categorical levels to column names according to the specified method: - - indices : Uses integer indices to represent categorical levels. + - indices : Uses integer indices to represent + categorical levels. - values/values_relaxed: Both methods use categorical level names. If duplicate column names occur, the function attempts to - disambiguate them by appending _n, where n is a zero-based integer index (_0, _1,…). + disambiguate them by appending _n, where n is a zero-based + integer index (_0, _1,…). drop_first: bool, optional If set to False, the first dummy of each category is dropped. - .. note:: :py:mod:`verticapy.machine_learning.memmodel` are defined - entirely by their attributes. For example, 'categories' to encode - defines a OneHotEncoder model. You can optionally provide 'column naming' - criteria and a 'drop_first' flag to denote whether to drop first dummy of each category. + .. note:: + + :py:mod:`verticapy.machine_learning.memmodel` are defined + entirely by their attributes. For example, 'categories' to + encode defines a OneHotEncoder model. You can optionally + provide 'column naming' criteria and a 'drop_first' flag to + denote whether to drop first dummy of each category. Examples -------- @@ -306,7 +339,9 @@ class OneHotEncoder(InMemoryModel): from verticapy.machine_learning.memmodel.preprocessing import OneHotEncoder - A OneHotEncoder model is defined by categories, column naming criteria and drop_first flag. 
In this example, we will use the following: + A OneHotEncoder model is defined by categories, column naming + criteria and drop_first flag. In this example, we will use the + following: .. ipython:: python @@ -314,7 +349,9 @@ class OneHotEncoder(InMemoryModel): drop_first = False column_naming = None - Let's create a :py:mod:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder` model. + Let's create a + :py:mod:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder` + model. .. ipython:: python @@ -328,7 +365,9 @@ class OneHotEncoder(InMemoryModel): **Making In-Memory Transformation** - Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder.transform` method to do transformation + Use + :py:meth:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder.transform` + method to do transformation. .. ipython:: python @@ -342,14 +381,18 @@ class OneHotEncoder(InMemoryModel): cnames = ['sex', 'pclass'] - Use :py:meth:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder.transform_sql` - method to get the SQL code needed to deploy the model using its attributes + Use + :py:meth:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder.transform_sql` + method to get the SQL code needed to deploy the model using its attributes. .. ipython:: python model_ohe.transform_sql(cnames) - .. hint:: This object can be pickled and used in any in-memory environment, just like `SKLEARN `_ models. + .. hint:: + + This object can be pickled and used in any in-memory environment, just + like `SKLEARN `_ models. """ # Properties. 
From 14fbb84fbedbe3296fd18c1d06f0a81612c26500 Mon Sep 17 00:00:00 2001 From: Badr Date: Mon, 30 Oct 2023 15:17:05 -0400 Subject: [PATCH 3/3] Update preprocessing.py --- .../machine_learning/memmodel/preprocessing.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/verticapy/machine_learning/memmodel/preprocessing.py b/verticapy/machine_learning/memmodel/preprocessing.py index adac0eaa1..3047625cc 100755 --- a/verticapy/machine_learning/memmodel/preprocessing.py +++ b/verticapy/machine_learning/memmodel/preprocessing.py @@ -340,14 +340,7 @@ class OneHotEncoder(InMemoryModel): from verticapy.machine_learning.memmodel.preprocessing import OneHotEncoder A OneHotEncoder model is defined by categories, column naming - criteria and drop_first flag. In this example, we will use the - following: - - .. ipython:: python - - categories = [["male", "female"], [1, 2, 3]] - drop_first = False - column_naming = None + criteria and drop_first flag. Let's create a :py:mod:`verticapy.machine_learning.memmodel.preprocessing.OneHotEncoder` @@ -355,7 +348,11 @@ class OneHotEncoder(InMemoryModel): .. ipython:: python - model_ohe = OneHotEncoder(categories, drop_first, column_naming) + model_ohe = OneHotEncoder( + categories = [["male", "female"], [1, 2, 3]], + drop_first = False, + column_naming = None, + ) Create a dataset.