From 7ea46289f76b8e5b71350063d42e727897b6de3d Mon Sep 17 00:00:00 2001
From: Umar Farooq Ghumman <46414488+mail4umar@users.noreply.github.com>
Date: Wed, 1 Nov 2023 03:15:02 +0500
Subject: [PATCH] Sphinx Docstring Regression Metrics (#801)

* Sphinx Docstring Regression Metrics

* added missing metrics docstring

* entire correction

* Update regression.py

---------

Co-authored-by: Badr <badr.ouali@outlook.fr>
---
 .../machine_learning/metrics/regression.py    | 752 +++++++++++++++++-
 1 file changed, 737 insertions(+), 15 deletions(-)

diff --git a/verticapy/machine_learning/metrics/regression.py b/verticapy/machine_learning/metrics/regression.py
index f1c6a0d51..775d9a044 100755
--- a/verticapy/machine_learning/metrics/regression.py
+++ b/verticapy/machine_learning/metrics/regression.py
@@ -69,6 +69,67 @@ def aic_score(
 ) -> float:
     """
     Returns the AIC score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import aic_score
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        aic_score(
+            y_true = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    It is also possible to directly compute the score
+    from the vDataFrame:
+
+    .. ipython:: python
+
+        data.score(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            metric  = "aic",
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return regression_report(y_true, y_score, input_relation, metrics="aic", k=k)
 
@@ -81,6 +142,67 @@ def bic_score(
 ) -> float:
     """
     Returns the BIC score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import bic_score
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        bic_score(
+            y_true = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    It is also possible to directly compute the score
+    from the vDataFrame:
+
+    .. ipython:: python
+
+        data.score(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            metric  = "bic",
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return regression_report(y_true, y_score, input_relation, metrics="bic", k=k)
 
@@ -106,6 +228,67 @@ def explained_variance(y_true: str, y_score: str, input_relation: SQLRelation) -
     -------
     float
         score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import explained_variance
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        explained_variance(
+            y_true = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    It is also possible to directly compute the score
+    from the vDataFrame:
+
+    .. ipython:: python
+
+        data.score(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            metric  = "explained_variance",
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return regression_report(
         y_true, y_score, input_relation, metrics="explained_variance"
@@ -133,6 +316,67 @@ def max_error(y_true: str, y_score: str, input_relation: SQLRelation) -> float:
     -------
     float
         score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import max_error
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        max_error(
+            y_true = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    It is also possible to directly compute the score
+    from the vDataFrame:
+
+    .. ipython:: python
+
+        data.score(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            metric  = "max_error",
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return regression_report(y_true, y_score, input_relation, metrics="max_error")
 
@@ -160,6 +404,67 @@ def mean_absolute_error(
     -------
     float
         score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import mean_absolute_error
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        mean_absolute_error(
+            y_true = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    It is also possible to directly compute the score
+    from the vDataFrame:
+
+    .. ipython:: python
+
+        data.score(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            metric  = "mean_absolute_error",
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return regression_report(y_true, y_score, input_relation, metrics="mae")
 
@@ -190,6 +495,67 @@ def mean_squared_error(
     -------
     float
         score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import mean_squared_error
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        mean_squared_error(
+            y_true = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    It is also possible to directly compute the score
+    from the vDataFrame:
+
+    .. ipython:: python
+
+        data.score(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            metric  = "mean_squared_error",
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return regression_report(
         y_true, y_score, input_relation, metrics="rmse" if root else "mse"
@@ -219,6 +585,67 @@ def mean_squared_log_error(
     -------
     float
         score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import mean_squared_log_error
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        mean_squared_log_error(
+            y_true = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    It is also possible to directly compute the score
+    from the vDataFrame:
+
+    .. ipython:: python
+
+        data.score(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            metric  = "mean_squared_log_error",
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return regression_report(y_true, y_score, input_relation, metrics="msle")
 
@@ -246,6 +673,67 @@ def median_absolute_error(
     -------
     float
         score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import median_absolute_error
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        median_absolute_error(
+            y_true = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    It is also possible to directly compute the score
+    from the vDataFrame:
+
+    .. ipython:: python
+
+        data.score(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            metric  = "median_absolute_error",
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return regression_report(
         y_true, y_score, input_relation, metrics="median_absolute_error"
@@ -280,6 +768,57 @@ def quantile_error(
     -------
     float
         score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import quantile_error
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        quantile_error(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+            q = 0.25, # First Quartile
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return regression_report(y_true, y_score, input_relation, metrics=f"qe{100 * q}%")
 
@@ -316,6 +855,67 @@ def r2_score(
     -------
     float
         score.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import r2_score
+
+    Now we can conveniently calculate the score:
+
+    .. ipython:: python
+
+        r2_score(
+            y_true = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    It is also possible to directly compute the score
+    from the vDataFrame:
+
+    .. ipython:: python
+
+        data.score(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            metric  = "r2",
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     if adj:
         kwargs = {"metrics": "r2_adj", "k": k}
@@ -357,6 +957,56 @@ def anova_table(
     -------
     TableSample
         ANOVA table.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import anova_table
+
+    Now we can conveniently compute the ANOVA table:
+
+    .. ipython:: python
+
+        anova_table(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     n, avg = _executeSQL(
         query=f"""
@@ -426,21 +1076,43 @@ def regression_report(
     metrics: list, optional
         List of the metrics used to compute the final
         report.
-            aic    : Akaike’s Information Criterion
-            bic    : Bayesian Information Criterion
-            max    : Max Error
-            mae    : Mean Absolute Error
-            median : Median Absolute Error
-            mse    : Mean Squared Error
-            msle   : Mean Squared Log Error
-            r2     : R squared coefficient
-            r2a    : R2 adjusted
-            qe     : quantile error, the quantile must be
-                     included in the name. Example:
-                        qe50.1% will  return the quantile
-                        error using q=0.501.
-            rmse   : Root Mean Squared Error
-            var    : Explained Variance
+
+        - aic:
+            Akaike's Information Criterion
+
+        - bic:
+            Bayesian Information Criterion
+
+        - max:
+            Max Error
+
+        - mae:
+            Mean Absolute Error
+
+        - median:
+            Median Absolute Error
+
+        - mse:
+            Mean Squared Error
+
+        - msle:
+            Mean Squared Log Error
+
+        - r2:
+            R squared coefficient
+
+        - r2a:
+            R2 adjusted
+
+        - qe:
+            quantile error, the quantile must be
+            included in the name. Example:
+            qe50.1% will  return the quantile
+            error using q=0.501.
+
+        - rmse: Root Mean Squared Error
+        - var: Explained Variance
+
     k: int, optional
         Number  of predictors. Used  to compute the adjusted
         R2.
@@ -450,6 +1122,56 @@ def regression_report(
     -------
     TableSample
         report.
+
+    Examples
+    ---------
+
+    We should first import verticapy.
+
+    .. ipython:: python
+
+        import verticapy as vp
+
+    Let's create a small dataset that has:
+
+    - true value
+    - predicted value
+
+    .. ipython:: python
+
+        data = vp.vDataFrame(
+            {
+                "y_true": [1, 1.5, 3, 2, 5],
+                "y_pred": [1.1, 1.55, 2.9, 2.01, 4.5],
+            }
+        )
+
+    Next, we import the metric:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.metrics import regression_report
+
+    Now we can conveniently compute the report:
+
+    .. ipython:: python
+
+        regression_report(
+            y_true  = "y_true",
+            y_score = "y_pred",
+            input_relation = data,
+        )
+
+    .. note::
+
+        VerticaPy uses simple SQL queries to compute various metrics.
+        You can use the :py:func:`verticapy.set_option` function with
+        the ``sql_on`` parameter to enable SQL generation and examine
+        the generated queries.
+
+    .. seealso::
+
+        :py:meth:`verticapy.vDataFrame.score`
     """
     return_scalar = False
     if isinstance(metrics, str):