diff --git a/verticapy/machine_learning/vertica/tsa.py b/verticapy/machine_learning/vertica/tsa.py index 16ae1f05b..b087c4b98 100755 --- a/verticapy/machine_learning/vertica/tsa.py +++ b/verticapy/machine_learning/vertica/tsa.py @@ -793,6 +793,8 @@ class ARIMA(TimeSeriesModelBase): """ Creates a inDB ARIMA model. + .. versionadded:: 12.0.0 + Parameters ---------- name: str, optional @@ -848,7 +850,259 @@ class ARIMA(TimeSeriesModelBase): `Examples `_ section on the website. - ... + + Load data for machine learning + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + We import ``verticapy``: + + .. ipython:: python + + import verticapy as vp + + .. hint:: + + By assigning an alias to ``verticapy``, we mitigate the risk + of code collisions with other libraries. This precaution is + necessary because verticapy uses commonly known function names + like "average" and "median", which can potentially lead to naming + conflicts. The use of an alias ensures that the functions from + verticapy are used as intended without interfering with functions + from other libraries. + + For this example, we will use the airline passengers dataset. + + .. code-block:: python + + import verticapy.datasets as vpd + + data = vpd.load_airline_passengers() + + .. raw:: html + :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_airline_passengers.html + + .. note:: + + VerticaPy offers a wide range of sample datasets that are + ideal for training and testing purposes. You can explore + the full list of available datasets in the :ref:`api.datasets`, + which provides detailed information on each dataset + and how to use them effectively. These datasets are invaluable + resources for honing your data analysis and machine learning + skills within the VerticaPy environment. + + .. ipython:: python + :suppress: + + import verticapy.datasets as vpd + data = vpd.load_airline_passengers() + + Model Initialization + ^^^^^^^^^^^^^^^^^^^^^ + + First we import the ``ARIMA`` model: + + .. 
ipython:: python + + from verticapy.machine_learning.vertica.tsa import ARIMA + + Then we can create the model: + + .. ipython:: python + :okwarning: + + model = ARIMA(order = (12, 0, 2)) + + .. hint:: + + In ``verticapy`` 1.0.x and higher, you do not need to specify the + model name, as the name is automatically assigned. If you need to + re-use the model, you can fetch the model name from the model's + attributes. + + .. important:: + + The model name is crucial for the model management system and + versioning. It's highly recommended to provide a name if you + plan to reuse the model later. + + + Model Training + ^^^^^^^^^^^^^^^ + + We can now fit the model: + + .. ipython:: python + :okwarning: + + model.fit(data, "date", "passengers") + + .. important:: + + To train a model, you can directly use the ``vDataFrame`` or the + name of the relation stored in the database. The test set is optional + and is only used to compute the test metrics. In ``verticapy``, we + don't work using ``X`` matrices and ``y`` vectors. Instead, we work + directly with lists of predictors and the response name. + + Features Importance + ^^^^^^^^^^^^^^^^^^^^ + + We can conveniently get the features importance: + + .. ipython:: python + :okwarning: + + model.features_importance() + + .. ipython:: python + :suppress: + :okwarning: + + vp.set_option("plotting_lib", "plotly") + fig = model.features_importance() + fig.write_html("SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_arima_features.html") + + + .. raw:: html + :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_arima_features.html + + + Metrics + ^^^^^^^^ + + We can get the entire report using: + + .. code-block:: python + + model.report() + + .. ipython:: python + :suppress: + :okwarning: + + result = model.report() + html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_arima_report.html", "w") + html_file.write(result._repr_html_()) + html_file.close() + + .. 
raw:: html + :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_arima_report.html + + .. important:: + + Most metrics are computed using a single SQL query, but some of them might + require multiple SQL queries. Selecting only the necessary metrics in the + report can help optimize performance. + E.g. ``model.report(metrics = ["mse", "r2"])``. + + You can utilize the + :py:mod:`verticapy.machine_learning.vertica.tsa.ARIMA.score` + function to calculate various regression metrics, with the explained + variance being the default. + + .. ipython:: python + :okwarning: + + model.score() + + Prediction + ^^^^^^^^^^^ + + Prediction is straightforward: + + .. code-block:: python + + model.predict() + + .. ipython:: python + :suppress: + :okwarning: + + result = model.predict() + html_file = open("figures/machine_learning_vertica_tsa_arima_prediction.html", "w") + html_file.write(result._repr_html_()) + html_file.close() + + .. raw:: html + :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_arima_prediction.html + + .. hint:: + + You can control the number of prediction steps by changing + the ``npredictions`` parameter: + ``model.predict(npredictions=30)``. + + .. note:: + + Predictions can be made automatically by using the training set, + in which case you don't need to specify the predictors. Alternatively, you + can pass only the ``vDataFrame`` to the + :py:mod:`verticapy.machine_learning.vertica.tsa.ARIMA.predict` + function, but in this case, it's essential that the column names of + the ``vDataFrame`` match the predictors and response name in the + model. + + If you would like to have the time-stamps in the output then you + can switch on the ``output_estimated_ts`` parameter. And if you + would also like to see the standard error then you can switch on the + ``output_standard_errors`` parameter: + + .. code-block:: python + + model.predict(output_estimated_ts = True, output_standard_errors = True) + + .. 
ipython:: python + :suppress: + :okwarning: + + result = model.predict(output_estimated_ts = True, output_standard_errors = True) + html_file = open("figures/machine_learning_vertica_tsa_arima_prediction_2.html", "w") + html_file.write(result._repr_html_()) + html_file.close() + + .. raw:: html + :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_arima_prediction_2.html + + Plots + ^^^^^^ + + We can conveniently plot the predictions on a line plot + to observe the efficacy of our model: + + .. code-block:: python + + model.plot(data, "date", "passengers", npredictions = 80, start=120) + + .. ipython:: python + :suppress: + :okwarning: + + vp.set_option("plotting_lib", "plotly") + fig = model.plot(data, "date", "passengers", npredictions = 80, start=120, width =650) + fig.write_html("figures/machine_learning_vertica_tsa_arima_plot_1.html") + + + .. raw:: html + :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_arima_plot_1.html + + .. note:: + + You can control the number of prediction steps by changing + the ``npredictions`` parameter: + ``model.plot(npredictions=30)``. + + Model Register + ^^^^^^^^^^^^^^ + + In order to register the model for tracking and versioning: + + .. code-block:: python + + model.register("model_v1") + + Please refer to :ref:`notebooks/ml/model_tracking_versioning/index.html` + for more details on model tracking and versioning. """ # Properties. @@ -1023,7 +1277,7 @@ class AR(TimeSeriesModelBase): """ Creates a inDB Autoregressor model. - .. versionadded: 11.0.0 + .. versionadded:: 11.0.0 Parameters ---------- @@ -1104,7 +1358,7 @@ class AR(TimeSeriesModelBase): verticapy are used as intended without interfering with functions from other libraries. - For this example, we will generate a dummy time-series + For this example, we will generate a dummy time-series dataset. .. ipython:: python @@ -1115,7 +1369,7 @@ class AR(TimeSeriesModelBase): "GB": [5, 10, 20, 35, 55, 80, 110, 145, 185, 230] } ) - + .. 
ipython:: python :suppress: @@ -1214,7 +1468,7 @@ class AR(TimeSeriesModelBase): We can get the entire report using: .. code-block:: python - + model.report() .. ipython:: python @@ -1271,18 +1525,43 @@ class AR(TimeSeriesModelBase): You can control the number of prediction steps by changing the ``npredictions`` parameter: - ``model.predict(npredictions=30)``. + ``model.predict(npredictions=30)``. .. note:: - Predictions can be made automatically calculated using the test set, in which - case you don't need to specify the predictors. Alternatively, you + Predictions can be made automatically by using the training set, + in which case you don't need to specify the predictors. Alternatively, you can pass only the ``vDataFrame`` to the :py:mod:`verticapy.machine_learning.vertica.tsa.AR.predict` function, but in this case, it's essential that the column names of the ``vDataFrame`` match the predictors and response name in the model. + If you would like to have the time-stamps in the output then you + can use: + + .. code-block:: python + + model.predict(output_estimated_ts = True) + + .. ipython:: python + :suppress: + :okwarning: + + result = model.predict(output_estimated_ts = True) + html_file = open("figures/machine_learning_vertica_tsa_ar_prediction_2.html", "w") + html_file.write(result._repr_html_()) + html_file.close() + + .. raw:: html + :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_ar_prediction_2.html + + .. important:: + + In order to get the standard error use the + ``output_standard_errors`` parameter and switch + it to True. + Plots ^^^^^^ @@ -1299,17 +1578,17 @@ class AR(TimeSeriesModelBase): vp.set_option("plotting_lib", "plotly") fig = model.plot(width=550) - fig.write_html("figures/machine_learning_vertica_tsa_plot_1.html") + fig.write_html("figures/machine_learning_vertica_tsa_ar_plot_1.html") .. 
raw:: html - :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_plot_1.html + :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_tsa_ar_plot_1.html .. note:: You can control the number of prediction steps by changing the ``npredictions`` parameter: - ``model.plot(npredictions=30)``. + ``model.plot(npredictions=30)``. Model Register ^^^^^^^^^^^^^^