Skip to content

Commit

Permalink
Adding TIME SERIES models (AR, MA, ARMA, ARIMA) (#830)
Browse files Browse the repository at this point in the history
* Adding ARIMA model

* Multiple support

 - AR, MA, ARMA
 - load_model
 - improving code quality

* corrections

 - fi

* Possibility to draw TS

* correction: start parameter

* corrections

* corrections

* corrections

* Correcting plots + adding metrics

* Supporting estimation of "ts"

* supporting Highcharts

* correction + plotly

* Update tsa.py

* Update tsa.py

* Update tsa.py

* Docstring for AR

* Added Docstring for ARIMA

* Docstring for ARMA

* multiple corrections

---------

Co-authored-by: umar <[email protected]>
  • Loading branch information
oualib and mail4umar authored Nov 6, 2023
1 parent 69056e1 commit 12ba3a9
Show file tree
Hide file tree
Showing 17 changed files with 2,761 additions and 22 deletions.
4 changes: 4 additions & 0 deletions verticapy/_utils/_sql/_vertica_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from verticapy.errors import VersionError

MINIMUM_VERTICA_VERSION = {
"ARIMA": [23, 3, 0],
"AR": [11, 0, 0],
"ARMA": [12, 0, 3],
"Balance": [8, 1, 1],
"BernoulliNB": [8, 0, 0],
"BisectingKMeans": [9, 3, 1],
Expand All @@ -47,6 +50,7 @@
"LogisticRegression": [8, 0, 0],
"KMeans": [8, 0, 0],
"KPrototypes": [12, 0, 3],
"MA": [11, 0, 0],
"MCA": [9, 1, 0],
"MinMaxScaler": [8, 1, 0],
"MultinomialNB": [8, 0, 0],
Expand Down
1 change: 1 addition & 0 deletions verticapy/machine_learning/vertica/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,4 @@
DummyTreeClassifier,
DummyTreeRegressor,
)
from verticapy.machine_learning.vertica.tsa import ARIMA, ARMA, AR, MA
38 changes: 22 additions & 16 deletions verticapy/machine_learning/vertica/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,8 @@ def fit(
str
model's summary.
"""

# Initialization
if self.overwrite_model:
self.drop()
else:
Expand Down Expand Up @@ -819,7 +821,7 @@ def fit(
ROW_NUMBER() OVER
(ORDER BY {', '.join(X)})
AS {id_column_name}"""
tmp_view = False
tmp_view = False
if isinstance(input_relation, vDataFrame) or (id_column):
tmp_view = True
if isinstance(input_relation, vDataFrame):
Expand All @@ -830,10 +832,9 @@ def fit(
relation = gen_tmp_name(
schema=schema_relation(self.model_name)[0], name="view"
)
drop(relation, method="view")
_executeSQL(
query=f"""
CREATE VIEW {relation} AS
CREATE OR REPLACE VIEW {relation} AS
SELECT
/*+LABEL('learn.VerticaModel.fit')*/
*{id_column}
Expand All @@ -849,6 +850,7 @@ def fit(
self.test_relation = test_relation
else:
self.test_relation = self.input_relation
# Fitting
if self._is_native:
parameters = self._get_vertica_param_dict()
if (
Expand Down Expand Up @@ -917,15 +919,6 @@ def _attributes(self) -> list:
def __init__(self) -> None:
"""Must be overridden in the child class"""
self.features_importance_trees_ = {}
return None
# self.input_relation = None
# self.test_relation = None
# self.X = None
# self.y = None
# self.parameters = {}
# self.classes_ = None
# for att in self._attributes:
# setattr(self, att, None)

def _compute_trees_arrays(
self, tree: TableSample, X: list, return_probability: bool = False
Expand Down Expand Up @@ -1265,6 +1258,10 @@ def plot_tree(


class BinaryClassifier(Supervised):
"""
Base Class for Vertica Binary Classifiers.
"""

# Properties.

@property
Expand Down Expand Up @@ -1805,13 +1802,16 @@ def roc_curve(


class MulticlassClassifier(Supervised):
"""
Base Class for Vertica Multiclass Classifiers.
"""

# System & Special Methods.

@abstractmethod
def __init__(self, name: str, overwrite_model: bool = False) -> None:
"""Must be overridden in the child class"""
super().__init__(name, overwrite_model)
# self.classes_ = None

def _check_pos_label(self, pos_label: PythonScalar) -> PythonScalar:
"""
Expand Down Expand Up @@ -2645,6 +2645,10 @@ def roc_curve(


class Regressor(Supervised):
"""
Base Class for Vertica Regressors.
"""

# System & Special Methods.

@abstractmethod
Expand Down Expand Up @@ -2845,7 +2849,7 @@ def predict(
Returns
-------
vDataFrame
the input object.
the input object.
"""
if hasattr(self, "_predict"):
return self._predict(vdf=vdf, X=X, name=name, inplace=inplace)
Expand Down Expand Up @@ -2904,6 +2908,8 @@ def fit(
str
model's summary.
"""

# Initialization
if self.overwrite_model:
self.drop()
else:
Expand Down Expand Up @@ -2938,10 +2944,9 @@ def fit(
relation = gen_tmp_name(
schema=schema_relation(self.model_name)[0], name="view"
)
drop(relation, method="view")
_executeSQL(
query=f"""
CREATE VIEW {relation} AS
CREATE OR REPLACE VIEW {relation} AS
SELECT
/*+LABEL('learn.VerticaModel.fit')*/ *
{id_column}
Expand All @@ -2962,6 +2967,7 @@ def fit(
parameters = self._get_vertica_param_dict()
if "num_components" in parameters and not parameters["num_components"]:
del parameters["num_components"]
# Fitting
fun = self._vertica_fit_sql if self._model_type != "MCA" else "PCA"
query = f"""
SELECT
Expand Down
8 changes: 8 additions & 0 deletions verticapy/machine_learning/vertica/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@


class LinearModel:
"""
Base Class for Vertica Linear Models.
"""

# Properties.

@property
Expand Down Expand Up @@ -188,6 +192,10 @@ def plot(


class LinearModelClassifier(LinearModel):
"""
Base Class for Vertica Linear Model Classifiers.
"""

# Properties.

@property
Expand Down
26 changes: 26 additions & 0 deletions verticapy/machine_learning/vertica/model_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,13 @@
Lasso,
LinearRegression,
LogisticRegression,
PoissonRegressor,
Ridge,
)
from verticapy.machine_learning.vertica.naive_bayes import NaiveBayes
from verticapy.machine_learning.vertica.preprocessing import Scaler, OneHotEncoder
from verticapy.machine_learning.vertica.svm import LinearSVC, LinearSVR
from verticapy.machine_learning.vertica.tsa import ARIMA, AR, MA


@save_verticapy_logs
Expand Down Expand Up @@ -139,6 +141,9 @@ def load_model(
info = info[0]
info = eval("[" + info + "]")
lookup_table = {
"arima": ARIMA,
"autoregressor": AR,
"moving_average": MA,
"rf_regressor": RandomForestRegressor,
"rf_classifier": RandomForestClassifier,
"iforest": IsolationForest,
Expand All @@ -149,6 +154,7 @@ def load_model(
"svm_regressor": LinearSVR,
"svm_classifier": LinearSVC,
"linear_reg": LinearRegression,
"poisson_reg": PoissonRegressor,
"kmeans": KMeans,
"kprototypes": KPrototypes,
"bisecting_kmeans": BisectingKMeans,
Expand All @@ -158,9 +164,20 @@ def load_model(
}
model = lookup_table[model_type](name)
if model_type != "svd":
# Variables used in the CALL STRING
true, false = True, False
squarederror = "squarederror"
crossentropy = "crossentropy"
ols = "ols"
hr = "hr"
linear_interpolation = "linear_interpolation"
zero = "zero"
error = "error"
drop = "drop"
if "method=yule-walker," in parameters:
parameters = parameters.replace(
"method=yule-walker,", "method='yule-walker',"
)
if " lambda=" in parameters:
parameters = parameters.replace(" lambda=", " C=")
try:
Expand All @@ -186,6 +203,15 @@ def load_model(
model.y = info[2]
model.X = eval("[" + info[3] + "]")
model.test_relation = test_relation if (test_relation) else model.input_relation
elif model._model_category == "TIMESERIES":
model.y = info[2]
model.ts = info[3]
model.test_relation = test_relation if (test_relation) else model.input_relation
if model._model_type == "ARIMA":
p = int(model.get_vertica_attributes("p")["p"][0])
d = int(model.get_vertica_attributes("d")["d"][0])
q = int(model.get_vertica_attributes("q")["q"][0])
model.set_params({"order": (p, d, q)})
else:
model.X = eval("[" + info[2] + "]")
model._compute_attributes()
Expand Down
Loading

0 comments on commit 12ba3a9

Please sign in to comment.