Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding TIME SERIES models (AR, MA, ARMA, ARIMA) #830

Merged: 19 commits were merged on Nov 6, 2023 (the source and target branch names are missing from this capture).
4 changes: 4 additions & 0 deletions verticapy/_utils/_sql/_vertica_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from verticapy.errors import VersionError

MINIMUM_VERTICA_VERSION = {
"ARIMA": [12, 0, 3],
"AR": [11, 0, 0],
"ARMA": [12, 0, 0],
"Balance": [8, 1, 1],
"BernoulliNB": [8, 0, 0],
"BisectingKMeans": [9, 3, 1],
Expand All @@ -47,6 +50,7 @@
"LogisticRegression": [8, 0, 0],
"KMeans": [8, 0, 0],
"KPrototypes": [12, 0, 3],
"MA": [11, 0, 0],
"MCA": [9, 1, 0],
"MinMaxScaler": [8, 1, 0],
"MultinomialNB": [8, 0, 0],
Expand Down
1 change: 1 addition & 0 deletions verticapy/machine_learning/vertica/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,4 @@
DummyTreeClassifier,
DummyTreeRegressor,
)
from verticapy.machine_learning.vertica.tsa import ARIMA, ARMA, AR, MA
38 changes: 22 additions & 16 deletions verticapy/machine_learning/vertica/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,8 @@ def fit(
str
model's summary.
"""

# Initialization
if self.overwrite_model:
self.drop()
else:
Expand Down Expand Up @@ -819,7 +821,7 @@ def fit(
ROW_NUMBER() OVER
(ORDER BY {', '.join(X)})
AS {id_column_name}"""
tmp_view = False
tmp_view = False
if isinstance(input_relation, vDataFrame) or (id_column):
tmp_view = True
if isinstance(input_relation, vDataFrame):
Expand All @@ -830,10 +832,9 @@ def fit(
relation = gen_tmp_name(
schema=schema_relation(self.model_name)[0], name="view"
)
drop(relation, method="view")
_executeSQL(
query=f"""
CREATE VIEW {relation} AS
CREATE OR REPLACE VIEW {relation} AS
SELECT
/*+LABEL('learn.VerticaModel.fit')*/
*{id_column}
Expand All @@ -849,6 +850,7 @@ def fit(
self.test_relation = test_relation
else:
self.test_relation = self.input_relation
# Fitting
if self._is_native:
parameters = self._get_vertica_param_dict()
if (
Expand Down Expand Up @@ -917,15 +919,6 @@ def _attributes(self) -> list:
def __init__(self) -> None:
"""Must be overridden in the child class"""
self.features_importance_trees_ = {}
return None
# self.input_relation = None
# self.test_relation = None
# self.X = None
# self.y = None
# self.parameters = {}
# self.classes_ = None
# for att in self._attributes:
# setattr(self, att, None)

def _compute_trees_arrays(
self, tree: TableSample, X: list, return_probability: bool = False
Expand Down Expand Up @@ -1265,6 +1258,10 @@ def plot_tree(


class BinaryClassifier(Supervised):
"""
Base Class for Vertica Binary Classifier.
"""

# Properties.

@property
Expand Down Expand Up @@ -1805,13 +1802,16 @@ def roc_curve(


class MulticlassClassifier(Supervised):
"""
Base Class for Vertica Multiclass Classifiers.
"""

# System & Special Methods.

@abstractmethod
def __init__(self, name: str, overwrite_model: bool = False) -> None:
"""Must be overridden in the child class"""
super().__init__(name, overwrite_model)
# self.classes_ = None

def _check_pos_label(self, pos_label: PythonScalar) -> PythonScalar:
"""
Expand Down Expand Up @@ -2645,6 +2645,10 @@ def roc_curve(


class Regressor(Supervised):
"""
Base Class for Vertica Regressors.
"""

# System & Special Methods.

@abstractmethod
Expand Down Expand Up @@ -2845,7 +2849,7 @@ def predict(
Returns
-------
vDataFrame
the input object.
the input object.
"""
if hasattr(self, "_predict"):
return self._predict(vdf=vdf, X=X, name=name, inplace=inplace)
Expand Down Expand Up @@ -2904,6 +2908,8 @@ def fit(
str
model's summary.
"""

# Initialization
if self.overwrite_model:
self.drop()
else:
Expand Down Expand Up @@ -2938,10 +2944,9 @@ def fit(
relation = gen_tmp_name(
schema=schema_relation(self.model_name)[0], name="view"
)
drop(relation, method="view")
_executeSQL(
query=f"""
CREATE VIEW {relation} AS
CREATE OR REPLACE VIEW {relation} AS
SELECT
/*+LABEL('learn.VerticaModel.fit')*/ *
{id_column}
Expand All @@ -2962,6 +2967,7 @@ def fit(
parameters = self._get_vertica_param_dict()
if "num_components" in parameters and not parameters["num_components"]:
del parameters["num_components"]
# Fitting
fun = self._vertica_fit_sql if self._model_type != "MCA" else "PCA"
query = f"""
SELECT
Expand Down
8 changes: 8 additions & 0 deletions verticapy/machine_learning/vertica/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@


class LinearModel:
"""
Base Class for Vertica Linear Models.
"""

# Properties.

@property
Expand Down Expand Up @@ -188,6 +192,10 @@ def plot(


class LinearModelClassifier(LinearModel):
"""
Base Class for Vertica Linear Models Classifiers.
"""

# Properties.

@property
Expand Down
26 changes: 26 additions & 0 deletions verticapy/machine_learning/vertica/model_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,13 @@
Lasso,
LinearRegression,
LogisticRegression,
PoissonRegressor,
Ridge,
)
from verticapy.machine_learning.vertica.naive_bayes import NaiveBayes
from verticapy.machine_learning.vertica.preprocessing import Scaler, OneHotEncoder
from verticapy.machine_learning.vertica.svm import LinearSVC, LinearSVR
from verticapy.machine_learning.vertica.tsa import ARIMA, AR, MA


@save_verticapy_logs
Expand Down Expand Up @@ -139,6 +141,9 @@ def load_model(
info = info[0]
info = eval("[" + info + "]")
lookup_table = {
"arima": ARIMA,
"autoregressor": AR,
"moving_average": MA,
"rf_regressor": RandomForestRegressor,
"rf_classifier": RandomForestClassifier,
"iforest": IsolationForest,
Expand All @@ -149,6 +154,7 @@ def load_model(
"svm_regressor": LinearSVR,
"svm_classifier": LinearSVC,
"linear_reg": LinearRegression,
"poisson_reg": PoissonRegressor,
"kmeans": KMeans,
"kprototypes": KPrototypes,
"bisecting_kmeans": BisectingKMeans,
Expand All @@ -158,9 +164,20 @@ def load_model(
}
model = lookup_table[model_type](name)
if model_type != "svd":
# Variables used in the CALL STRING
true, false = True, False
squarederror = "squarederror"
crossentropy = "crossentropy"
ols = "ols"
hr = "hr"
linear_interpolation = "linear_interpolation"
zero = "zero"
error = "error"
drop = "drop"
if "method=yule-walker," in parameters:
parameters = parameters.replace(
"method=yule-walker,", "method='yule-walker',"
)
if " lambda=" in parameters:
parameters = parameters.replace(" lambda=", " C=")
try:
Expand All @@ -186,6 +203,15 @@ def load_model(
model.y = info[2]
model.X = eval("[" + info[3] + "]")
model.test_relation = test_relation if (test_relation) else model.input_relation
elif model._model_category == "TIMESERIES":
model.y = info[2]
model.ts = info[3]
model.test_relation = test_relation if (test_relation) else model.input_relation
if model._model_type == "ARIMA":
p = int(model.get_vertica_attributes("p")["p"][0])
d = int(model.get_vertica_attributes("d")["d"][0])
q = int(model.get_vertica_attributes("q")["q"][0])
model.set_params({"order": (p, d, q)})
else:
model.X = eval("[" + info[2] + "]")
model._compute_attributes()
Expand Down
Loading