From 990cd80585dd945e0ad5015abb194f457f1c8b15 Mon Sep 17 00:00:00 2001 From: Badr Date: Tue, 7 Nov 2023 11:57:11 -0500 Subject: [PATCH 1/3] Version correction TS --- verticapy/_utils/_sql/_vertica_version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/verticapy/_utils/_sql/_vertica_version.py b/verticapy/_utils/_sql/_vertica_version.py index 2a694ff34..4fdc0c633 100755 --- a/verticapy/_utils/_sql/_vertica_version.py +++ b/verticapy/_utils/_sql/_vertica_version.py @@ -22,9 +22,9 @@ from verticapy.errors import VersionError MINIMUM_VERTICA_VERSION = { - "ARIMA": [23, 3, 0], + "ARIMA": [23, 4, 0], "AR": [11, 0, 0], - "ARMA": [12, 0, 3], + "ARMA": [12, 0, 4], "Balance": [8, 1, 1], "BernoulliNB": [8, 0, 0], "BisectingKMeans": [9, 3, 1], From f7173c77762846a7379a00a96d234ab885607fb0 Mon Sep 17 00:00:00 2001 From: Badr Date: Tue, 7 Nov 2023 19:35:32 -0500 Subject: [PATCH 2/3] TF INTEGRATION + PMML Correction - TF object - improvement of PMML - load_model support for PMML and TF - more precise docstring --- verticapy/_utils/_sql/_vertica_version.py | 1 + .../machine_learning/vertica/__init__.py | 1 + verticapy/machine_learning/vertica/base.py | 46 +++++- .../vertica/model_management.py | 32 +++- verticapy/machine_learning/vertica/pmml.py | 6 +- .../machine_learning/vertica/tensorflow.py | 152 ++++++++++++++++++ .../vertica/test_base_model_methods.py | 2 +- 7 files changed, 230 insertions(+), 10 deletions(-) create mode 100644 verticapy/machine_learning/vertica/tensorflow.py diff --git a/verticapy/_utils/_sql/_vertica_version.py b/verticapy/_utils/_sql/_vertica_version.py index 4fdc0c633..04e129b28 100755 --- a/verticapy/_utils/_sql/_vertica_version.py +++ b/verticapy/_utils/_sql/_vertica_version.py @@ -71,6 +71,7 @@ "soundex_matches": [10, 1, 0], "StandardScaler": [8, 1, 0], "SVD": [9, 1, 0], + "TensorFlowModel": [10, 0, 0], "XGBClassifier": [11, 1, 0], "XGBRegressor": [11, 1, 0], } diff --git a/verticapy/machine_learning/vertica/__init__.py 
b/verticapy/machine_learning/vertica/__init__.py index d7583b83a..69f5fa5e3 100755 --- a/verticapy/machine_learning/vertica/__init__.py +++ b/verticapy/machine_learning/vertica/__init__.py @@ -67,6 +67,7 @@ StandardScaler, ) from verticapy.machine_learning.vertica.svm import LinearSVC, LinearSVR +from verticapy.machine_learning.vertica.tensorflow import TensorFlowModel from verticapy.machine_learning.vertica.tree import ( DecisionTreeClassifier, DecisionTreeRegressor, diff --git a/verticapy/machine_learning/vertica/base.py b/verticapy/machine_learning/vertica/base.py index 916993bf9..3b673e9a2 100755 --- a/verticapy/machine_learning/vertica/base.py +++ b/verticapy/machine_learning/vertica/base.py @@ -200,7 +200,8 @@ def _is_already_stored( If set to True and an error occurs, raises the error. return_model_type: bool, optional - If set to True, returns the model type. + If set to True, returns a tuple with + the model category and type. Returns ------- @@ -232,7 +233,8 @@ def does_model_exists( If set to True and an error occurs, raises the error. return_model_type: bool, optional - If set to True, returns the model type. + If set to True, returns a tuple with + the model category and type. Returns ------- @@ -246,7 +248,8 @@ def does_model_exists( res = _executeSQL( query=f""" SELECT - /*+LABEL('learn.tools._is_already_stored')*/ + /*+LABEL('learn.tools._is_already_stored')*/ + category, model_type FROM MODELS WHERE LOWER(model_name) = LOWER('{model_name}') @@ -256,7 +259,7 @@ def does_model_exists( print_time_sql=False, ) if res: - model_type = res[0] + model_type = res res = True else: res = False @@ -319,6 +322,20 @@ def get_attributes(self, attr_name: Optional[str] = None) -> Any: Any model attribute. 
""" + if hasattr(self, "_model_subcategory") and self._model_subcategory in ( + "TENSORFLOW", + "PMML", + ): + if not attr_name: + return self.get_vertica_attributes()["attr_name"] + else: + res = self.get_vertica_attributes(attr_name) + if res.shape() == (1, 1): + return res.to_list()[0][0] + elif res.shape()[0] == 1: + return np.array([l[0] for l in res.to_list()]) + else: + return res if not attr_name: return self._attributes elif attr_name in self._attributes: @@ -658,6 +675,13 @@ def to_binary(self, path: str): Absolute path of an output directory to store the exported models. + .. warning:: + + This function operates solely on the server + side and is not accessible locally. + The 'path' provided should match the location + where the file(s) will be exported on the server. + Returns ------- bool @@ -675,6 +699,13 @@ def to_pmml(self, path: str): Absolute path of an output directory to store the exported models. + .. warning:: + + This function operates solely on the server + side and is not accessible locally. + The 'path' provided should match the location + where the file(s) will be exported on the server. + Returns ------- bool @@ -692,6 +723,13 @@ def to_tf(self, path: str): Absolute path of an output directory to store the exported model. + .. warning:: + + This function operates solely on the server + side and is not accessible locally. + The 'path' provided should match the location + where the file(s) will be exported on the server. 
+ Returns ------- bool diff --git a/verticapy/machine_learning/vertica/model_management.py b/verticapy/machine_learning/vertica/model_management.py index 0071ab0fc..18bf76548 100755 --- a/verticapy/machine_learning/vertica/model_management.py +++ b/verticapy/machine_learning/vertica/model_management.py @@ -16,7 +16,7 @@ """ from typing import Literal, Optional -from verticapy._typing import SQLRelation +from verticapy._typing import NoneType, SQLRelation from verticapy._utils._sql._collect import save_verticapy_logs from verticapy._utils._sql._format import schema_relation from verticapy._utils._sql._sys import _executeSQL @@ -44,8 +44,10 @@ Ridge, ) from verticapy.machine_learning.vertica.naive_bayes import NaiveBayes +from verticapy.machine_learning.vertica.pmml import PMMLModel from verticapy.machine_learning.vertica.preprocessing import Scaler, OneHotEncoder from verticapy.machine_learning.vertica.svm import LinearSVC, LinearSVR +from verticapy.machine_learning.vertica.tensorflow import TensorFlowModel from verticapy.machine_learning.vertica.tsa import ARIMA, AR, MA @@ -73,6 +75,13 @@ def export_models( path: str Absolute path of an output directory to store the exported models. + + .. warning:: + + This function operates solely on the server + side and is not accessible locally. + The 'path' provided should match the location + where the file(s) will be exported on the server. kind: str, optional The category of models to export, one of the following: @@ -119,6 +128,15 @@ def import_models( - The parent directory of multiple model directories: ``parent-dir-path/*`` + + .. warning:: + + This function only operates on the server + side and is not accessible locally. + The 'path' should correspond to the location + of the file(s) on the server. Please make + sure you have successfully transferred your + file(s) to the server. schema: str, optional An existing schema where the machine learning models are imported. 
If omitted, models are @@ -191,13 +209,19 @@ def load_model( model The model. """ - model_type = VerticaModel.does_model_exists( + res = VerticaModel.does_model_exists( name=name, raise_error=False, return_model_type=True ) + if isinstance(res, NoneType): + raise NameError(f"The model '{name}' doesn't exist.") + model_category, model_type = res + model_category = model_category.lower() + if model_category == "pmml": + return PMMLModel(name) + elif model_category == "tensorflow": + return TensorFlowModel(name) schema, model_name = schema_relation(name) schema, model_name = schema[1:-1], name[1:-1] - if not model_type: - raise NameError(f"The model '{name}' doesn't exist.") if model_type.lower() in ( "kmeans", "kprototypes", diff --git a/verticapy/machine_learning/vertica/pmml.py b/verticapy/machine_learning/vertica/pmml.py index 84e09c279..0ff1b6472 100644 --- a/verticapy/machine_learning/vertica/pmml.py +++ b/verticapy/machine_learning/vertica/pmml.py @@ -70,10 +70,14 @@ def _model_type(self) -> Literal["PMMLModel"]: @save_verticapy_logs def __init__( self, - name: str = None, + name: str, ) -> None: super().__init__(name, False) self.parameters = {} + self.X = self.get_attributes("data_fields")["name"] + if self.get_attributes("is_supervised"): + self.y = self.X[-1] + self.X = self.X[:-1] # Prediction / Transformation Methods. diff --git a/verticapy/machine_learning/vertica/tensorflow.py b/verticapy/machine_learning/vertica/tensorflow.py new file mode 100644 index 000000000..e87099189 --- /dev/null +++ b/verticapy/machine_learning/vertica/tensorflow.py @@ -0,0 +1,152 @@ +""" +Copyright (c) 2018-2023 Open Text or one of its +affiliates. Licensed under the Apache License, +Version 2.0 (the "License"); You may not use this +file except in compliance with the License. 
+ +You may obtain a copy of the License at: +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in +writing, software distributed under the License is +distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing +permissions and limitations under the License. +""" +from typing import Literal, Optional + +from verticapy._typing import SQLColumns, SQLRelation +from verticapy._utils._gen import gen_name +from verticapy._utils._sql._collect import save_verticapy_logs +from verticapy._utils._sql._format import format_type, quote_ident +from verticapy._utils._sql._vertica_version import check_minimum_version + +from verticapy.core.vdataframe.base import vDataFrame + +from verticapy.machine_learning.vertica.base import VerticaModel + + +class TensorFlowModel(VerticaModel): + """ + Creates a TensorFlow object. + + .. versionadded:: 10.0.0 + + Parameters + ---------- + name: str + Name of the model. The model must be stored in + the database. If it is not the case, you can use + :py:mod:`verticapy.machine_learning.vertica.import_models` + to import your TensorFlow model. + """ + + # Properties. + + @property + def _vertica_fit_sql(self) -> None: + return None + + @property + def _vertica_predict_sql(self) -> Literal["PREDICT_TENSORFLOW_SCALAR"]: + return "PREDICT_TENSORFLOW_SCALAR" + + @property + def _vertica_predict_transform_sql(self) -> Literal["PREDICT_TENSORFLOW"]: + return "PREDICT_TENSORFLOW" + + @property + def _model_category(self) -> Literal["INTEGRATION"]: + return "INTEGRATION" + + @property + def _model_subcategory(self) -> Literal["TENSORFLOW"]: + return "TENSORFLOW" + + @property + def _model_type(self) -> Literal["TensorFlowModel"]: + return "TensorFlowModel" + + # System & Special Methods. 
+ + @check_minimum_version + @save_verticapy_logs + def __init__( + self, + name: str, + ) -> None: + super().__init__(name, False) + self.parameters = {} + attr = self.get_attributes() + if "input_desc" in attr: + self.X = self.get_attributes("input_desc")["op_name"] + if "output_desc" in attr: + self.y = self.get_attributes("output_desc")["op_name"][0] + + # Prediction / Transformation Methods. + + def predict( + self, + vdf: SQLRelation, + X: Optional[SQLColumns] = None, + name: Optional[str] = None, + inplace: bool = True, + ) -> vDataFrame: + """ + Predicts using the input relation. + + Parameters + ---------- + vdf: SQLRelation + Object used to run the prediction. You can + also specify a customized relation, but you + must enclose it with an alias. For example, + "(SELECT 1) x" is valid, whereas "(SELECT 1)" + and "SELECT 1" are invalid. + X: SQLColumns, optional + List of the columns used to deploy the models. + name: str, optional + Name of the added vDataColumn. If empty, a name + is generated. + + .. note:: + + This parameter is only used when the input + 'X' is a complex data type, otherwise it is + ignored. + inplace: bool, optional + If set to True, the prediction is added to the + vDataFrame. + + Returns + ------- + vDataFrame + the input object. 
+ """ + X = format_type(X, dtype=list, na_out=self.X) + X = quote_ident(X) + if isinstance(vdf, str): + vdf = vDataFrame(vdf) + inplace = True + if not name: + name = gen_name([self._model_type, self.model_name]) + if len(X) == 1 and vdf[X[0]].category() == "complex": + if inplace: + return vdf.eval(name, self.deploySQL(X=X)) + else: + return vdf.copy().eval(name, self.deploySQL(X=X)) + else: + columns = vdf.get_columns() + n = len(columns) + sql = f""" + SELECT + {self._vertica_predict_transform_sql}({', '.join(columns + X)} + USING PARAMETERS + model_name = '{self.model_name}', + num_passthru_cols = '{n}') + OVER(PARTITION BEST) FROM {vdf}""" + if inplace: + return vdf.__init__(sql) + else: + return vDataFrame(sql) diff --git a/verticapy/tests_new/machine_learning/vertica/test_base_model_methods.py b/verticapy/tests_new/machine_learning/vertica/test_base_model_methods.py index 6ec5c3676..186795fcb 100644 --- a/verticapy/tests_new/machine_learning/vertica/test_base_model_methods.py +++ b/verticapy/tests_new/machine_learning/vertica/test_base_model_methods.py @@ -1169,7 +1169,7 @@ def test_does_model_exists(self, get_models): assert get_models.vpy.model.does_model_exists( name=model_name_with_schema, return_model_type=True - ) in [ + )[1] in [ "LINEAR_REGRESSION", "SVM_REGRESSOR", "POISSON_REGRESSION", From 769507cf5c60a5b584c9d88e4de787040dd2e3ae Mon Sep 17 00:00:00 2001 From: Badr Date: Tue, 7 Nov 2023 22:07:54 -0500 Subject: [PATCH 3/3] Update test_tools.py --- verticapy/tests/vModel/test_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verticapy/tests/vModel/test_tools.py b/verticapy/tests/vModel/test_tools.py index 1df4d550c..e14b3ba58 100755 --- a/verticapy/tests/vModel/test_tools.py +++ b/verticapy/tests/vModel/test_tools.py @@ -59,7 +59,7 @@ def test__is_already_stored(self, titanic_vd): model.fit(titanic_vd, ["age", "fare"], "survived") assert model._is_already_stored() assert ( - 
model._is_already_stored(return_model_type=True).lower() + model._is_already_stored(return_model_type=True)[1].lower() == "linear_regression" ) model.drop()