Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PMML and TF INTEGRATION #838

Merged
merged 3 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions verticapy/_utils/_sql/_vertica_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from verticapy.errors import VersionError

MINIMUM_VERTICA_VERSION = {
"ARIMA": [23, 3, 0],
"ARIMA": [23, 4, 0],
"AR": [11, 0, 0],
"ARMA": [12, 0, 3],
"ARMA": [12, 0, 4],
"Balance": [8, 1, 1],
"BernoulliNB": [8, 0, 0],
"BisectingKMeans": [9, 3, 1],
Expand Down Expand Up @@ -71,6 +71,7 @@
"soundex_matches": [10, 1, 0],
"StandardScaler": [8, 1, 0],
"SVD": [9, 1, 0],
"TensorFlowModel": [10, 0, 0],
"XGBClassifier": [11, 1, 0],
"XGBRegressor": [11, 1, 0],
}
Expand Down
1 change: 1 addition & 0 deletions verticapy/machine_learning/vertica/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
StandardScaler,
)
from verticapy.machine_learning.vertica.svm import LinearSVC, LinearSVR
from verticapy.machine_learning.vertica.tensorflow import TensorFlowModel
from verticapy.machine_learning.vertica.tree import (
DecisionTreeClassifier,
DecisionTreeRegressor,
Expand Down
46 changes: 42 additions & 4 deletions verticapy/machine_learning/vertica/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ def _is_already_stored(
If set to True and an error occurs,
raises the error.
return_model_type: bool, optional
If set to True, returns the model type.
If set to True, returns a tuple with
the model category and type.

Returns
-------
Expand Down Expand Up @@ -232,7 +233,8 @@ def does_model_exists(
If set to True and an error occurs,
raises the error.
return_model_type: bool, optional
If set to True, returns the model type.
If set to True, returns a tuple with
the model category and type.

Returns
-------
Expand All @@ -246,7 +248,8 @@ def does_model_exists(
res = _executeSQL(
query=f"""
SELECT
/*+LABEL('learn.tools._is_already_stored')*/
/*+LABEL('learn.tools._is_already_stored')*/
category,
model_type
FROM MODELS
WHERE LOWER(model_name) = LOWER('{model_name}')
Expand All @@ -256,7 +259,7 @@ def does_model_exists(
print_time_sql=False,
)
if res:
model_type = res[0]
model_type = res
res = True
else:
res = False
Expand Down Expand Up @@ -319,6 +322,20 @@ def get_attributes(self, attr_name: Optional[str] = None) -> Any:
Any
model attribute.
"""
if hasattr(self, "_model_subcategory") and self._model_subcategory in (
"TENSORFLOW",
"PMML",
):
if not attr_name:
return self.get_vertica_attributes()["attr_name"]
else:
res = self.get_vertica_attributes(attr_name)
if res.shape() == (1, 1):
return res.to_list()[0][0]
elif res.shape()[0] == 1:
return np.array([l[0] for l in res.to_list()])
else:
return res
if not attr_name:
return self._attributes
elif attr_name in self._attributes:
Expand Down Expand Up @@ -658,6 +675,13 @@ def to_binary(self, path: str):
Absolute path of an output directory to store
the exported models.

.. warning::

This function operates solely on the server
side and is not accessible locally.
The 'path' provided should match the location
where the file(s) will be exported on the server.

Returns
-------
bool
Expand All @@ -675,6 +699,13 @@ def to_pmml(self, path: str):
Absolute path of an output directory to store
the exported models.

.. warning::

This function operates solely on the server
side and is not accessible locally.
The 'path' provided should match the location
where the file(s) will be exported on the server.

Returns
-------
bool
Expand All @@ -692,6 +723,13 @@ def to_tf(self, path: str):
Absolute path of an output directory to store
the exported model.

.. warning::

This function operates solely on the server
side and is not accessible locally.
The 'path' provided should match the location
where the file(s) will be exported on the server.

Returns
-------
bool
Expand Down
32 changes: 28 additions & 4 deletions verticapy/machine_learning/vertica/model_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"""
from typing import Literal, Optional

from verticapy._typing import SQLRelation
from verticapy._typing import NoneType, SQLRelation
from verticapy._utils._sql._collect import save_verticapy_logs
from verticapy._utils._sql._format import schema_relation
from verticapy._utils._sql._sys import _executeSQL
Expand Down Expand Up @@ -44,8 +44,10 @@
Ridge,
)
from verticapy.machine_learning.vertica.naive_bayes import NaiveBayes
from verticapy.machine_learning.vertica.pmml import PMMLModel
from verticapy.machine_learning.vertica.preprocessing import Scaler, OneHotEncoder
from verticapy.machine_learning.vertica.svm import LinearSVC, LinearSVR
from verticapy.machine_learning.vertica.tensorflow import TensorFlowModel
from verticapy.machine_learning.vertica.tsa import ARIMA, AR, MA


Expand Down Expand Up @@ -73,6 +75,13 @@ def export_models(
path: str
Absolute path of an output directory to store
the exported models.

.. warning::

This function operates solely on the server
side and is not accessible locally.
The 'path' provided should match the location
where the file(s) will be exported on the server.
kind: str, optional
The category of models to export, one of the
following:
Expand Down Expand Up @@ -119,6 +128,15 @@ def import_models(
- The parent directory of multiple model
directories:
``parent-dir-path/*``

.. warning::

This function only operates on the server
side and is not accessible locally.
The 'path' should correspond to the location
of the file(s) on the server. Please make
sure you have successfully transferred your
file(s) to the server.
schema: str, optional
An existing schema where the machine learning
models are imported. If omitted, models are
Expand Down Expand Up @@ -191,13 +209,19 @@ def load_model(
model
The model.
"""
model_type = VerticaModel.does_model_exists(
res = VerticaModel.does_model_exists(
name=name, raise_error=False, return_model_type=True
)
if isinstance(res, NoneType):
raise NameError(f"The model '{name}' doesn't exist.")
model_category, model_type = res
model_category = model_category.lower()
if model_category == "pmml":
return PMMLModel(name)
elif model_category == "tensorflow":
return TensorFlowModel(name)
schema, model_name = schema_relation(name)
schema, model_name = schema[1:-1], name[1:-1]
if not model_type:
raise NameError(f"The model '{name}' doesn't exist.")
if model_type.lower() in (
"kmeans",
"kprototypes",
Expand Down
6 changes: 5 additions & 1 deletion verticapy/machine_learning/vertica/pmml.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,14 @@ def _model_type(self) -> Literal["PMMLModel"]:
@save_verticapy_logs
def __init__(
self,
name: str = None,
name: str,
) -> None:
super().__init__(name, False)
self.parameters = {}
self.X = self.get_attributes("data_fields")["name"]
if self.get_attributes("is_supervised"):
self.y = self.X[-1]
self.X = self.X[:-1]

# Prediction / Transformation Methods.

Expand Down
152 changes: 152 additions & 0 deletions verticapy/machine_learning/vertica/tensorflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""
Copyright (c) 2018-2023 Open Text or one of its
affiliates. Licensed under the Apache License,
Version 2.0 (the "License"); You may not use this
file except in compliance with the License.

You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in
writing, software distributed under the License is
distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing
permissions and limitations under the License.
"""
from typing import Literal, Optional

from verticapy._typing import SQLColumns, SQLRelation
from verticapy._utils._gen import gen_name
from verticapy._utils._sql._collect import save_verticapy_logs
from verticapy._utils._sql._format import format_type, quote_ident
from verticapy._utils._sql._vertica_version import check_minimum_version

from verticapy.core.vdataframe.base import vDataFrame

from verticapy.machine_learning.vertica.base import VerticaModel


class TensorFlowModel(VerticaModel):
    """
    Creates a TensorFlow object.

    The object wraps a TensorFlow model that is already
    stored in the database; it cannot be fitted from
    Python (``_vertica_fit_sql`` is ``None``).

    .. versionadded:: 10.0.0

    Parameters
    ----------
    name: str
        Name of the model. The model must be stored in
        the database. If it is not the case, you can use
        :py:mod:`verticapy.machine_learning.vertica.import_models`
        to import your TensorFlow model.
    """

    # Properties.

    @property
    def _vertica_fit_sql(self) -> None:
        # No training function: the model is imported, never fitted.
        return None

    @property
    def _vertica_predict_sql(self) -> Literal["PREDICT_TENSORFLOW_SCALAR"]:
        return "PREDICT_TENSORFLOW_SCALAR"

    @property
    def _vertica_predict_transform_sql(self) -> Literal["PREDICT_TENSORFLOW"]:
        return "PREDICT_TENSORFLOW"

    @property
    def _model_category(self) -> Literal["INTEGRATION"]:
        return "INTEGRATION"

    @property
    def _model_subcategory(self) -> Literal["TENSORFLOW"]:
        return "TENSORFLOW"

    @property
    def _model_type(self) -> Literal["TensorFlowModel"]:
        return "TensorFlowModel"

    # System & Special Methods.

    @check_minimum_version
    @save_verticapy_logs
    def __init__(
        self,
        name: str,
    ) -> None:
        """
        Binds the object to the stored model ``name`` and,
        when the model exposes them, reads the input and
        output descriptions to initialise ``X`` and ``y``.
        """
        super().__init__(name, False)
        self.parameters = {}
        # Called without an attribute name, get_attributes is
        # used here as the collection of available attribute
        # names (membership is tested with ``in`` below).
        attr = self.get_attributes()
        if "input_desc" in attr:
            self.X = self.get_attributes("input_desc")["op_name"]
        if "output_desc" in attr:
            # Only the first output operation is kept as the response.
            self.y = self.get_attributes("output_desc")["op_name"][0]

    # Prediction / Transformation Methods.

    def predict(
        self,
        vdf: SQLRelation,
        X: Optional[SQLColumns] = None,
        name: Optional[str] = None,
        inplace: bool = True,
    ) -> vDataFrame:
        """
        Predicts using the input relation.

        Parameters
        ----------
        vdf: SQLRelation
            Object used to run the prediction. You can
            also specify a customized relation, but you
            must enclose it with an alias. For example,
            "(SELECT 1) x" is valid, whereas "(SELECT 1)"
            and "SELECT 1" are invalid.
        X: SQLColumns, optional
            List of the columns used to deploy the models.
            If empty, the model's own ``X`` is used.
        name: str, optional
            Name of the added vDataColumn. If empty, a name
            is generated.

            .. note::

                This parameter is only used when the input
                'X' is a complex data type, otherwise it is
                ignored.
        inplace: bool, optional
            If set to True, the prediction is added to the
            vDataFrame. When ``vdf`` is given as a string, a
            new vDataFrame is built from it and ``inplace``
            is forced to True.

        Returns
        -------
        vDataFrame
            the input object when ``inplace`` is True,
            otherwise a new vDataFrame.
        """
        X = format_type(X, dtype=list, na_out=self.X)
        X = quote_ident(X)
        if isinstance(vdf, str):
            vdf = vDataFrame(vdf)
            inplace = True
        if not name:
            name = gen_name([self._model_type, self.model_name])
        if len(X) == 1 and vdf[X[0]].category() == "complex":
            # A single complex column is scored with the scalar
            # function and appended as one new column.
            if inplace:
                return vdf.eval(name, self.deploySQL(X=X))
            else:
                return vdf.copy().eval(name, self.deploySQL(X=X))
        else:
            # Every existing column is forwarded as a pass-through
            # column, followed by the model inputs.
            columns = vdf.get_columns()
            n = len(columns)
            sql = f"""
                SELECT
                    {self._vertica_predict_transform_sql}({', '.join(columns + X)}
                        USING PARAMETERS
                        model_name = '{self.model_name}',
                        num_passthru_cols = '{n}')
                    OVER(PARTITION BEST) FROM {vdf}"""
            if inplace:
                # __init__ always returns None, so re-initialise the
                # object in place and return the object itself.
                vdf.__init__(sql)
                return vdf
            else:
                return vDataFrame(sql)
2 changes: 1 addition & 1 deletion verticapy/tests/vModel/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test__is_already_stored(self, titanic_vd):
model.fit(titanic_vd, ["age", "fare"], "survived")
assert model._is_already_stored()
assert (
model._is_already_stored(return_model_type=True).lower()
model._is_already_stored(return_model_type=True)[1].lower()
== "linear_regression"
)
model.drop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,7 @@ def test_does_model_exists(self, get_models):

assert get_models.vpy.model.does_model_exists(
name=model_name_with_schema, return_model_type=True
) in [
)[1] in [
"LINEAR_REGRESSION",
"SVM_REGRESSOR",
"POISSON_REGRESSION",
Expand Down