From b0c87a50f8e3094feb82a4ced593eabfc26fd8b3 Mon Sep 17 00:00:00 2001
From: Badr <32390048+oualib@users.noreply.github.com>
Date: Fri, 27 Aug 2021 12:36:23 +0200
Subject: [PATCH] Adding memModel Trees and some code correction (#161)

* Adding memModel Trees and some code correction

 - memModel Trees (Unit Tests to do)
 - Adding matrix rotation for PCA (tests available)
 - simplifying code for vModel

TO DO (to complete 0.7.0):
 - tests for trees
 - memModel for Naive Bayes (+ tests)

* Adding tests for trees

TO DO:
test for the SQL syntax in vDataFrame

* copyedits

* reword

* Update memmodel.py

Co-authored-by: kxu <52899649+ansleis@users.noreply.github.com>
---
 verticapy/learn/memmodel.py             | 411 ++++++++++++++++++++++--
 verticapy/learn/tools.py                |  52 ++-
 verticapy/learn/vmodel.py               |   5 +-
 verticapy/stats/tools.py                |   6 +-
 verticapy/tests/vModel/test_memmodel.py | 255 +++++++++++++++
 verticapy/tests/vModel/test_tools.py    |  13 +-
 6 files changed, 698 insertions(+), 44 deletions(-)

diff --git a/verticapy/learn/memmodel.py b/verticapy/learn/memmodel.py
index 4826b5722..b27f5d5c3 100644
--- a/verticapy/learn/memmodel.py
+++ b/verticapy/learn/memmodel.py
@@ -50,14 +50,159 @@
 #
 # Standard Python Modules
 import numpy as np
+from collections.abc import Iterable
 
 # VerticaPy Modules
 from verticapy.toolbox import *
 from verticapy.errors import *
 
 # ---#
-def predict_from_coef(X: Union[list, np.array], 
-                      coefficients: Union[list, np.array], 
+def predict_from_binary_tree(X: Union[list, np.ndarray], 
+                             children_left: list,
+                             children_right: list,
+                             feature: list,
+                             threshold: list,
+                             value: list,
+                             classes: Union[list, np.ndarray] = [],
+                             return_proba: bool = False,
+                             is_regressor: bool = True,):
+    """
+    ---------------------------------------------------------------------------
+    Predicts using a binary tree model and the input attributes.
+
+    Parameters
+    ----------
+    X: list / numpy.array
+        Data on which to make the prediction. Each row is passed through the tree independently.
+    children_left: list
+        A list of node IDs, where children_left[i] is the node id of the left child of node i.
+    children_right: list
+        A list of node IDs, where children_right[i] is the node id of the right child of node i.
+    feature: list
+        A list of features, where feature[i] is the feature to split on for the internal node i (used as an index in each row of X).
+    threshold: list
+        A list of thresholds, where threshold[i] is the threshold for the internal node i. A str threshold is tested with an equality, any other threshold with a 'less than' comparison on floats; the left child is chosen when the test is True.
+    value: list
+        Contains the constant prediction value of each node. For classification, np.argmax is applied to value[i] unless return_proba is True.
+    classes: list / numpy.array, optional
+        The classes for the binary tree model. If empty, the index of the largest element of value[i] is returned instead of a class.
+    return_proba: bool, optional
+        If set to True, the probability of each class is returned (value[i] is returned as is).
+    is_regressor: bool, optional
+        If set to True, the parameter 'value' corresponds to the result of
+        a regression.
+
+    Returns
+    -------
+    numpy.array
+        Predicted values, one result per row of X.
+    """
+    check_types([("X", X, [list, np.ndarray,],),
+                 ("children_left", children_left, [list,],),
+                 ("children_right", children_right, [list,],),
+                 ("feature", feature, [list,],),
+                 ("threshold", threshold, [list,],),
+                 ("value", value, [list,],),
+                 ("classes", classes, [list, np.ndarray,],),
+                 ("return_proba", return_proba, [bool,],),
+                 ("is_regressor", is_regressor, [bool,],),])
+    def predict_tree(children_left, children_right, feature, threshold, value,  node_id, X,):  # recursive traversal of one row, starting at node_id
+        if children_left[node_id] == children_right[node_id]:  # equal child IDs mark a terminal node
+            if not(is_regressor) and not(return_proba) and isinstance(value, Iterable):  # NOTE(review): tests the whole 'value' list, not value[node_id]
+                if isinstance(classes, Iterable) and len(classes) > 0:
+                    return classes[np.argmax(value[node_id])]
+                else:
+                    return np.argmax(value[node_id])
+            else:
+                return value[node_id]
+        else:
+            if (isinstance(threshold[node_id], str) and str(X[feature[node_id]]) == threshold[node_id]) or (not(isinstance(threshold[node_id], str)) and float(X[feature[node_id]]) < float(threshold[node_id])):  # True -> left child, False -> right child
+                return predict_tree(children_left, children_right, feature, threshold, value, children_left[node_id], X)
+            else:
+                return predict_tree(children_left, children_right, feature, threshold, value, children_right[node_id], X)
+    def predict_tree_final(X,):  # binds the tree attributes so that only the row is left as argument
+        return predict_tree(children_left, children_right, feature, threshold, value, 0, X,)  # the traversal starts at node 0
+    return np.apply_along_axis(predict_tree_final, 1, np.array(X))  # one traversal per row (axis 1)
+
+# ---#
+def sql_from_binary_tree(X: Union[list, np.ndarray], 
+                         children_left: list,
+                         children_right: list,
+                         feature: list,
+                         threshold: list,
+                         value: list,
+                         classes: Union[list, np.ndarray] = [],
+                         return_proba: bool = False,
+                         is_regressor: bool = True,):
+    """
+    ---------------------------------------------------------------------------
+    Returns the SQL code needed to deploy a binary tree model using its attributes.
+
+    Parameters
+    ----------
+    X: list / numpy.array
+        Elements inserted as is in the SQL code as the left operand of each node's comparison (presumably the names of the predictor columns - to confirm against callers).
+    children_left: list
+        A list of node IDs, where children_left[i] is the node id of the left child of node i.
+    children_right: list
+        A list of node IDs, where children_right[i] is the node id of the right child of node i.
+    feature: list
+        A list of features, where feature[i] is the feature to split on for the internal node i (used as an index in X).
+    threshold: list
+        A list of thresholds, where threshold[i] is the threshold for the internal node i. A str threshold generates an '=' comparison, any other threshold a '<' comparison; the threshold is always written between single quotes.
+    value: list
+        Contains the constant prediction value of each node. If used for classification and if return_proba is set to True, each element of the list must be a sublist
+        with the probabilities of each class.
+    classes: list / numpy.array, optional
+        The classes for the binary tree model. If empty, value[i] is written in the SQL code as is.
+    return_proba: bool, optional
+        If set to True, the probability of each class is returned.
+    is_regressor: bool, optional
+        If set to True, the parameter 'value' corresponds to the result of
+        a regression.
+
+    Returns
+    -------
+    str / list
+        SQL code (a list of SQL expressions, one per probability index, if return_proba is True).
+    """
+    check_types([("X", X, [list, np.ndarray,],),
+                 ("children_left", children_left, [list,],),
+                 ("children_right", children_right, [list,],),
+                 ("feature", feature, [list,],),
+                 ("threshold", threshold, [list,],),
+                 ("value", value, [list,],),
+                 ("classes", classes, [list, np.ndarray,],),
+                 ("return_proba", return_proba, [bool,],),
+                 ("is_regressor", is_regressor, [bool,],),])
+    def predict_tree(children_left, children_right, feature, threshold, value,  node_id, X, prob_ID = 0):  # prob_ID: index of the probability written at the terminal nodes
+        if children_left[node_id] == children_right[node_id]:  # equal child IDs mark a terminal node
+            if return_proba:
+                return value[node_id][prob_ID]
+            else:
+                if not(is_regressor) and isinstance(classes, Iterable) and len(classes) > 0:
+                    result = classes[np.argmax(value[node_id])]
+                    if isinstance(result, str):
+                      return "'" + result + "'"  # NOTE(review): quotes inside 'result' are not escaped
+                    else:
+                      return result
+                else:
+                    return value[node_id]
+        else:
+            op = '=' if isinstance(threshold[node_id], str) else '<'  # str threshold -> equality, otherwise 'less than'; NOTE(review): the threshold is quoted below without escaping
+            return "(CASE WHEN {} {} '{}' THEN {} ELSE {} END)".format(X[feature[node_id]], 
+                                                                       op, threshold[node_id], 
+                                                                       predict_tree(children_left, children_right, feature, threshold, value, children_left[node_id], X, prob_ID), 
+                                                                       predict_tree(children_left, children_right, feature, threshold, value, children_right[node_id], X, prob_ID))
+    if return_proba:
+        n = max([len(l) if l != None else 0 for l in value])  # entries equal to None count as length 0; NOTE(review): 'is not None' would be the idiomatic test
+        return [predict_tree(children_left, children_right, feature, threshold, value, 0, X, i) for i in range(n)]  # one SQL expression per probability index
+    else:
+        return predict_tree(children_left, children_right, feature, threshold, value, 0, X,)  # the traversal starts at node 0
+
+# ---#
+def predict_from_coef(X: Union[list, np.ndarray], 
+                      coefficients: Union[list, np.ndarray], 
                       intercept: float, 
                       method: str = "LinearRegression",
                       return_proba: bool = False,):
@@ -85,8 +230,8 @@ def predict_from_coef(X: Union[list, np.array],
     numpy.array
         Predicted values
     """
-    check_types([("X", X, [list,],), 
-                 ("coefficients", coefficients, [list,],),
+    check_types([("X", X, [list, np.ndarray,],), 
+                 ("coefficients", coefficients, [list, np.ndarray,],),
                  ("intercept", intercept, [float, int,],),
                  ("method", method, ["LinearRegression", "LinearSVR", "LogisticRegression", "LinearSVC"],),
                  ("return_proba", return_proba, [bool],),])
@@ -138,10 +283,10 @@ def sql_from_coef(X: list,
     return sql
 
 # ---#
-def predict_from_bisecting_kmeans(X: Union[list, np.array], 
-                                  clusters: Union[list, np.array],
-                                  left_child: Union[list, np.array],
-                                  right_child: Union[list, np.array],
+def predict_from_bisecting_kmeans(X: Union[list, np.ndarray], 
+                                  clusters: Union[list, np.ndarray],
+                                  left_child: Union[list, np.ndarray],
+                                  right_child: Union[list, np.ndarray],
                                   p: int = 2,):
     """
     ---------------------------------------------------------------------------
@@ -167,10 +312,10 @@ def predict_from_bisecting_kmeans(X: Union[list, np.array],
     numpy.array
         Predicted values
     """
-    check_types([("X", X, [list,],), 
-                 ("clusters", clusters, [list,],),
-                 ("left_child", left_child, [list,],),
-                 ("right_child", right_child, [list,],),
+    check_types([("X", X, [list, np.ndarray,],), 
+                 ("clusters", clusters, [list, np.ndarray,],),
+                 ("left_child", left_child, [list, np.ndarray,],),
+                 ("right_child", right_child, [list, np.ndarray,],),
                  ("p", p, [int,],),])
     centroids = np.array(clusters)
     def predict_tree(right_child, left_child, row, node_id, centroids):
@@ -248,11 +393,11 @@ def predict_tree(right_child: list, left_child: list, node_id: int, clusters_dis
     return sql_final
 
 # ---#
-def predict_from_clusters(X: Union[list, np.array], 
-                          clusters: Union[list, np.array],
+def predict_from_clusters(X: Union[list, np.ndarray], 
+                          clusters: Union[list, np.ndarray],
                           return_distance_clusters: bool = False,
                           return_proba: bool = False,
-                          classes: Union[list, np.array] = [],
+                          classes: Union[list, np.ndarray] = [],
                           p: int = 2,):
     """
     ---------------------------------------------------------------------------
@@ -278,11 +423,11 @@ def predict_from_clusters(X: Union[list, np.array],
     numpy.array
         Predicted values
     """
-    check_types([("X", X, [list,],), 
-                 ("clusters", clusters, [list,],),
+    check_types([("X", X, [list, np.ndarray,],), 
+                 ("clusters", clusters, [list, np.ndarray,],),
                  ("return_distance_clusters", return_distance_clusters, [bool,],),
                  ("return_proba", return_proba, [bool,],),
-                 ("classes", classes, [list,],),
+                 ("classes", classes, [list, np.ndarray,],),
                  ("p", p, [int,],),])
     assert not(return_distance_clusters) or not(return_proba), ParameterError("Parameters 'return_distance_clusters' and 'return_proba' cannot both be set to True.")
     centroids = np.array(clusters)
@@ -377,9 +522,9 @@ def sql_from_clusters(X: list,
     return sql_final
 
 # ---#
-def transform_from_pca(X: Union[list, np.array],
-                       principal_components: Union[list, np.array],
-                       mean: Union[list, np.array]):
+def transform_from_pca(X: Union[list, np.ndarray],
+                       principal_components: Union[list, np.ndarray],
+                       mean: Union[list, np.ndarray]):
     """
     ---------------------------------------------------------------------------
     Transforms the data with a PCA model using the input attributes.
@@ -398,9 +543,9 @@ def transform_from_pca(X: Union[list, np.array],
     numpy.array
         Transformed data
     """
-    check_types([("X", X, [list],), 
-                 ("principal_components", principal_components, [list],),
-                 ("mean", mean, [list],),])
+    check_types([("X", X, [list, np.ndarray,],), 
+                 ("principal_components", principal_components, [list, np.ndarray,],),
+                 ("mean", mean, [list, np.ndarray,],),])
     pca_values = np.array(principal_components)
     result = (X - np.array(mean))
     L, n = [], len(principal_components[0])
@@ -682,7 +827,7 @@ class memModel:
     'SVD,' 'PCA,' 'BisectingKMeans,' 'KMeans,' 'NaiveBayes,' 
     'XGBoostClassifier,' 'XGBoostRegressor,' 'RandomForestClassifier,' 
     'RandomForestRegressor,' 'LinearSVR,' 'LinearSVC,' 'LogisticRegression,' 
-    'LinearRegression'
+    'LinearRegression', 'BinaryTreeRegressor', 'BinaryTreeClassifier'
 attributes: dict
     Dictionary which includes all the model's attributes.
         For OneHotEncoder: {"categories": List of the different feature categories.
@@ -709,13 +854,34 @@ class memModel:
                             "mean": List of the input predictors average.}
         For SVD:           {"vectors": Matrix of the right singular vectors.
                             "values": List of the singular values.}
-        For Normalizer:       {"values": List of tuples including the model's attributes.
-                                      The required tuple depends on the specified method: 
-                                       'zscore': (mean, std)
-                                       'robust_zscore': (median, mad)
-                                       'minmax': (min, max)
+        For Normalizer:    {"values": List of tuples including the model's attributes.
+                                The required tuple depends on the specified method: 
+                                    'zscore': (mean, std)
+                                    'robust_zscore': (median, mad)
+                                    'minmax': (min, max)
                             "method": The model's category, one of the following: 'zscore', 
                                       'robust_zscore', or 'minmax'.}
+        For BinaryTreeRegressor, BinaryTreeClassifier:
+                            {children_left: A list of node IDs, where children_left[i] is the node id of the left 
+                                            child of node i.
+                             children_right: A list of node IDs, where children_right[i] is the node id of the 
+                                             right child of node i.
+                             feature: A list of features, where feature[i] is the feature to split on, for the internal 
+                                      node i.
+                             threshold: threshold[i] is the threshold for the internal node i.
+                             value: Contains the constant prediction value of each node.
+                             classes: [Only for Classifier] The classes for the binary tree model.}
+        For RandomForestClassifier, RandomForestRegressor, XGBoostClassifier, XGBoostRegressor:
+                            {trees: list of memModels of type 'BinaryTreeRegressor' or 
+                                    'BinaryTreeClassifier'
+                             learning_rate: [Only for XGBoostClassifier and XGBoostRegressor]
+                                            Learning rate.
+                             mean: [Only for XGBoostRegressor]
+                                   Average of the response column.
+                             logodds: [Only for XGBoostClassifier]
+                                   List of the logodds of the response classes.}
+
+
     """
     #
     # Special Methods
@@ -737,6 +903,8 @@ def __init__(
                                                   "XGBoostClassifier",
                                                   "XGBoostRegressor",
                                                   "RandomForestClassifier",
+                                                  "BinaryTreeClassifier",
+                                                  "BinaryTreeRegressor",
                                                   "RandomForestRegressor",
                                                   "LinearSVR",
                                                   "LinearSVC",
@@ -744,7 +912,56 @@ def __init__(
                                                   "LinearRegression",
                                                   "NearestCentroids",],),])
         attributes_ = {}
-        if model_type == "OneHotEncoder":
+        if model_type in ("RandomForestRegressor", "XGBoostRegressor", "RandomForestClassifier", "XGBoostClassifier",):
+            if ("trees" not in attributes):
+                raise ParameterError("{}'s attributes must include a list of memModels representing each tree.".format(model_type))
+            attributes_["trees"] = []
+            for tree in attributes["trees"]:
+                assert isinstance(tree, memModel), ParameterError("Each tree of the model must be a memModel, found '{}'.".format(type(tree)))
+                if model_type in ("RandomForestClassifier", "XGBoostClassifier",):
+                    assert tree.model_type_ in ("BinaryTreeClassifier",), ParameterError("Each tree of the model must be a BinaryTreeClassifier, found '{}'.".format(tree.model_type_))
+                else:
+                    assert tree.model_type_ in ("BinaryTreeRegressor",), ParameterError("Each tree of the model must be a BinaryTreeRegressor, found '{}'.".format(tree.model_type_))
+                attributes_["trees"] += [tree]
+            represent = "<{}>\n\nntrees = {}".format(model_type, len(attributes_["trees"]))
+            if model_type == "XGBoostRegressor":
+                if ("learning_rate" not in attributes or 'mean' not in attributes):
+                    raise ParameterError("{}'s attributes must include the response average and the learning rate.".format(model_type))
+                attributes_["mean"] = attributes["mean"]
+                check_types([("mean", attributes_["mean"], [int, float,],),])
+                represent += "\n\nmean = {}".format(attributes_["mean"])
+            if model_type == "XGBoostClassifier":
+                if ("learning_rate" not in attributes or 'logodds' not in attributes):
+                    raise ParameterError("{}'s attributes must include the response classes logodds and the learning rate.".format(model_type))
+                attributes_["logodds"] = np.copy(attributes["logodds"])
+                check_types([("logodds", attributes_["logodds"], [list,],),])
+                represent += "\n\nlogodds = {}".format(attributes_["logodds"])
+            if model_type in ("XGBoostRegressor", "XGBoostClassifier",):
+                attributes_["learning_rate"] = attributes["learning_rate"]
+                check_types([("learning_rate", attributes_["learning_rate"], [int, float,],),])
+                represent += "\n\nlearning_rate = {}".format(attributes_["learning_rate"])
+        elif model_type in ("BinaryTreeClassifier", "BinaryTreeRegressor"):
+            if ("children_left" not in attributes or "children_right" not in attributes or "feature" not in attributes or "threshold" not in attributes or "value" not in attributes):
+                raise ParameterError("{}'s attributes must include at least the following lists: children_left, children_right, feature, threshold, value.".format(model_type))
+            for elem in ("children_left", "children_right", "feature", "threshold", "value",):
+                if isinstance(attributes[elem], list):
+                    attributes_[elem] = attributes[elem].copy()
+                else:
+                    attributes_[elem] = np.copy(attributes[elem])
+            check_types([("children_left", attributes_["children_left"], [list,],),
+                         ("children_right", attributes_["children_right"], [list,],),
+                         ("feature", attributes_["feature"], [list,],),
+                         ("threshold", attributes_["threshold"], [list,],),
+                         ("value", attributes_["value"], [list,],),])
+            represent = "<{}>\n\nchildren_left = {}\n\nchildren_right = {}\n\nfeature = {}\n\nthreshold = {}\n\nvalue =\n{}".format(model_type, attributes_["children_left"], attributes_["children_right"], attributes_["feature"], attributes_["threshold"], attributes_["value"])
+            if model_type in ("BinaryTreeClassifier",):
+                if "classes" not in attributes:
+                    attributes_["classes"] = []
+                else:
+                    attributes_["classes"] = np.copy(attributes["classes"])
+                check_types([("classes", attributes_["classes"], [list,],),])
+                represent += "\n\nclasses = {}".format(attributes_["classes"])
+        elif model_type == "OneHotEncoder":
             if "categories" not in attributes:
                 raise ParameterError("OneHotEncoder's attributes must include a list with all the feature categories for the 'categories' parameter.")
             attributes_["categories"] = attributes["categories"].copy()
@@ -808,7 +1025,7 @@ def __init__(
             attributes_["mean"] = np.copy(attributes["mean"])
             check_types([("principal_components", attributes_["principal_components"], [list,],),
                          ("mean", attributes_["mean"], [list,],),])
-            represent = "<{}>\n\nprincipal_components = {}\n\nmean = {}".format(model_type, attributes_["principal_components"], attributes_["mean"])
+            represent = "<{}>\n\nprincipal_components = \n{}\n\nmean = {}".format(model_type, attributes_["principal_components"], attributes_["mean"])
         elif model_type in ("SVD",):
             if ("vectors" not in attributes or "values" not in attributes):
                 raise ParameterError("SVD's attributes must include 2 lists: one with all the right singular vectors and one with the singular values of each input feature.")
@@ -816,7 +1033,7 @@ def __init__(
             attributes_["values"] = np.copy(attributes["values"])
             check_types([("vectors", attributes_["vectors"], [list,],),
                          ("values", attributes_["values"], [list,],),])
-            represent = "<{}>\n\nvectors = {}\n\nvalues = {}".format(model_type, attributes_["vectors"], attributes_["values"])
+            represent = "<{}>\n\nvectors = \n{}\n\nvalues = {}".format(model_type, attributes_["vectors"], attributes_["values"])
         elif model_type in ("Normalizer",):
             if ("values" not in attributes or "method" not in attributes):
                 raise ParameterError("Normalizer's attributes must include a list including the model's aggregations and a string representing the model's method.")
@@ -826,7 +1043,7 @@ def __init__(
                          ("method", attributes_["method"], ["minmax", "zscore", "robust_zscore",],),])
             represent = "<{}>\n\nvalues = {}\n\nmethod = {}".format(model_type, attributes_["values"], attributes_["method"])
         else:
-            raise ParameterError("Model type '{}' is not yet available.".format(self.model_type_))
+            raise ParameterError("Model type '{}' is not yet available.".format(model_type))
         self.attributes_ = attributes_
         self.model_type_ = model_type
         self.represent_ = represent
@@ -888,6 +1105,18 @@ def predict(self, X: list):
             return predict_from_clusters(X, self.attributes_["clusters"], p=self.attributes_["p"], classes=self.attributes_["classes"])
         elif self.model_type_ in ("BisectingKMeans",):
             return predict_from_bisecting_kmeans(X, self.attributes_["clusters"], self.attributes_["left_child"], self.attributes_["right_child"], p=self.attributes_["p"])
+        elif self.model_type_ in ("BinaryTreeRegressor", "BinaryTreeClassifier",):
+            return predict_from_binary_tree(X, self.attributes_["children_left"], self.attributes_["children_right"], self.attributes_["feature"], self.attributes_["threshold"], self.attributes_["value"], self.attributes_["classes"] if self.model_type_ in ("BinaryTreeClassifier",) else [], is_regressor=self.model_type_ in ("BinaryTreeRegressor",),)
+        elif self.model_type_ in ("RandomForestRegressor", "XGBoostRegressor",):
+            result = [tree.predict(X) for tree in self.attributes_["trees"]]
+            if self.model_type_ in ("RandomForestRegressor",):
+                return np.average(np.column_stack(result), axis=1)
+            else:
+                return np.sum(np.column_stack(result), axis=1) * self.attributes_["learning_rate"] + self.attributes_["mean"]
+        elif self.model_type_ in ("RandomForestClassifier", "XGBoostClassifier",):
+            result = np.argmax(self.predict_proba(X), axis=1)
+            result = np.array([self.attributes_["trees"][0].attributes_["classes"][i] for i in result])
+            return result
         else:
             raise FunctionError("Method 'predict' is not available for model type '{}'.".format(self.model_type_))
 
@@ -918,11 +1147,40 @@ def predict_sql(self, X: list):
             return sql_from_clusters(X, self.attributes_["clusters"], p=self.attributes_["p"], classes=self.attributes_["classes"])
         elif self.model_type_ in ("BisectingKMeans",):
             return sql_from_bisecting_kmeans(X, self.attributes_["clusters"], self.attributes_["left_child"], self.attributes_["right_child"], p=self.attributes_["p"])
+        elif self.model_type_ in ("BinaryTreeRegressor", "BinaryTreeClassifier",):
+            return sql_from_binary_tree(X, self.attributes_["children_left"], self.attributes_["children_right"], self.attributes_["feature"], self.attributes_["threshold"], self.attributes_["value"], self.attributes_["classes"] if self.model_type_ in ("BinaryTreeClassifier",) else [], is_regressor=self.model_type_ in ("BinaryTreeRegressor",),)
+        elif self.model_type_ in ("RandomForestRegressor", "XGBoostRegressor",):
+            result = [tree.predict_sql(X) for tree in self.attributes_["trees"]]
+            if self.model_type_ in ("RandomForestRegressor",):
+                return "(" + " + ".join(result) + ") / {}".format(len(result))
+            else:
+                return "(" + " + ".join(result) + ") * {} + {}".format(self.attributes_["learning_rate"], self.attributes_["mean"],)
+        elif self.model_type_ in ("RandomForestClassifier", "XGBoostClassifier",):
+            classes = self.attributes_["trees"][0].attributes_["classes"]
+            m = len(classes)
+            result_proba = self.predict_proba_sql(X,)
+            if m == 2:
+                return "(CASE WHEN {} > 0.5 THEN {} ELSE {} END)".format(result_proba[1], classes[1], classes[0])
+            else:
+                sql = []
+                for i in range(m):
+                    list_tmp = []
+                    for j in range(i):
+                        list_tmp += ["{} <= {}".format(result_proba[i], result_proba[j])]
+                    sql += [" AND ".join(list_tmp)]
+                sql = sql[1:]
+                sql.reverse()
+                sql_final = "CASE WHEN {} THEN NULL".format(" OR ".join(["{} IS NULL".format(elem) for elem in X]))
+                for i in range(m - 1):
+                    class_i = classes[m - i - 1]
+                    sql_final += " WHEN {} THEN {}".format(sql[i], "'{}'".format(class_i) if isinstance(class_i, str) else class_i)
+                sql_final += " ELSE {} END".format("'{}'".format(classes[0]) if isinstance(classes[0], str) else classes[0])
+                return sql_final
         else:
             raise FunctionError("Method 'predict_sql' is not available for model type '{}'.".format(self.model_type_)) 
 
     # ---#
-    def predict_proba(self, X: list):
+    def predict_proba(self, X: list,):
         """
     ---------------------------------------------------------------------------
     Predicts probabilities using the model's attributes.
@@ -943,6 +1201,24 @@ def predict_proba(self, X: list):
             return predict_from_clusters(X, self.attributes_["clusters"], p=self.attributes_["p"], return_proba=True,)
         elif self.model_type_ in ("NearestCentroids",):
             return predict_from_clusters(X, self.attributes_["clusters"], p=self.attributes_["p"], classes=self.attributes_["classes"], return_proba=True,)
+        elif self.model_type_ in ("BinaryTreeClassifier",):
+            return predict_from_binary_tree(X, self.attributes_["children_left"], self.attributes_["children_right"], self.attributes_["feature"], self.attributes_["threshold"], self.attributes_["value"], self.attributes_["classes"], True, is_regressor=False,)
+        elif self.model_type_ in ("RandomForestClassifier",):
+            result, n = 0, len(self.attributes_["trees"])
+            for i in range(n):
+                result_tmp = self.attributes_["trees"][i].predict_proba(X)
+                result_tmp_arg = np.zeros_like(result_tmp)
+                result_tmp_arg[np.arange(len(result_tmp)), result_tmp.argmax(1)] = 1
+                result += result_tmp_arg
+            return result / n
+        elif self.model_type_ in ("XGBoostClassifier",):
+            result = 0
+            for tree in self.attributes_["trees"]:
+                result += tree.predict_proba(X)
+            result = self.attributes_["logodds"] + self.attributes_["learning_rate"] * result
+            result = 1 / (1 + np.exp(- result))
+            result /=  np.sum(result, axis=1)[:,None]
+            return result
         else:
             raise FunctionError("Method 'predict_proba' is not available for model type '{}'.".format(self.model_type_))
 
@@ -969,6 +1245,39 @@ def predict_proba_sql(self, X: list):
             return sql_from_clusters(X, self.attributes_["clusters"], p=self.attributes_["p"], return_proba=True,)
         elif self.model_type_ in ("NearestCentroids",):
             return sql_from_clusters(X, self.attributes_["clusters"], p=self.attributes_["p"], classes=self.attributes_["classes"], return_proba=True,)
+        elif self.model_type_ in ("BinaryTreeClassifier",):
+            return sql_from_binary_tree(X, self.attributes_["children_left"], self.attributes_["children_right"], self.attributes_["feature"], self.attributes_["threshold"], self.attributes_["value"], self.attributes_["classes"], True, is_regressor=False,)
+        elif self.model_type_ in ("RandomForestClassifier",):
+            trees, n, m = [], len(self.attributes_["trees"]), len(self.attributes_["trees"][0].attributes_["classes"])
+            for i in range(n):
+                val = []
+                for elem in self.attributes_["trees"][i].attributes_["value"]:
+                    if isinstance(elem, type(None)):
+                        val += [elem]
+                    else:
+                        value_tmp = np.zeros_like([elem])
+                        value_tmp[np.arange(1), np.array([elem]).argmax(1)] = 1
+                        val += [list(value_tmp[0])]
+                tree = memModel("BinaryTreeClassifier", {"children_left": self.attributes_["trees"][i].attributes_["children_left"],
+                                                         "children_right": self.attributes_["trees"][i].attributes_["children_right"],
+                                                         "feature": self.attributes_["trees"][i].attributes_["feature"],
+                                                         "threshold": self.attributes_["trees"][i].attributes_["threshold"],
+                                                         "value": val,
+                                                         "classes": self.attributes_["trees"][i].attributes_["classes"],})
+                trees += [tree]
+            result = [trees[i].predict_proba_sql(X) for i in range(n)]
+            classes_proba = []
+            for i in range(m):
+                classes_proba += ["(" + " + ".join([val[i] for val in result]) + ") / {}".format(n)]
+            return classes_proba
+        elif self.model_type_ in ("XGBoostClassifier",):
+            result, n, m = [], len(self.attributes_["trees"]), len(self.attributes_["trees"][0].attributes_["classes"])
+            all_probas = [self.attributes_["trees"][i].predict_proba_sql(X) for i in range(n)]
+            for i in range(m):
+                result += ["(1 / (1 + EXP(- ({} + {} * (".format(self.attributes_["logodds"][i], self.attributes_["learning_rate"]) + " + ".join(all_probas[i]) + ")))))"]
+            sum_result = "(" + " + ".join(result) + ")"
+            result = [item + " / {}".format(sum_result) for item in result]
+            return result
         else:
             raise FunctionError("Method 'predict_proba_sql' is not available for model type '{}'.".format(self.model_type_))
 
@@ -1030,3 +1339,35 @@ def transform_sql(self, X: list):
         else:
             raise FunctionError("Method 'transform_sql' is not available for model type '{}'.".format(self.model_type_))
 
+    # ---#
+    def rotate(self, gamma: float = 1.0, q: int = 20, tol: float = 1e-6):
+        """
+    ---------------------------------------------------------------------------
+    Performs an Oblimin (Varimax, Quartimax) rotation on the model's
+    PCA matrix. Raises a FunctionError for any model type other than 'PCA'.
+
+    Parameters
+    ----------
+    gamma: float, optional
+        Oblimin rotation factor, determines the type of rotation.
+        It is expected to be between 0.0 and 1.0 (not checked in this method).
+            gamma = 0.0 results in a Quartimax rotation.
+            gamma = 1.0 results in a Varimax rotation.
+    q: int, optional
+        Maximum number of iterations.
+    tol: float, optional
+        Tolerance forwarded to matrix_rotation; it decides when to stop early.
+
+    Returns
+    -------
+    self
+        memModel
+        """
+        from verticapy.learn.tools import matrix_rotation
+
+        if self.model_type_ in ("PCA",):
+            principal_components = matrix_rotation(self.get_attributes()["principal_components"], gamma, q, tol)
+            self.set_attributes({"principal_components": principal_components})  # the rotated matrix replaces the stored one
+        else:
+            raise FunctionError("Method 'rotate' is not available for model type '{}'.".format(self.model_type_))
+        return self
diff --git a/verticapy/learn/tools.py b/verticapy/learn/tools.py
index 8412e8895..fb075661e 100644
--- a/verticapy/learn/tools.py
+++ b/verticapy/learn/tools.py
@@ -54,6 +54,8 @@
 
 # Standard Python Modules
 import numpy as np
+from numpy import eye, asarray, dot, sum, diag
+from numpy.linalg import svd
 from typing import Union
 
 #
@@ -635,4 +637,52 @@ def load_model(name: str, cursor=None, input_relation: str = "", test_relation:
             model.classes_ = [0, 1]
         if model_type in ("svm_classifier", "svm_regressor", "logistic_reg", "linear_reg",):
             model.coef_ = model.get_attr("details")
-    return model
\ No newline at end of file
+    return model
+
+# ---#
+# This piece of code was taken from
+# https://en.wikipedia.org/wiki/Talk:Varimax_rotation
+def matrix_rotation(Phi: list, 
+					gamma: float = 1.0, 
+					q: int = 20, 
+					tol: float = 1e-6):
+    """
+---------------------------------------------------------------------------
+Performs an Oblimin (Varimax, Quartimax) rotation on the input matrix,
+for example a model's PCA matrix.
+
+Parameters
+----------
+Phi: list / numpy.array
+    Input matrix. It must be two-dimensional (p rows, k columns).
+gamma: float, optional
+    Oblimin rotation factor, determines the type of rotation.
+    It is expected to be between 0.0 and 1.0 (only the type is checked).
+        gamma = 0.0 results in a Quartimax rotation.
+        gamma = 1.0 results in a Varimax rotation.
+q: int, optional
+    Maximum number of iterations (used as range(q), so it must be an int).
+tol: float, optional
+    Stops early once the sum of singular values grows by less than 1 + tol.
+
+Returns
+-------
+numpy.array
+    The rotated matrix, i.e. Phi multiplied by the final rotation matrix.
+    """
+    check_types([("Phi", Phi, [list,],),
+    			 ("gamma", gamma, [int, float,],),
+    			 ("q", q, [int, float,],),  # NOTE(review): float is accepted here, but range(q) below needs an int
+    			 ("tol", tol, [int, float,],),])
+    Phi = np.array(Phi)
+    p,k = Phi.shape
+    R = eye(k)  # rotation matrix, starts as the identity
+    d=0  # sum of singular values from the latest iteration
+    for i in range(q):
+        d_old = d
+        Lambda = dot(Phi, R)  # input matrix under the current rotation
+        u,s,vh = svd(dot(Phi.T,asarray(Lambda)**3 - (gamma/p) * dot(Lambda, diag(diag(dot(Lambda.T,Lambda))))))
+        R = dot(u,vh)  # updated rotation built from the SVD factors
+        d = sum(s)  # 'sum' is numpy.sum here (imported from numpy at the top of the module)
+        if d_old!=0 and d/d_old < 1 + tol: break  # relative gain fell below tol
+    return dot(Phi, R)
\ No newline at end of file
diff --git a/verticapy/learn/vmodel.py b/verticapy/learn/vmodel.py
index 38fd86d3e..856ece168 100644
--- a/verticapy/learn/vmodel.py
+++ b/verticapy/learn/vmodel.py
@@ -51,7 +51,6 @@
 # Standard Python Modules
 import os, warnings
 import numpy as np
-from collections.abc import Iterable
 from typing import Union
 
 # VerticaPy Modules
@@ -2387,10 +2386,8 @@ def to_python(self, name: str = "predict", return_proba: bool = False, return_di
         func = "def {}(X):\n\timport numpy as np\n\t".format(name)
         if self.type in ("LinearRegression", "LinearSVR", "LogisticRegression", "LinearSVC",):
             result = "{} + np.sum(np.array({}) * np.array(X), axis=1)".format(self.coef_["coefficient"][0], self.coef_["coefficient"][1:])
-            if self.type in ("LogisticRegression",):
+            if self.type in ("LogisticRegression", "LinearSVC",):
                 func += f"result = 1 / (1 + np.exp(- ({result})))"
-            elif self.type in ("LinearSVC",):
-                func += f"result =  1 - 1 / (1 + np.exp({result}))"
             else:
                 func += "result =  " + result
             if return_proba and self.type in ("LogisticRegression", "LinearSVC",):
diff --git a/verticapy/stats/tools.py b/verticapy/stats/tools.py
index 7d779cd56..1a521d8a0 100644
--- a/verticapy/stats/tools.py
+++ b/verticapy/stats/tools.py
@@ -635,16 +635,16 @@ def het_breuschpagan(
 ):
     """
 ---------------------------------------------------------------------------
-Breusch-Pagan test for heteroscedasticity.
+Uses the Breusch-Pagan test to check a model for heteroskedasticity.
 
 Parameters
 ----------
 vdf: vDataFrame
     Input vDataFrame.
 eps: str
-    Input residual vcolumn.
+    Input residual vColumn.
 X: list
-    Exogenous Variables to test the heteroscedasticity on.
+    The exogenous variables to test.
 
 Returns
 -------
diff --git a/verticapy/tests/vModel/test_memmodel.py b/verticapy/tests/vModel/test_memmodel.py
index 8b2bed7a5..37444b23d 100644
--- a/verticapy/tests/vModel/test_memmodel.py
+++ b/verticapy/tests/vModel/test_memmodel.py
@@ -113,6 +113,12 @@ def test_PCA(self,):
         assert attributes["principal_components"][0][1] == 0.2
         assert attributes["principal_components"][1][0] == 0.7
         assert attributes["principal_components"][1][1] == 0.8
+        model = model.rotate()
+        attributes = model.get_attributes()
+        assert attributes["principal_components"][0][0] == pytest.approx(0.05887149)
+        assert attributes["principal_components"][0][1] == pytest.approx(0.21571775)
+        assert attributes["principal_components"][1][0] == pytest.approx(0.01194755)
+        assert attributes["principal_components"][1][1] == pytest.approx(1.06294744)
         assert attributes["mean"][0] == 0.9
         assert attributes["mean"][1] == 0.8
         assert model.model_type_ == "PCA"
@@ -339,4 +345,253 @@ def test_BisectingKMeans(self,):
         assert attributes["p"] == 3
         assert model.model_type_ == "BisectingKMeans"
 
+    def test_BinaryTreeRegressor(self,):  # checks predict, predict_sql and attribute storage for one regression tree
+        model = memModel("BinaryTreeRegressor", {"children_left": [1, 3, None, None, None], # branch taken when the node's test holds; None for leaves 2-4
+                                                 "children_right": [2, 4, None, None, None],  # branch taken otherwise
+                                                 "feature": [0, 1, None, None, None],  # position of the tested input column for each split node
+                                                 "threshold": ['female', 30, None, None, None],  # string -> '=' test, number -> '<' test (see expected SQL)
+                                                 "value": [None, None, 3, 11, 1993],})  # predictions stored on the leaves
+        prediction = model.predict([['male', 100], ['female', 20] , ['female', 50]])  # rows end in leaves 2, 3 and 4
+        assert prediction[0] == pytest.approx(3.0)
+        assert prediction[1] == pytest.approx(11.0)
+        assert prediction[2] == pytest.approx(1993.0)
+        assert model.predict_sql(['sex', 'fare']) == "(CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 11 ELSE 1993 END) ELSE 3 END)"
+        attributes = model.get_attributes()  # the stored attributes must match what was passed in
+        assert attributes["children_left"][0] == 1
+        assert attributes["children_left"][1] == 3
+        assert attributes["children_right"][0] == 2
+        assert attributes["children_right"][1] == 4
+        assert attributes["feature"][0] == 0
+        assert attributes["feature"][1] == 1
+        assert attributes["threshold"][0] == 'female'
+        assert attributes["threshold"][1] == 30
+        assert attributes["value"][2] == 3
+        assert attributes["value"][3] == 11
+        assert model.model_type_ == "BinaryTreeRegressor"
+
+    def test_BinaryTreeClassifier(self,):  # checks predict, predict_sql, predict_proba and attribute updates for one classification tree
+        model = memModel("BinaryTreeClassifier", {"children_left": [1, 3, None, None, None], # branch taken when the node's test holds; None for leaves 2-4
+                                                  "children_right": [2, 4, None, None, None],  # branch taken otherwise
+                                                  "feature": [0, 1, None, None, None],  # position of the tested input column for each split node
+                                                  "threshold": ['female', 30, None, None, None],  # string -> '=' test, number -> '<' test (see expected SQL)
+                                                  "value": [None, None, [0.8, 0.1, 0.1], [0.1, 0.8, 0.1], [0.2, 0.2, 0.6]],  # per-class probabilities on the leaves
+                                                  "classes": ['a', 'b', 'c',]})  # labels, in the same order as the probabilities
+        prediction = model.predict([['male', 100], ['female', 20] , ['female', 50]])  # expects the most probable class of the leaf reached
+        assert prediction[0] == 'a'
+        assert prediction[1] == 'b'
+        assert prediction[2] == 'c'
+        assert model.predict_sql(['sex', 'fare']) == "(CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 'b' ELSE 'c' END) ELSE 'a' END)"
+        prediction = model.predict_proba([['male', 100], ['female', 20] , ['female', 50]])  # expects the raw leaf probabilities
+        assert prediction[0][0] == 0.8
+        assert prediction[0][1] == 0.1
+        assert prediction[0][2] == 0.1
+        assert prediction[1][0] == 0.1
+        assert prediction[1][1] == 0.8
+        assert prediction[1][2] == 0.1
+        assert prediction[2][0] == 0.2
+        assert prediction[2][1] == 0.2
+        assert prediction[2][2] == 0.6
+        attributes = model.get_attributes()
+        assert attributes["children_left"][0] == 1
+        assert attributes["children_left"][1] == 3
+        assert attributes["children_right"][0] == 2
+        assert attributes["children_right"][1] == 4
+        assert attributes["feature"][0] == 0
+        assert attributes["feature"][1] == 1
+        assert attributes["threshold"][0] == 'female'
+        assert attributes["threshold"][1] == 30
+        assert attributes["value"][2][0] == 0.8
+        assert attributes["value"][3][0] == 0.1
+        model.set_attributes({"classes": [0, 1, 2],})  # replacing the labels must be reflected by get_attributes
+        attributes = model.get_attributes()
+        assert attributes["classes"][0] == 0
+        assert attributes["classes"][1] == 1
+        assert attributes["classes"][2] == 2
+        assert model.model_type_ == "BinaryTreeClassifier"
+
+    def test_RandomForestRegressor(self,):  # checks that a forest of three regression trees averages the tree outputs
+        model1 = memModel("BinaryTreeRegressor", {"children_left": [1, 3, None, None, None], # all three trees share one structure; only the leaf values differ
+                                                  "children_right": [2, 4, None, None, None],
+                                                  "feature": [0, 1, None, None, None],
+                                                  "threshold": ['female', 30, None, None, None],
+                                                  "value": [None, None, 3, 11, 1993],})
+        model2 = memModel("BinaryTreeRegressor", {"children_left": [1, 3, None, None, None], 
+                                                  "children_right": [2, 4, None, None, None],
+                                                  "feature": [0, 1, None, None, None],
+                                                  "threshold": ['female', 30, None, None, None],
+                                                  "value": [None, None, -3, -11, -1993],})  # exact negation of model1's leaves
+        model3 = memModel("BinaryTreeRegressor", {"children_left": [1, 3, None, None, None], 
+                                                  "children_right": [2, 4, None, None, None],
+                                                  "feature": [0, 1, None, None, None],
+                                                  "threshold": ['female', 30, None, None, None],
+                                                  "value": [None, None, 0, 3, 6],})
+        model = memModel("RandomForestRegressor", {"trees": [model1, model2, model3]})
+        prediction = model.predict([['male', 100], ['female', 20] , ['female', 50]])  # model1 and model2 cancel, leaving model3's leaf / 3
+        assert prediction[0] == pytest.approx(0.0)
+        assert prediction[1] == pytest.approx(1.0)
+        assert prediction[2] == pytest.approx(2.0)
+        assert model.predict_sql(['sex', 'fare']) == "((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 11 ELSE 1993 END) ELSE 3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN -11 ELSE -1993 END) ELSE -3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 3 ELSE 6 END) ELSE 0 END)) / 3"
+        attributes = model.get_attributes()["trees"][0].get_attributes()  # inspect the first tree stored in the forest
+        assert attributes["children_left"][0] == 1
+        assert attributes["children_left"][1] == 3
+        assert attributes["children_right"][0] == 2
+        assert attributes["children_right"][1] == 4
+        assert attributes["feature"][0] == 0
+        assert attributes["feature"][1] == 1
+        assert attributes["threshold"][0] == 'female'
+        assert attributes["threshold"][1] == 30
+        assert attributes["value"][2] == 3
+        assert attributes["value"][3] == 11
+        assert model.model_type_ == "RandomForestRegressor"
+
+    def test_RandomForestClassifier(self,):  # checks that a forest of three classification trees averages one-hot tree votes
+        model1 = memModel("BinaryTreeClassifier", {"children_left": [1, 3, None, None, None], # all three trees share one structure; only the leaf probabilities differ
+                                                   "children_right": [2, 4, None, None, None],
+                                                   "feature": [0, 1, None, None, None],
+                                                   "threshold": ['female', 30, None, None, None],
+                                                   "value": [None, None, [0.8, 0.1, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8]],
+                                                   "classes": ['a', 'b', 'c'],})
+        model2 = memModel("BinaryTreeClassifier", {"children_left": [1, 3, None, None, None], 
+                                                   "children_right": [2, 4, None, None, None],
+                                                   "feature": [0, 1, None, None, None],
+                                                   "threshold": ['female', 30, None, None, None],
+                                                   "value": [None, None, [0.7, 0.15, 0.15], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]],
+                                                   "classes": ['a', 'b', 'c'],})
+        model3 = memModel("BinaryTreeClassifier", {"children_left": [1, 3, None, None, None], 
+                                                   "children_right": [2, 4, None, None, None],
+                                                   "feature": [0, 1, None, None, None],
+                                                   "threshold": ['female', 30, None, None, None],
+                                                   "value": [None, None, [0.3, 0.7, 0.0], [0.0, 0.4, 0.6], [0.9, 0.1, 0.0]],  # this tree's top class differs from the other two on every leaf
+                                                   "classes": ['a', 'b', 'c'],})
+        model = memModel("RandomForestClassifier", {"trees": [model1, model2, model3]})
+        prediction = model.predict([['male', 100], ['female', 20] , ['female', 50]])  # two of the three trees agree on each row
+        assert prediction[0] == 'a'
+        assert prediction[1] == 'b'
+        assert prediction[2] == 'c'
+        assert model.predict_sql(['sex', 'fare']) == "CASE WHEN sex IS NULL OR fare IS NULL THEN NULL WHEN ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 1.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 1.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 1.0 ELSE 0.0 END) ELSE 0.0 END)) / 3 <= ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.0 END) ELSE 1.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.0 END) ELSE 1.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 1.0 END) ELSE 0.0 END)) / 3 AND ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 1.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 1.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 1.0 ELSE 0.0 END) ELSE 0.0 END)) / 3 <= ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 1.0 ELSE 0.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 1.0 ELSE 0.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.0 END) ELSE 1.0 END)) / 3 THEN 'c' WHEN ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 1.0 ELSE 0.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 1.0 ELSE 0.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.0 END) ELSE 1.0 END)) / 3 <= ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.0 END) ELSE 1.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.0 END) ELSE 1.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 1.0 END) ELSE 0.0 END)) / 3 THEN 'b' ELSE 'a' END"
+        prediction = model.predict_proba([['male', 100], ['female', 20] , ['female', 50]])  # expects the share of trees voting for each class
+        assert prediction[0][0] == pytest.approx(0.66666667)
+        assert prediction[0][1] == pytest.approx(0.33333333)
+        assert prediction[0][2] == pytest.approx(0.0)
+        assert prediction[1][0] == pytest.approx(0.0)
+        assert prediction[1][1] == pytest.approx(0.66666667)
+        assert prediction[1][2] == pytest.approx(0.33333333)
+        assert prediction[2][0] == pytest.approx(0.33333333)
+        assert prediction[2][1] == pytest.approx(0.0)
+        assert prediction[2][2] == pytest.approx(0.66666667)
+        prediction = model.predict_proba_sql(["sex", "fare"])  # one SQL expression per class, in 'classes' order
+        assert prediction[0] == "((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.0 END) ELSE 1.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.0 END) ELSE 1.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 1.0 END) ELSE 0.0 END)) / 3"
+        assert prediction[1] == "((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 1.0 ELSE 0.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 1.0 ELSE 0.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.0 END) ELSE 1.0 END)) / 3"
+        assert prediction[2] == "((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 1.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 1.0 END) ELSE 0.0 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 1.0 ELSE 0.0 END) ELSE 0.0 END)) / 3"
+        attributes = model.get_attributes()["trees"][0].get_attributes()  # inspect the first tree stored in the forest
+        assert attributes["children_left"][0] == 1
+        assert attributes["children_left"][1] == 3
+        assert attributes["children_right"][0] == 2
+        assert attributes["children_right"][1] == 4
+        assert attributes["feature"][0] == 0
+        assert attributes["feature"][1] == 1
+        assert attributes["threshold"][0] == 'female'
+        assert attributes["threshold"][1] == 30
+        assert attributes["value"][2][0] == 0.8
+        assert attributes["value"][3][0] == 0.1
+        assert model.model_type_ == "RandomForestClassifier"
+
+    def test_XGBoostRegressor(self,):  # checks that the boosted prediction is mean + learning_rate * (sum of tree outputs)
+        model1 = memModel("BinaryTreeRegressor", {"children_left": [1, 3, None, None, None], # all three trees share one structure; only the leaf values differ
+                                                  "children_right": [2, 4, None, None, None],
+                                                  "feature": [0, 1, None, None, None],
+                                                  "threshold": ['female', 30, None, None, None],
+                                                  "value": [None, None, 3, 11, 1993],})
+        model2 = memModel("BinaryTreeRegressor", {"children_left": [1, 3, None, None, None], 
+                                                  "children_right": [2, 4, None, None, None],
+                                                  "feature": [0, 1, None, None, None],
+                                                  "threshold": ['female', 30, None, None, None],
+                                                  "value": [None, None, -3, -11, -1993],})  # exact negation of model1's leaves
+        model3 = memModel("BinaryTreeRegressor", {"children_left": [1, 3, None, None, None], 
+                                                  "children_right": [2, 4, None, None, None],
+                                                  "feature": [0, 1, None, None, None],
+                                                  "threshold": ['female', 30, None, None, None],
+                                                  "value": [None, None, 0, 3, 6],})
+        model = memModel("XGBoostRegressor", {"trees": [model1, model2, model3],
+                                              "learning_rate": 0.1,
+                                              "mean": 1.0})
+        prediction = model.predict([['male', 100], ['female', 20] , ['female', 50]])  # tree sums are 0, 3 and 6 -> 1.0 + 0.1 * sum
+        assert prediction[0] == pytest.approx(1.0)
+        assert prediction[1] == pytest.approx(1.3)
+        assert prediction[2] == pytest.approx(1.6)
+        assert model.predict_sql(['sex', 'fare']) == "((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 11 ELSE 1993 END) ELSE 3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN -11 ELSE -1993 END) ELSE -3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 3 ELSE 6 END) ELSE 0 END)) * 0.1 + 1.0"
+        attributes = model.get_attributes()["trees"][0].get_attributes()  # inspect the first tree stored in the ensemble
+        assert attributes["children_left"][0] == 1
+        assert attributes["children_left"][1] == 3
+        assert attributes["children_right"][0] == 2
+        assert attributes["children_right"][1] == 4
+        assert attributes["feature"][0] == 0
+        assert attributes["feature"][1] == 1
+        assert attributes["threshold"][0] == 'female'
+        assert attributes["threshold"][1] == 30
+        assert attributes["value"][2] == 3
+        assert attributes["value"][3] == 11
+        attributes = model.get_attributes()  # ensemble-level attributes
+        assert attributes["learning_rate"] == 0.1
+        assert attributes["mean"] == 1.0
+        model.set_attributes({"learning_rate": 0.2, "mean": 2.0})  # updated values must be reflected by get_attributes
+        attributes = model.get_attributes()
+        assert attributes["learning_rate"] == 0.2
+        assert attributes["mean"] == 2.0
+        assert model.model_type_ == "XGBoostRegressor"
+
+    def test_XGBoostClassifier(self,):
+        model1 = memModel("BinaryTreeClassifier", {"children_left": [1, 3, None, None, None], 
+                                                   "children_right": [2, 4, None, None, None],
+                                                   "feature": [0, 1, None, None, None],
+                                                   "threshold": ['female', 30, None, None, None],
+                                                   "value": [None, None, [0.8, 0.1, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8]],
+                                                   "classes": ['a', 'b', 'c'],})
+        model2 = memModel("BinaryTreeClassifier", {"children_left": [1, 3, None, None, None], 
+                                                   "children_right": [2, 4, None, None, None],
+                                                   "feature": [0, 1, None, None, None],
+                                                   "threshold": ['female', 30, None, None, None],
+                                                   "value": [None, None, [0.7, 0.15, 0.15], [0.2, 0.6, 0.2], [0.2, 0.2, 0.6]],
+                                                   "classes": ['a', 'b', 'c'],})
+        model3 = memModel("BinaryTreeClassifier", {"children_left": [1, 3, None, None, None], 
+                                                   "children_right": [2, 4, None, None, None],
+                                                   "feature": [0, 1, None, None, None],
+                                                   "threshold": ['female', 30, None, None, None],
+                                                   "value": [None, None, [0.3, 0.7, 0.0], [0.0, 0.4, 0.6], [0.9, 0.1, 0.0]],
+                                                   "classes": ['a', 'b', 'c'],})
+        model = memModel("XGBoostClassifier", {"trees": [model1, model2, model3],
+                                               "learning_rate": 0.1,
+                                               "logodds": [0.1, 0.12, 0.15]})
+        prediction = model.predict([['male', 100], ['female', 20] , ['female', 50]])
+        assert prediction[0] == 'a'
+        assert prediction[1] == 'b'
+        assert prediction[2] == 'c'
+        assert model.predict_sql(['sex', 'fare']) == "CASE WHEN sex IS NULL OR fare IS NULL THEN NULL WHEN (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END)))))) / ((1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) + (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) + (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END))))))) <= (1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) / ((1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) 
+ (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) + (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END))))))) AND (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END)))))) / ((1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) + (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) + (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END))))))) <= (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 
0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) / ((1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) + (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) + (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END))))))) THEN 'c' WHEN (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) / ((1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) + (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 
ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) + (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END))))))) <= (1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) / ((1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) + (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) + (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END))))))) THEN 'b' ELSE 'a' END"
+        prediction = model.predict_proba([['male', 100], ['female', 20] , ['female', 50]])
+        assert prediction[0][0] == pytest.approx(0.34171499)
+        assert prediction[0][1] == pytest.approx(0.33211396)
+        assert prediction[0][2] == pytest.approx(0.32617105)
+        assert prediction[1][0] == pytest.approx(0.31948336)
+        assert prediction[1][1] == pytest.approx(0.34467713)
+        assert prediction[1][2] == pytest.approx(0.33583951)
+        assert prediction[2][0] == pytest.approx(0.33286283)
+        assert prediction[2][1] == pytest.approx(0.32394435)
+        assert prediction[2][2] == pytest.approx(0.34319282)
+        prediction = model.predict_proba_sql(["sex", "fare"])
+        assert prediction[0] == "(1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) / ((1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) + (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) + (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END)))))))"
+        assert prediction[1] == "(1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) / ((1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) + (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) + (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END)))))))"
+        assert prediction[2] == "(1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END)))))) / ((1 / (1 + EXP(- (0.1 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.1 END) ELSE 0.8 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.8 ELSE 0.1 END) ELSE 0.1 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.1 ELSE 0.8 END) ELSE 0.1 END)))))) + (1 / (1 + EXP(- (0.12 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.2 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.2 END) ELSE 0.15 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.2 ELSE 0.6 END) ELSE 0.15 END)))))) + (1 / (1 + EXP(- (0.15 + 0.1 * ((CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.0 ELSE 0.9 END) ELSE 0.3 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.4 ELSE 0.1 END) ELSE 0.7 END) + (CASE WHEN sex = 'female' THEN (CASE WHEN fare < '30' THEN 0.6 ELSE 0.0 END) ELSE 0.0 END)))))))"
+        attributes = model.get_attributes()["trees"][0].get_attributes()
+        assert attributes["children_left"][0] == 1
+        assert attributes["children_left"][1] == 3
+        assert attributes["children_right"][0] == 2
+        assert attributes["children_right"][1] == 4
+        assert attributes["feature"][0] == 0
+        assert attributes["feature"][1] == 1
+        assert attributes["threshold"][0] == 'female'
+        assert attributes["threshold"][1] == 30
+        assert attributes["value"][2][0] == 0.8
+        assert attributes["value"][3][0] == 0.1
+        assert model.model_type_ == "XGBoostClassifier"
+
 
diff --git a/verticapy/tests/vModel/test_tools.py b/verticapy/tests/vModel/test_tools.py
index 5ddb31af9..861287616 100755
--- a/verticapy/tests/vModel/test_tools.py
+++ b/verticapy/tests/vModel/test_tools.py
@@ -24,6 +24,7 @@
 from verticapy.learn.decomposition import *
 from verticapy.learn.preprocessing import *
 from verticapy.learn.tsa import *
+from verticapy.learn.tools import *
 
 import matplotlib.pyplot as plt
 
@@ -291,5 +292,15 @@ def test_load_model(self, base, titanic_vd):
         #model.drop()
         base.cursor.execute("DROP SCHEMA load_model_test CASCADE")
 
-
+    def test_matrix_rotation(self,):
+        """Check matrix_rotation on one 2x2 input: default gamma, then gamma=0.0."""
+        cells = [(0, 0), (0, 1), (1, 0), (1, 1)]
+        default_run = matrix_rotation([[0.5, 0.6], [0.1, 0.2]])
+        wanted = [0.01539405, 0.78087324, 0.05549495, 0.21661097]
+        for (row, col), target in zip(cells, wanted):
+            assert default_run[row][col] == pytest.approx(target)
+        gamma_zero_run = matrix_rotation([[0.5, 0.6], [0.1, 0.2]], gamma=0.0)
+        wanted = [0.0010429389547800816, 0.78102427, -0.05092405, 0.21773089]
+        for (row, col), target in zip(cells, wanted):
+            assert gamma_zero_run[row][col] == pytest.approx(target)
     
\ No newline at end of file