Testunits 8 (#48)
* Added the CONTRIBUTING.md file

* Added the requirements-dev.txt file

* Added the tox.ini file

* Implemented the tests of test_linear_regression.py except its get_plot test

* Implemented the tests of test_logistic_regression.py except test_plot and test_set_cursor
afard authored Oct 26, 2020
1 parent 64ba59e commit 5b9029f
Showing 2 changed files with 180 additions and 12 deletions.
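Both test files rely on a shared `base` fixture that supplies a live Vertica cursor; that fixture is defined elsewhere in the repository's conftest and is not part of this diff. A minimal sketch of what such a fixture could look like, with purely illustrative connection settings (the host, port, credentials, and the `Base` wrapper class are assumptions, not the project's actual configuration):

# Hypothetical sketch only: the real `base` fixture lives in the repo's
# conftest.py and its connection handling may differ.
import pytest
import vertica_python


class Base:
    # Bundles the cursor that the tests reach via `base.cursor`.
    def __init__(self, cursor):
        self.cursor = cursor


@pytest.fixture(scope="session")
def base():
    # Illustrative connection settings; adjust to the target Vertica instance.
    conn_info = {
        "host": "127.0.0.1",
        "port": 5433,
        "user": "dbadmin",
        "password": "",
        "database": "testdb",
    }
    with vertica_python.connect(**conn_info) as connection:
        yield Base(connection.cursor())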
3 changes: 2 additions & 1 deletion verticapy/tests/vModel/test_linear_regression.py
@@ -64,6 +64,7 @@ def test_features_importance(self, model):

        assert fim["index"] == ['alcohol', 'residual_sugar', 'citric_acid']
        assert fim["importance"] == [52.25, 32.58, 15.17]
        # TODO: it is nicer not to have Decimal for sign
        assert fim["sign"] == [Decimal('1'), Decimal('1'), Decimal('1')]

    def test_get_model_attribute(self, model):
@@ -158,7 +159,7 @@ def test_set_params(self, model):
        assert model.get_params()['max_iter'] == 1000

@pytest.mark.skip(reason="feautre not implemented")
def test_model_from_vDF(self, winequality_vd):
def test_model_from_vDF(self, base, winequality_vd):
base.cursor.execute("DROP MODEL IF EXISTS linreg_from_vDF")
model_test = LinearRegression("linreg_from_vDF", cursor=base.cursor)
model_test.fit(winequality_vd, ["alcohol"], "quality")
189 changes: 178 additions & 11 deletions verticapy/tests/vModel/test_logistic_regression.py
@@ -13,24 +13,66 @@

import pytest
from verticapy.learn.linear_model import LogisticRegression
-from verticapy import drop_table
+from decimal import Decimal


@pytest.fixture(scope="module")
-def model(base):
+def titanic_vd(base):
    from verticapy.learn.datasets import load_titanic
    from verticapy import drop_table

    titanic = load_titanic(cursor=base.cursor)
    titanic.set_display_parameters(print_info=False)
    yield titanic
    drop_table(name="public.titanic", cursor=base.cursor)


@pytest.fixture(scope="module")
def model(base, titanic_vd):
base.cursor.execute("DROP MODEL IF EXISTS logreg_model_test")
model_class = LogisticRegression("logreg_model_test", cursor=base.cursor)
model_class.fit("public.titanic", ["age", "fare"], "survived")
yield model_class
model_class.drop()
drop_table(name="public.titanic", cursor=base.cursor)


class TestLogisticRegression:
    @pytest.mark.xfail(reason = "The returned cutoff value is wrong")
    def test_classification_report(self, model):
        cls_rep1 = model.classification_report().transpose()

        assert cls_rep1["auc"][0] == pytest.approx(0.6974762740166146)
        assert cls_rep1["prc_auc"][0] == pytest.approx(0.6003540469187277)
        assert cls_rep1["accuracy"][0] == pytest.approx(0.6969205834683955)
        assert cls_rep1["log_loss"][0] == pytest.approx(0.281741003041208)
        assert cls_rep1["precision"][0] == pytest.approx(0.6194968553459119)
        assert cls_rep1["recall"][0] == pytest.approx(0.43777777777777777)
        assert cls_rep1["f1_score"][0] == pytest.approx(0.5769062584198693)
        assert cls_rep1["mcc"][0] == pytest.approx(0.31193616529653234)
        assert cls_rep1["informedness"][0] == pytest.approx(0.2834410430839003)
        assert cls_rep1["markedness"][0] == pytest.approx(0.34329598198346645)
        assert cls_rep1["csi"][0] == pytest.approx(0.3450087565674256)
        assert cls_rep1["cutoff"][0] == pytest.approx(0.5)

        cls_rep2 = model.classification_report(cutoff = 0.2).transpose()

        assert cls_rep2["cutoff"][0] == pytest.approx(0.2)

    def test_confusion_matrix(self, model):
        conf_mat1 = model.confusion_matrix()

        assert conf_mat1[0][0] == 663
        assert conf_mat1[0][1] == 253
        assert conf_mat1[1][0] == 121
        assert conf_mat1[1][1] == 197

        conf_mat2 = model.confusion_matrix(cutoff = 0.2)

        assert conf_mat2[0][0] == 179
        assert conf_mat2[0][1] == 59
        assert conf_mat2[1][0] == 605
        assert conf_mat2[1][1] == 391

    def test_deploySQL(self, model):
        expected_sql = "PREDICT_LOGISTIC_REG(\"age\", \"fare\" USING PARAMETERS model_name = 'logreg_model_test', type = 'probability', match_by_pos = 'true')"
        result_sql = model.deploySQL()
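        # deploySQL returns the scoring expression used to predict in-database;
        # the trained model stays inside Vertica rather than being exported.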
@@ -53,18 +95,143 @@ def test_drop(self, base):
        )
        assert base.cursor.fetchone() is None

@pytest.mark.skip(reason="test not implemented")
def test_features_importance(self):
pass
def test_features_importance(self, model):
f_imp = model.features_importance()

@pytest.mark.skip(reason="test not implemented")
def test_get_model_attribute(self):
pass
assert f_imp["index"] == ['fare', 'age']
assert f_imp["importance"] == [87.36, 12.64]
# TODO: it is nicer not to have Decimal for sign
assert f_imp["sign"] == [Decimal('1'), Decimal('-1')]

    def test_lift_chart(self, model):
        lift_ch = model.lift_chart()

        assert lift_ch["decision_boundary"][10] == pytest.approx(0.01)
        assert lift_ch["positive_prediction_ratio"][10] == pytest.approx(0.010230179028133)
        assert lift_ch["lift"][10] == pytest.approx(2.54731457800512)
        assert lift_ch["decision_boundary"][900] == pytest.approx(0.9)
        assert lift_ch["positive_prediction_ratio"][900] == pytest.approx(1.0)
        assert lift_ch["lift"][900] == pytest.approx(1.0)

@pytest.mark.skip(reason="test not implemented")
def test_get_model_fun(self):
def test_plot(self):
pass

    def test_get_model_attribute(self, model):
        attr = model.get_model_attribute()
        assert attr["attr_name"] == ['details', 'regularization', 'iteration_count',
                                     'rejected_row_count', 'accepted_row_count', 'call_string']
        assert attr["attr_fields"] == ['predictor, coefficient, std_err, z_value, p_value',
                                       'type, lambda', 'iteration_count', 'rejected_row_count',
                                       'accepted_row_count', 'call_string']
        assert attr["#_of_rows"] == [3, 1, 1, 1, 1, 1]

        details = model.get_model_attribute('details')
        assert details["predictor"] == ['Intercept', 'age', 'fare']
        assert details["coefficient"][0] == pytest.approx(-0.091348758337523)
        assert details["coefficient"][1] == pytest.approx(-0.0143850235204284)
        assert details["coefficient"][2] == pytest.approx(0.0154603623341147)
        assert details["std_err"][0] == pytest.approx(0.155594583418985)
        assert details["std_err"][1] == pytest.approx(0.00475381848744905)
        assert details["std_err"][2] == pytest.approx(0.00211946971061136)
        assert details["z_value"][0] == pytest.approx(-0.587094719689174)
        assert details["z_value"][1] == pytest.approx(-3.02599343210254)
        assert details["z_value"][2] == pytest.approx(7.29444835031644)
        assert details["p_value"][0] == pytest.approx(0.557140093691285)
        assert details["p_value"][1] == pytest.approx(0.00247817685818198)
        assert details["p_value"][2] == pytest.approx(2.99885239324552e-13)

        reg = model.get_model_attribute('regularization')
        assert reg["type"][0] == 'l2'
        assert reg["lambda"][0] == 1.0

        assert model.get_model_attribute('iteration_count')["iteration_count"][0] == 5
        assert model.get_model_attribute('rejected_row_count')["rejected_row_count"][0] == 238
        assert model.get_model_attribute('accepted_row_count')["accepted_row_count"][0] == 996
        assert model.get_model_attribute('call_string')["call_string"][0] == 'logistic_reg(\'public.logreg_model_test\', \'public.titanic\', \'"survived"\', \'"age", "fare"\'\nUSING PARAMETERS optimizer=\'cgd\', epsilon=0.0001, max_iterations=100, regularization=\'l2\', lambda=1, alpha=0)'

    def test_get_params(self, model):
        params = model.get_params()

        assert params == {'solver': 'cgd', 'penalty': 'l2', 'max_iter': 100, 'l1_ratio': 0.5, 'C': 1, 'tol': 0.0001}
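        # These mirror the Vertica parameters in call_string above: solver ->
        # optimizer, penalty -> regularization, C -> lambda, tol -> epsilon,
        # l1_ratio -> alpha, max_iter -> max_iterations.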

    def test_prc_curve(self, model):
        prc = model.prc_curve()

        assert prc["threshold"][10] == pytest.approx(0.009)
        assert prc["recall"][10] == pytest.approx(1.0)
        assert prc["precision"][10] == pytest.approx(0.392570281124498)
        assert prc["threshold"][900] == pytest.approx(0.899)
        assert prc["recall"][900] == pytest.approx(0.0664961636828645)
        assert prc["precision"][900] == pytest.approx(0.702702702702703)

    def test_predict(self, titanic_vd, model):
        titanic_copy = titanic_vd.copy()

        model.predict(titanic_copy, name = "pred_probability")
        assert titanic_copy["pred_probability"].min() == pytest.approx(0.261992872793673)

        model.predict(titanic_copy, name = "pred_class1", cutoff = 0.7)
        assert titanic_copy["pred_class1"].sum() == 86

        model.predict(titanic_copy, name = "pred_class2", cutoff = 0.3)
        assert titanic_copy["pred_class2"].sum() == 989

    def test_roc_curve(self, model):
        roc = model.roc_curve()

        assert roc["threshold"][100] == pytest.approx(0.1)
        assert roc["false_positive"][100] == pytest.approx(1.0)
        assert roc["true_positive"][100] == pytest.approx(1.0)
        assert roc["threshold"][900] == pytest.approx(0.9)
        assert roc["false_positive"][900] == pytest.approx(0.0181818181818182)
        assert roc["true_positive"][900] == pytest.approx(0.0664961636828645)

    def test_score(self, model):
        assert model.score(cutoff = 0.7, method = "accuracy") == pytest.approx(0.6709886547811994)
        assert model.score(cutoff = 0.3, method = "accuracy") == pytest.approx(0.4659643435980551)
        assert model.score(cutoff = 0.7, method = "auc") == pytest.approx(0.6974762740166146)
        assert model.score(cutoff = 0.3, method = "auc") == pytest.approx(0.6974762740166146)
        assert model.score(cutoff = 0.7, method = "best_cutoff") == pytest.approx(0.458)
        assert model.score(cutoff = 0.3, method = "best_cutoff") == pytest.approx(0.458)
        assert model.score(cutoff = 0.7, method = "bm") == pytest.approx(0.11765873015873018)
        assert model.score(cutoff = 0.3, method = "bm") == pytest.approx(0.10263605442176882)
        assert model.score(cutoff = 0.7, method = "csi") == pytest.approx(0.13800424628450106)
        assert model.score(cutoff = 0.3, method = "csi") == pytest.approx(0.37178265014299333)
        assert model.score(cutoff = 0.7, method = "f1") == pytest.approx(0.24253731343283583)
        assert model.score(cutoff = 0.3, method = "f1") == pytest.approx(0.5420430854760251)
        assert model.score(cutoff = 0.7, method = "logloss") == pytest.approx(0.281741003041208)
        assert model.score(cutoff = 0.3, method = "logloss") == pytest.approx(0.281741003041208)
        assert model.score(cutoff = 0.7, method = "mcc") == pytest.approx(0.22241715204459717)
        assert model.score(cutoff = 0.3, method = "mcc") == pytest.approx(0.12384630352469281)
        assert model.score(cutoff = 0.7, method = "mk") == pytest.approx(0.42044809982983544)
        assert model.score(cutoff = 0.3, method = "mk") == pytest.approx(0.14943975567982504)
        assert model.score(cutoff = 0.7, method = "npv") == pytest.approx(0.7558139534883721)
        assert model.score(cutoff = 0.3, method = "npv") == pytest.approx(0.3943377148634985)
        assert model.score(cutoff = 0.7, method = "prc_auc") == pytest.approx(0.6003540469187277)
        assert model.score(cutoff = 0.3, method = "prc_auc") == pytest.approx(0.6003540469187277)
        assert model.score(cutoff = 0.7, method = "precision") == pytest.approx(0.7558139534883721)
        assert model.score(cutoff = 0.3, method = "precision") == pytest.approx(0.3943377148634985)
        assert model.score(cutoff = 0.7, method = "specificity") == pytest.approx(0.9732142857142857)
        assert model.score(cutoff = 0.3, method = "specificity") == pytest.approx(0.23596938775510204)

@pytest.mark.skip(reason="test not implemented")
def test_get_params(self):
def test_set_cursor(self):
pass

    def test_set_params(self, model):
        model.set_params({"max_iter": 1000})

        assert model.get_params()['max_iter'] == 1000

@pytest.mark.skip(reason="feautre not implemented")
def test_model_from_vDF(self, base, titanic_vd):
base.cursor.execute("DROP MODEL IF EXISTS logreg_from_vDF")
model_test = LinearRegression("logreg_from_vDF", cursor=base.cursor)
model_test.fit(titanic_vd, ["age", "fare"], "survived")

        base.cursor.execute(
            "SELECT model_name FROM models WHERE model_name = 'logreg_from_vDF'"
        )
        assert base.cursor.fetchone()[0] == "logreg_from_vDF"

        model_test.drop()
