From 811b7c6dcb5fd56fe07db36384a4d506dbbac061 Mon Sep 17 00:00:00 2001
From: NIPS-BRITS
Date: Wed, 28 Nov 2018 09:10:38 -0500
Subject: [PATCH] add rf baseline

---
Notes (placed after the three-dash marker, so not part of the commit message):

  * Removed result/light_gbm.py: despite its file name, the script trained
    an XGBoost booster (xgb.train) and a scikit-learn LogisticRegression on
    the 'm_rnn' data, and ended with a Python 2 print statement.
  * Added result/random_forest.py: loads the 'brits_i' data and labels,
    reshapes the data to 48 * 35 columns per row, passes it through
    np.nan_to_num, fits RandomForestClassifier on the first 3000 rows ten
    times, scores the remaining rows with roc_auc_score, and prints the
    mean of the ten AUC values.
  * Review: the new script still imports xgboost although nothing in it
    uses the module. No random_state is passed to RandomForestClassifier,
    so the printed mean is expected to vary from run to run.

 result/light_gbm.py     | 37 -------------------------------------
 result/random_forest.py | 27 +++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 37 deletions(-)
 delete mode 100644 result/light_gbm.py
 create mode 100644 result/random_forest.py

diff --git a/result/light_gbm.py b/result/light_gbm.py
deleted file mode 100644
index cdc0f8f7..00000000
--- a/result/light_gbm.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import xgboost as xgb
-import numpy as np
-
-model_name = 'm_rnn'
-
-impute = np.load('./{}_data.npy'.format(model_name)).reshape(-1, 48 * 35)
-label = np.load('./{}_label.npy'.format(model_name))
-
-#data = fill
-data = impute
-
-n_train = 3000
-
-print(impute.shape)
-print(label.shape)
-
-dtrain = xgb.DMatrix(data[:n_train], label = label[:n_train])
-dtest = xgb.DMatrix(data[n_train:], label = label[n_train:])
-
-param = {'max_depth': 3, 'objective': 'binary:logistic', 'nthread': 10, 'eval_metric': 'auc'}
-
-num_round = 100
-
-evallist = [(dtest, 'eval'), (dtrain, 'train')]
-
-bst = xgb.train(param, dtrain, num_round, evallist)
-
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import roc_auc_score
-
-model = LogisticRegression().fit(data[:n_train], label[:n_train])
-pred = model.predict_proba(data[n_train:])
-
-from ipdb import set_trace
-
-
-print roc_auc_score(label[n_train:].reshape(-1,), pred[:, 1].reshape(-1, ))
diff --git a/result/random_forest.py b/result/random_forest.py
new file mode 100644
index 00000000..a8778063
--- /dev/null
+++ b/result/random_forest.py
@@ -0,0 +1,27 @@
+import xgboost as xgb
+import numpy as np
+
+model_name = 'brits_i'
+
+impute = np.load('./{}_data.npy'.format(model_name)).reshape(-1, 48 * 35)
+label = np.load('./{}_label.npy'.format(model_name))
+
+data = np.nan_to_num(impute)
+
+n_train = 3000
+
+print(impute.shape)
+print(label.shape)
+
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import roc_auc_score
+
+auc = []
+
+for i in range(10):
+    model = RandomForestClassifier().fit(data[:n_train], label[:n_train])
+    pred = model.predict_proba(data[n_train:])
+
+    auc.append(roc_auc_score(label[n_train:].reshape(-1,), pred[:, 1].reshape(-1, )))
+
+print(np.mean(auc))