# xgb_fd_signs.py
# from __future__ import print_function
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20
import xgboost as xgb
# Asymmetric penalty weights used by the custom metric below
PLUS = 5.0
MINUS = 1.0
## Start of main script
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
#ids=test[['PLAYER_ID','PLAYER_NAME']]
test.fillna(111, inplace=True)  # fill missing values in the test set with a sentinel
features_train = train.columns[51:56]   # placeholder; replaced by the explicit list below
features_test = test.columns[-5:]       # placeholder; replaced by the explicit list below
# Build the feature list from the one-hot dummy columns plus a handful of numeric columns
team_dummies = [c for c in test.columns.values if 'Opponent_' in c]
player_dummies = [c for c in test.columns.values if 'PLAYER_ID_' in c]
position_dummies = [c for c in test.columns.values if 'Position_' in c]
player_dummies.remove('PLAYER_ID_0')  # this shows up in test but not in train
other14_features = test.columns.values[26:46]
features_test = team_dummies + player_dummies + position_dummies + ['home', 'away', 'rest', 'recent_minutes', 'AGE'] + list(other14_features)
features_train = features_test  # train and test share the same feature set
print('training data processed')
def rmspe(y, yhat):
    # Asymmetrically weighted RMSE: positive residuals (y > yhat) are scaled by PLUS,
    # negative residuals by MINUS, so the two error directions are penalized differently.
    diff = y - yhat
    over = np.maximum(diff, 0)
    under = np.maximum(-diff, 0)
    weighted = PLUS * over + MINUS * under
    ret = np.sqrt(np.mean(weighted ** 2))
    return ret


def rmspe_xg(yhat, y):
    # Custom evaluation metric in the (name, value) form that xgb.train expects for feval
    y = y.get_label()
    return "rmspe", rmspe(y, yhat)
print("Train xgboost model")
params = {"objective": "reg:linear",
"booster" : "gbtree",
"eta": 0.003,
"max_depth": 9,
"subsample": 0.85,
"colsample_bytree": 0.4,
"min_child_weight": 4,
"silent": 1,
"thread": 1,
"seed": 104
}
num_boost_round = 10000
print("Train a XGBoost model")
# Hold out 12% of the training rows for validation / early stopping
X_train, X_valid = train_test_split(train, test_size=0.12, random_state=12)
y_train = X_train.FanDuel
y_valid = X_valid.FanDuel
print(len(y_valid))
dtrain = xgb.DMatrix(X_train[features_train], y_train)
dvalid = xgb.DMatrix(X_valid[features_train], y_valid)
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
# The custom metric is used only for evaluation and early stopping; training still optimizes the
# squared-error objective set above.
gbm = xgb.train(params, dtrain, num_boost_round, evals=watchlist,
                early_stopping_rounds=200, feval=rmspe_xg, verbose_eval=True)
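# Note (assumes the pre-1.x XGBoost API this script targets): when early stopping fires, the returned
# booster still contains every round that was trained; scoring can be restricted to the best round
# with, e.g.:
#   yhat = gbm.predict(xgb.DMatrix(X_valid[features_train]), ntree_limit=gbm.best_ntree_limit)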
print("Validating")
yhat = gbm.predict(xgb.DMatrix(X_valid[features_train]))
error = rmspe(X_valid.FanDuel.values, yhat)
print('RMSPE: {:.6f}'.format(error))
print("Make predictions on the test set")
dtest = xgb.DMatrix(test[features_test])
test_probs = gbm.predict(dtest)
# Make submission
result = pd.DataFrame({
    "Id": test["Id"],
    'FanDuel': test_probs,
    'Last Name': test['Last Name'],
    'FPPG_historical': test['FPPG'],
    'Salary': test['Salary'],
    'Team': test['Team'],
})
#result = pd.merge(result, ids, on='PLAYER_ID', how='left')
print(result.head(10))
result.to_csv("nbapred.csv", index=False)