-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmlutils.py
69 lines (59 loc) · 2.44 KB
/
mlutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
Helper module for various machine learning utilities and scoring functions.
"""
from sklearn.externals import joblib
def save_xgb_and_desc(model, model_name, params, score, std,
                      seed, desc_file='models/model_desc.txt'):
    """
    Save a trained XGBoost model to a file
    and log its performance.

    The model is written to ``models/<model_name>`` through
    ``model.save_model`` and a short description is appended to
    ``desc_file``. The ``models`` directory is not created here, so it
    must already exist.

    Args:
        model - a trained XGBoost model (must expose ``save_model(path)``).
        model_name - file name for the model inside ``models``.
        params - parameters used to train the model.
        score - cross-validation score for the model.
        std - std deviation of the cross-validation score.
        seed - seed used to train the model.
        desc_file - file to log model information (opened in append mode).
    """
    model_path = '{}/{}'.format('models', model_name)
    model.save_model(model_path)
    # Write to the caller-supplied log file; the default keeps the
    # historical 'models/model_desc.txt' location.
    with open(desc_file, 'a') as f:
        f.write('Model {0} was trained with the following params:\n{1}\n'
                .format(model_name, params))
        f.write('seed: {}\n'.format(seed))
        f.write('Training cross-validation (mean_score, std_dev):{},{}\n'
                .format(score, std))
def save_model_and_desc(model, model_name, params, score, std,
                        seed, desc_file='models/model_desc.txt'):
    """
    Save a trained scikit-learn model to a file
    and log its performance.

    The model is pickled to ``models/<model_name>.pkl`` with
    ``joblib.dump`` and a short description is appended to ``desc_file``.
    The ``models`` directory is not created here, so it must already
    exist.

    Args:
        model - a trained sklearn model.
        model_name - file name for the model, without the ``.pkl`` suffix.
        params - parameters used to train the model.
        score - cross-validation score for the model.
        std - std deviation of the cross-validation score.
        seed - seed used to train the model.
        desc_file - file to log model information (opened in append mode).
    """
    model_path = '{}/{}.pkl'.format('models', model_name)
    # NOTE(review): `joblib` is the module-level import from
    # `sklearn.externals`; confirm the installed scikit-learn version
    # still provides it.
    joblib.dump(model, model_path)
    # Write to the caller-supplied log file; the default keeps the
    # historical 'models/model_desc.txt' location.
    with open(desc_file, 'a') as f:
        f.write('Model {0} was trained with the following params:\n{1}\n'
                .format(model_name, params))
        f.write('seed: {}\n'.format(seed))
        f.write('Training cross-validation (mean_score, std_dev):{},{}\n'
                .format(score, std))
def write_results(outfile, predictions, first_id=49999, last_id=99998):
    """
    Write a model's predictions to a Kaggle-ready submission file.

    The file starts with the header ``id,Y`` followed by one
    ``<id>,<prediction>`` row per prediction. Ids are consecutive
    integers from ``first_id`` to ``last_id`` inclusive; rows are paired
    with ``zip``, so output stops at whichever of the id range or
    ``predictions`` runs out first.

    Args:
        outfile - path of the submission file (overwritten if present).
        predictions - iterable of predicted values, in id order.
        first_id - id assigned to the first prediction.
        last_id - largest id that may be written.
    """
    ids = range(first_id, last_id + 1)
    with open(outfile, 'w') as f:
        f.write('id,Y\n')
        # Hand all rows to the buffered writer in one call.
        f.writelines('{},{}\n'.format(instance, prediction)
                     for instance, prediction in zip(ids, predictions))