-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel_training.py
162 lines (123 loc) · 5.93 KB
/
model_training.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""Vanilla model training script.

Trains every model defined in ``misc/models.py`` on the configured datasets
and saves the fitted models plus a JSON metrics summary under ``models/``.

CLI:
    --setting   required, comma-separated list of training settings
                (e.g. "mto,mtm" — many-to-one / many-to-many)
    --datasets  optional, comma-separated dataset names to restrict to
    --models    optional, comma-separated model names to restrict to
"""
import argparse
import json
import warnings

from tensorflow.keras.callbacks import EarlyStopping

from misc.data_loader import DataLoader
from misc.evaluation_metrics import *
from misc.models import MODELS

warnings.filterwarnings("ignore")

# Stop training once validation loss has not improved for 3 consecutive epochs.
early_stopping = EarlyStopping(patience=3)

parser = argparse.ArgumentParser(
    prog="Vanilla Model Training",
    description="Trains the models defined in 'models.py' file and saves them to 'models/'"
)
parser.add_argument("--setting", required=True)   # comma-separated setting codes, e.g. "mto,mtm"
parser.add_argument("--datasets")                 # optional comma-separated dataset names
parser.add_argument("--models")                   # optional comma-separated model names
args = parser.parse_args()

# Normalise the CLI values: settings are lower-cased and split on commas.
settings = [item.strip() for item in args.setting.lower().split(",")]
datasets = args.datasets
models = args.models

dl = DataLoader()
info = dl.get_info()
# NOTE(review): the original eagerly called dl.load(dataset_index=0) here, but
# its result was always overwritten inside the training loop before any use;
# removed as dead work — confirm DataLoader.load has no required side effect.

# Restrict to the requested datasets, validating each name against the loader's catalogue.
if datasets:
    datasets = [item.strip() for item in datasets.split(",")]
    for dataset in datasets:
        if dataset not in info.name.tolist():
            raise ValueError("Dataset '" + dataset + "' does not exist in defined datasets.")
    info = info[info.name.isin(datasets)]

# Restrict MODELS to the requested model names, validating each name first.
if models:
    models = [item.strip() for item in models.split(",")]
    for model in models:
        if model not in MODELS:
            raise ValueError("Model '" + model + "' does not exist in defined models.")
    MODELS = dict(filter(lambda x: x[0] in models, MODELS.items()))

# Per-dataset, per-model evaluation metrics, populated during training and
# dumped as JSON next to the saved models.
model_info = {}
# Look-back window sizes per dataset for the many-to-one ("mto") setting.
# The 15-minute electricity data uses 23*4 steps (presumably ~23 hours of
# 15-minute samples — TODO confirm); the hourly datasets use 23 steps.
# The many-to-many ("mtm") setting uses one week of hourly steps everywhere.
MTO_WINDOWS = {
    "Electricity Transformer Data - 15 min": 23 * 4,
    "Metro Interstate Human Traffic Volume": 23,
    "Beijing-Guanyuan Air-Quality": 23,
    "Solar Generation - EnerjiSA": 23,
}

for setting in settings:
    # Map each setting code to its output subdirectory and prediction horizon.
    if setting == "mto":
        subdir, horizon = "many-to-one", 1
    elif setting == "mtm":
        subdir, horizon = "many-to-many", 12
    else:
        # Unknown setting codes are ignored, matching the original behavior.
        continue

    for index, row in info.iterrows():
        name = row["name"]
        model_info[name] = {}
        df = dl.load(index)
        # Resolve the look-back window. Unlike the original if/elif chains
        # (which silently reused the previous dataset's arrays for any
        # unlisted name), every dataset is now prepared explicitly; unlisted
        # mto datasets fall back to the common 23-step window.
        window = MTO_WINDOWS.get(name, 23) if setting == "mto" else 24 * 7
        (X_train, y_train, X_test, y_test), scaler = dl.prepare_dataset(window, horizon)

        for model_name, build_model in MODELS.items():
            # MODELS maps names to factories taking (input_shape, output_size).
            model = build_model((X_train.shape[1], X_train.shape[2]), horizon)
            model.summary()
            model.compile(optimizer="adam", loss="MSE")
            model.fit(
                X_train,
                y_train,
                validation_split=0.25,
                epochs=50,
                callbacks=[early_stopping])

            # Invert the scaling so metrics are reported in original units.
            pred = scaler.inverse_transform(model.predict(X_test)).reshape(1, -1)[0]
            y_test_inv = scaler.inverse_transform(y_test).reshape(1, -1)[0]

            save_path = "models/" + subdir + "/" + name + "--" + model_name + ".h5"
            model_info[name][model_name] = {
                "R2": r2_score(y_test_inv, pred),
                "MAE": round(MAE(y_test_inv, pred), 2),
                "RMSE": round(RMSE(y_test_inv, pred), 2),
                "MSE": round(MSE(y_test_inv, pred), 2),
                "MAPE": round(MAPE(y_test_inv, pred), 2),
                "SMAPE": round(SMAPE(y_test_inv, pred), 2),
                "MDAPE": round(MDAPE(y_test_inv, pred), 2),
                "path": save_path
            }
            model.save(save_path)

    # Persist the accumulated metrics for this setting alongside the models.
    with open("models/" + subdir + "/model_info.json", "w") as outfile:
        json.dump(model_info, outfile, indent=4)
    # BUGFIX: the original printed 'models/many-to-one/' in the mtm branch too.
    print("Models have been saved to 'models/" + subdir + "/'")