import numpy as np
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
use_cuda = torch.cuda.is_available()


class WideDeepLoader(Dataset):
    """Helper to facilitate loading the data to the pytorch models.

    Parameters:
    --------
    data: namedtuple with 3 elements - (wide_input_data, deep_inp_data, target)
    """
    def __init__(self, data):
        self.X_wide = data.wide
        self.X_deep = data.deep
        self.Y = data.labels

    def __getitem__(self, idx):
        xw = self.X_wide[idx]
        xd = self.X_deep[idx]
        y = self.Y[idx]

        return xw, xd, y

    def __len__(self):
        return len(self.Y)
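
# Illustrative sketch (not part of the original file): the `data` object passed to
# WideDeepLoader is assumed to be a namedtuple exposing `wide`, `deep` and `labels`
# numpy arrays, e.g.:
#
#   from collections import namedtuple
#   WideDeepData = namedtuple('WideDeepData', ['wide', 'deep', 'labels'])
#   train_set = WideDeepData(wide=X_wide_train, deep=X_deep_train, labels=y_train)
#   loader = DataLoader(WideDeepLoader(train_set), batch_size=64, shuffle=True)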


class WideDeep(nn.Module):
    """Wide and Deep model. As explained in Heng-Tze Cheng et al., 2016, the
    model takes the wide features and the deep features (the latter after being
    passed through the hidden layers) and connects them to an output neuron.
    For details, please refer to the paper and the corresponding tutorial on
    the tensorflow site: https://www.tensorflow.org/tutorials/wide_and_deep

    Parameters:
    --------
    wide_dim (int) : dim of the wide-side input tensor
    embeddings_input (list) : list of 3-element tuples with the embeddings "set-up" -
        (col_name, unique_values, embeddings dim)
    continuous_cols (list) : list with the names of the continuous columns
    deep_column_idx (dict) : dictionary where the keys are column names and the values
        their corresponding index in the deep-side input tensor
    hidden_layers (list) : list with the number of units per hidden layer
    encoding_dict (dict) : dictionary with the label-encode mapping
    n_class (int) : number of classes. Set to 1 for logistic or regression
    dropout (list) : list with the dropout rate per hidden layer (falsy for no dropout)
    """
    def __init__(self,
                 wide_dim,
                 embeddings_input,
                 continuous_cols,
                 deep_column_idx,
                 hidden_layers,
                 dropout,
                 encoding_dict,
                 n_class):

        super(WideDeep, self).__init__()
        self.wide_dim = wide_dim
        self.deep_column_idx = deep_column_idx
        self.embeddings_input = embeddings_input
        self.continuous_cols = continuous_cols
        self.hidden_layers = hidden_layers
        self.dropout = dropout
        self.encoding_dict = encoding_dict
        self.n_class = n_class

        # Build the embedding layers to be passed through the deep-side
        for col, val, dim in self.embeddings_input:
            setattr(self, 'emb_layer_' + col, nn.Embedding(val, dim))

        # Build the deep-side hidden layers with dropout if specified
        input_emb_dim = int(np.sum([emb[2] for emb in self.embeddings_input]))
        self.linear_1 = nn.Linear(input_emb_dim + len(continuous_cols), self.hidden_layers[0])
        if self.dropout:
            self.linear_1_drop = nn.Dropout(self.dropout[0])
        for i, h in enumerate(self.hidden_layers[1:], 1):
            setattr(self, 'linear_' + str(i + 1), nn.Linear(self.hidden_layers[i - 1], self.hidden_layers[i]))
            if self.dropout:
                setattr(self, 'linear_' + str(i + 1) + '_drop', nn.Dropout(self.dropout[i]))

        # Connect the wide- and deep-side of the model to the output neuron(s)
        self.output = nn.Linear(self.hidden_layers[-1] + self.wide_dim, self.n_class)
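
    # Illustrative instantiation (a sketch; the column names and sizes below are
    # assumptions, not part of the original file):
    #
    #   embeddings_input = [('education', 16, 8), ('occupation', 15, 8)]
    #   deep_column_idx  = {'education': 0, 'occupation': 1, 'age': 2, 'hours_per_week': 3}
    #   model = WideDeep(wide_dim=798,
    #                    embeddings_input=embeddings_input,
    #                    continuous_cols=['age', 'hours_per_week'],
    #                    deep_column_idx=deep_column_idx,
    #                    hidden_layers=[100, 50],
    #                    dropout=[0.5, 0.2],
    #                    encoding_dict=encoding_dict,
    #                    n_class=1)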

    def compile(self, method="logistic", optimizer="Adam", learning_rate=0.001, momentum=0.0):
        """Wrapper to set the activation, loss and the optimizer.

        Parameters:
        ----------
        method (str) : regression, logistic or multiclass
        optimizer (str): SGD, Adam, or RMSprop
        """
        if method == 'regression':
            self.activation, self.criterion = None, F.mse_loss
        if method == 'logistic':
            self.activation, self.criterion = torch.sigmoid, F.binary_cross_entropy
        if method == 'multiclass':
            # F.cross_entropy expects raw logits (it applies log-softmax internally),
            # so no activation is used here; predict_proba applies the softmax instead
            self.activation, self.criterion = None, F.cross_entropy

        if optimizer == "Adam":
            self.optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        if optimizer == "RMSprop":
            self.optimizer = torch.optim.RMSprop(self.parameters(), lr=learning_rate)
        if optimizer == "SGD":
            self.optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate, momentum=momentum)

        self.method = method
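
    # Illustrative calls (a sketch, not part of the original file): the model must be
    # compiled with the task type and optimizer before fit/predict, e.g.
    #   model.compile(method='logistic', optimizer='Adam', learning_rate=1e-3)
    #   model.compile(method='multiclass', optimizer='SGD', learning_rate=0.01, momentum=0.9)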

    def forward(self, X_w, X_d):
        """Implementation of the forward pass.

        Parameters:
        ----------
        X_w (torch.tensor) : wide-side input tensor
        X_d (torch.tensor) : deep-side input tensor

        Returns:
        --------
        out (torch.tensor) : result of the output neuron(s)
        """
        # Deep Side
        emb = [getattr(self, 'emb_layer_' + col)(X_d[:, self.deep_column_idx[col]].long())
               for col, _, _ in self.embeddings_input]
        if self.continuous_cols:
            cont_idx = [self.deep_column_idx[col] for col in self.continuous_cols]
            cont = [X_d[:, cont_idx].float()]
            deep_inp = torch.cat(emb + cont, 1)
        else:
            deep_inp = torch.cat(emb, 1)

        x_deep = F.relu(self.linear_1(deep_inp))
        if self.dropout:
            x_deep = self.linear_1_drop(x_deep)
        for i in range(1, len(self.hidden_layers)):
            x_deep = F.relu(getattr(self, 'linear_' + str(i + 1))(x_deep))
            if self.dropout:
                x_deep = getattr(self, 'linear_' + str(i + 1) + '_drop')(x_deep)

        # Deep + Wide sides
        wide_deep_input = torch.cat([x_deep, X_w.float()], 1)
        if not self.activation:
            out = self.output(wide_deep_input)
        else:
            out = self.activation(self.output(wide_deep_input))

        return out
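
    # Shape walk-through (illustrative, for a batch of size B):
    #   each embedding lookup  -> (B, emb_dim_i)
    #   continuous columns     -> (B, len(continuous_cols))
    #   deep_inp               -> (B, sum(emb_dims) + len(continuous_cols))
    #   x_deep                 -> (B, hidden_layers[-1])
    #   wide_deep_input        -> (B, hidden_layers[-1] + wide_dim)
    #   out                    -> (B, n_class)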

    def fit(self, dataset, n_epochs, batch_size):
        """Run the model for the training set at dataset.

        Parameters:
        ----------
        dataset (namedtuple): training set with the attributes wide, deep and labels
        n_epochs (int)
        batch_size (int)
        """
        widedeep_dataset = WideDeepLoader(dataset)
        train_loader = torch.utils.data.DataLoader(dataset=widedeep_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)

        # move the model to the GPU if one is available (the inputs are moved below)
        if use_cuda:
            self.cuda()

        # set the model in training mode
        net = self.train()
        for epoch in range(n_epochs):
            total = 0
            correct = 0
            y_proba = []
            y_true = []
            for i, (X_wide, X_deep, target) in enumerate(train_loader):
                X_w = Variable(X_wide)
                X_d = Variable(X_deep)
                y = (Variable(target).float() if self.method != 'multiclass' else Variable(target))
                if use_cuda:
                    X_w, X_d, y = X_w.cuda(), X_d.cuda(), y.cuda()

                self.optimizer.zero_grad()
                y_pred = net(X_w, X_d)
                # cross_entropy expects a 1D class-index target; the other losses
                # expect a target with the same (N, 1) shape as the output
                if self.method == 'multiclass':
                    loss = self.criterion(y_pred, y)
                else:
                    loss = self.criterion(y_pred, y.reshape(-1, 1))
                loss.backward()
                self.optimizer.step()

                if self.method != "regression":
                    total += y.size(0)
                    if self.method == 'logistic':
                        y_proba.extend(y_pred.cpu().squeeze(1).data.numpy().tolist())
                        y_true.extend(target.cpu().data.numpy().tolist())
                        y_pred_cat = (y_pred > 0.5).squeeze(1).float()
                    if self.method == "multiclass":
                        _, y_pred_cat = torch.max(y_pred, 1)
                    correct += float((y_pred_cat == y).sum().data)

            if self.method == 'logistic':
                print('Epoch {} of {}, Loss: {}, accuracy: {}, auc: {}'.format(
                    epoch + 1, n_epochs, round(loss.item(), 3), round(correct / total, 4),
                    round(roc_auc_score(y_true, y_proba), 4)))
            elif self.method == 'multiclass':
                print('Epoch {} of {}, Loss: {}, accuracy: {}'.format(
                    epoch + 1, n_epochs, round(loss.item(), 3), round(correct / total, 4)))
            else:
                print('Epoch {} of {}, Loss: {}'.format(epoch + 1, n_epochs, round(loss.item(), 3)))

    def predict(self, dataset):
        """Predict target for dataset.

        Parameters:
        ----------
        dataset (namedtuple): testing set with the attributes wide and deep

        Returns:
        --------
        array-like with the predicted target for dataset
        """
        X_w = Variable(torch.from_numpy(dataset.wide)).float()
        X_d = Variable(torch.from_numpy(dataset.deep))
        if use_cuda:
            X_w, X_d = X_w.cuda(), X_d.cuda()

        # set the model in evaluation mode so dropout is not applied
        net = self.eval()
        pred = net(X_w, X_d).cpu()
        if self.method == "regression":
            return pred.squeeze(1).data.numpy()
        if self.method == "logistic":
            return (pred > 0.5).squeeze(1).data.numpy().astype(int)
        if self.method == "multiclass":
            _, pred_cat = torch.max(pred, 1)
            return pred_cat.data.numpy()

    def predict_proba(self, dataset):
        """Predict class probabilities for dataset.

        This method will only work with method logistic/multiclass.

        Parameters:
        ----------
        dataset (namedtuple): testing set with the attributes wide and deep

        Returns:
        --------
        array-like with the class probabilities for dataset
        """
        X_w = Variable(torch.from_numpy(dataset.wide)).float()
        X_d = Variable(torch.from_numpy(dataset.deep))
        if use_cuda:
            X_w, X_d = X_w.cuda(), X_d.cuda()

        # set the model in evaluation mode so dropout is not applied
        net = self.eval()
        pred = net(X_w, X_d).cpu()
        if self.method == "logistic":
            pred = pred.squeeze(1).data.numpy()
            probs = np.zeros([pred.shape[0], 2])
            probs[:, 0] = 1 - pred
            probs[:, 1] = pred
            return probs
        if self.method == "multiclass":
            # the forward pass returns raw logits for multiclass (see compile),
            # so the softmax is applied here
            return F.softmax(pred, dim=1).data.numpy()

    def get_embeddings(self, col_name):
        """Extract the embeddings for the embedding columns.

        Parameters:
        -----------
        col_name (str) : column we want the embeddings for

        Returns:
        --------
        embeddings_dict (dict): dictionary with the column values and the embeddings
        """
        params = list(self.named_parameters())
        emb_layers = [p for p in params if 'emb_layer' in p[0]]
        emb_layer = [layer for layer in emb_layers if col_name in layer[0]][0]
        embeddings = emb_layer[1].cpu().data.numpy()
        col_label_encoding = self.encoding_dict[col_name]
        inv_dict = {v: k for k, v in col_label_encoding.items()}
        embeddings_dict = {}
        for idx, value in inv_dict.items():
            embeddings_dict[value] = embeddings[idx]

        return embeddings_dict
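

# ---------------------------------------------------------------------------
# Illustrative end-to-end usage (a sketch, not part of the original file; the
# data preparation that produces the namedtuples and the column names below
# are assumptions):
#
#   model = WideDeep(wide_dim, embeddings_input, continuous_cols,
#                    deep_column_idx, hidden_layers=[100, 50],
#                    dropout=[0.5, 0.2], encoding_dict=encoding_dict, n_class=1)
#   model.compile(method='logistic', optimizer='Adam')
#   model.fit(train_set, n_epochs=10, batch_size=64)
#   preds = model.predict(test_set)
#   probs = model.predict_proba(test_set)
#   education_emb = model.get_embeddings('education')
# ---------------------------------------------------------------------------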