# check_grad.py
import numpy as np

def check_wordEmbedding_grad(optimizer, data, epsilon=1e-6):
    # Disabled: the body below appears to have been copied from a method of the
    # representation model, and the costAndGrad / stack / numWords attributes it
    # expects on `optimizer` are assumed, not verified, so this check cannot run yet.
    raise NotImplementedError("check_wordEmbedding_grad is currently not working")

    # Check dL separately, since the word-embedding matrix L is stored apart
    # from the rest of the parameter stack.
    cost, grad = optimizer.costAndGrad(data)  # assumed interface, see note above
    dL = grad[0]
    L = optimizer.stack[0]
    err2 = 0.0
    count = 0.0
    print "Checking dL..."
    for j in range(optimizer.numWords):
        for i in xrange(L.shape[0]):
            L[i, j] += epsilon
            costP, _ = optimizer.costAndGrad(data)
            L[i, j] -= epsilon
            # forward-difference estimate of dJ/dL[i,j]
            numGrad = (costP - cost) / epsilon
            err = np.abs(dL[i, j] - numGrad)
            #print "Analytic %.9f, Numerical %.9f, Error %.9f" % (dL[i, j], numGrad, err)
            err2 += err
            count += 1

    if err2 / count < 0.001:
        print "Grad Check Passed for dL"
    else:
        print "Grad Check Failed for dL: Mean Error = %.9f" % (err2 / count)

def check_param_grad(optimizer, data, epsilon=1e-6):
    # Analytic cost and gradients at the current parameters.
    cost, mlp_grad = optimizer.costAndGrad_theano_single_grad(data)

    err1 = 0.0
    count = 0.0

    # Current values of the MLP parameters (hidden layer and softmax layer).
    hw_1 = optimizer.classifier.hiddenLayer.params[0].eval()
    hw_2 = optimizer.classifier.hiddenLayer.params[1].eval()
    hb = optimizer.classifier.hiddenLayer.params[2].eval()
    log_w = optimizer.classifier.logRegressionLayer.params[0].eval()
    log_b = optimizer.classifier.logRegressionLayer.params[1].eval()
    mlp_stack = [hw_1, hw_2, hb, log_w, log_b]

    # Concatenate the representation-model stack with the MLP parameters and the
    # corresponding analytic gradients; entry 0 (the word embeddings) is skipped.
    grad = optimizer.rep_model.dstack + mlp_grad
    stack = optimizer.rep_model.stack + mlp_stack

    print "Checking dW... (might take a while)"
    idx = 0
    for W, dW in zip(stack[1:], grad[1:]):
        print "parameter %d" % idx
        idx += 1
        W = W[..., None, None]   # add trailing axes so flat biases can be indexed with three indices
        dW = dW[..., None, None]
        for i in xrange(W.shape[0]):
            for j in xrange(W.shape[1]):
                for k in xrange(W.shape[2]):
                    W[i, j, k] += epsilon
                    costP, _ = optimizer.costAndGrad_theano_single_grad(data)
                    W[i, j, k] -= epsilon
                    # forward-difference estimate of dJ/dW[i,j,k]
                    numGrad = (costP - cost) / epsilon
                    err = np.abs(dW[i, j, k] - numGrad)
                    #print "Analytic %.9f, Numerical %.9f, Error %.9f" % (dW[i, j, k], numGrad, err)
                    err1 += err
                    count += 1

    if err1 / count < 0.001:
        print "Grad Check Passed for dW"
    else:
        print "Grad Check Failed for dW: Mean Error = %.9f" % (err1 / count)

if __name__ == '__main__':
    print "Numerical gradient check..."

    import dependency_tree as tr
    trainTrees = tr.loadTrees("train")
    print "train number %d" % len(trainTrees)

    # Use a tiny mini-batch so the check finishes in reasonable time.
    mbData = trainTrees[:4]

    from optimization import Optimization
    optimizer = Optimization(alpha=0.01, optimizer="sgd")

    wvecDim = 10
    outputDim = 5
    hiddenDim = 50
    optimizer.initial_RepModel(tr, "RNN", wvecDim)
    optimizer.initial_theano_mlp(hiddenDim, outputDim, batchMLP=False)

    check_param_grad(optimizer, mbData)