# gradients.py
import numpy as np

from hw_helpers import sigmoid


def compute_gradient_mse(y, tx, w):
    """
    Compute the gradient of the mean square error cost function
    :param y: labels
    :param tx: features
    :param w: weights
    :return: the gradient of the MSE cost function
    """
    N = y.shape[0]
    e = y - tx @ w  # residuals
    return -(tx.T @ e) / N
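
# For reference, under the convention L(w) = 1/(2N) * ||y - Xw||^2 (an
# assumption consistent with the factor 1/N above), the gradient computed
# here is exactly -(1/N) * X^T (y - Xw).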


def compute_gradient_likelihood(y, tx, w, lambda_=0):
    """
    Compute the gradient of the negative log likelihood cost function
    :param y: labels
    :param tx: features
    :param w: weights
    :param lambda_: regularization parameter
    :return: the gradient of the negative log likelihood cost function
    """
    return tx.T @ (sigmoid(tx @ w) - y) + lambda_ * w
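
# For reference, with 0/1 labels and an assumed penalty of (lambda_/2) * ||w||^2,
# the regularized negative log likelihood is
#     sum_n [ log(1 + exp(x_n^T w)) - y_n * x_n^T w ] + (lambda_/2) * ||w||^2,
# whose gradient is X^T (sigmoid(Xw) - y) + lambda_ * w, as returned above.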


def calculate_hessian(y, tx, w):
    """
    Compute the Hessian of the negative log likelihood cost function
    :param y: labels
    :param tx: features
    :param w: weights
    :return: the Hessian of the negative log likelihood cost function
    """
    # S is the diagonal matrix with entries sigma(x_n^T w) * (1 - sigma(x_n^T w));
    # w is expected to be a column vector, hence the .T[0] to flatten tx @ w.
    sig = sigmoid(tx @ w)
    S = np.diag((sig * (1 - sig)).T[0])
    return tx.T @ S @ tx
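
# A minimal sketch of how the two functions above combine (newton_step and the
# step size gamma are assumed names, not part of the original module): one
# damped Newton update for logistic regression. np.linalg.solve is preferred
# over forming an explicit inverse. Since calculate_hessian ignores the
# regularization term, the update is exact only for lambda_ = 0.
def newton_step(y, tx, w, gamma=1.0, lambda_=0):
    gradient = compute_gradient_likelihood(y, tx, w, lambda_)
    hessian = calculate_hessian(y, tx, w)
    return w - gamma * np.linalg.solve(hessian, gradient)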


def double_pen_gradient_likelihood(y, tx, w, lambda_):
    """
    Compute the gradient of the negative log likelihood function with a double penalization when we fail to
    predict a "1". Because the labels are unbalanced, with roughly twice as many -1s as 1s in the training
    data, we double the weight of those errors.
    :param y: labels
    :param tx: features
    :param w: weights
    :param lambda_: regularization parameter
    :return: the modified gradient
    """
    error = sigmoid(tx @ w) - y
    # If error < -0.5, then y = 1 and the prediction is a "0": the error is doubled.
    error[error < -0.5] *= 2
    gradient = tx.T @ error + lambda_ * w
    return gradient
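

# Minimal smoke test (a sketch, not part of the original module): checks
# compute_gradient_mse against a central finite-difference approximation and
# exercises the logistic-regression gradients on synthetic 0/1 labels.
# Assumes hw_helpers.sigmoid (imported at the top) is an elementwise sigmoid.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    tx = rng.standard_normal((100, 3))
    w = rng.standard_normal((3, 1))
    y = tx @ w + 0.1 * rng.standard_normal((100, 1))

    def mse(w_):
        e = y - tx @ w_
        return (e.T @ e).item() / (2 * y.shape[0])

    grad = compute_gradient_mse(y, tx, w)
    eps = 1e-6
    for i in range(w.shape[0]):
        d = np.zeros_like(w)
        d[i, 0] = eps
        fd = (mse(w + d) - mse(w - d)) / (2 * eps)
        assert abs(fd - grad[i, 0]) < 1e-4

    y01 = (y > 0).astype(float)  # synthetic 0/1 labels for the logistic case
    print(compute_gradient_likelihood(y01, tx, w, lambda_=0.1).shape)  # (3, 1)
    print(calculate_hessian(y01, tx, w).shape)                         # (3, 3)
    print(double_pen_gradient_likelihood(y01, tx, w, 0.1).shape)       # (3, 1)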