lda.py
import numpy as np


class LDA:
    def __init__(self, n_components):
        self.n_components = n_components
        self.linear_discriminants = None

    def fit(self, X, y):
        n_features = X.shape[1]
        class_labels = np.unique(y)

        # Within-class scatter matrix:
        # SW = sum((X_c - mean_X_c)^2)
        # Between-class scatter matrix:
        # SB = sum(n_c * (mean_X_c - mean_overall)^2)
        mean_overall = np.mean(X, axis=0)
        SW = np.zeros((n_features, n_features))
        SB = np.zeros((n_features, n_features))
        for c in class_labels:
            X_c = X[y == c]
            mean_c = np.mean(X_c, axis=0)
            # (4, n_c) * (n_c, 4) = (4, 4) -> transpose
            SW += (X_c - mean_c).T.dot((X_c - mean_c))

            # (4, 1) * (1, 4) = (4, 4) -> reshape
            n_c = X_c.shape[0]
            mean_diff = (mean_c - mean_overall).reshape(n_features, 1)
            SB += n_c * (mean_diff).dot(mean_diff.T)

        # Determine SW^-1 * SB
        A = np.linalg.inv(SW).dot(SB)
        # Get eigenvalues and eigenvectors of SW^-1 * SB
        eigenvalues, eigenvectors = np.linalg.eig(A)
        # -> eigenvector v = [:, i] column vector, transpose for easier calculations
        # sort eigenvalues high to low
        eigenvectors = eigenvectors.T
        idxs = np.argsort(abs(eigenvalues))[::-1]
        eigenvalues = eigenvalues[idxs]
        eigenvectors = eigenvectors[idxs]
        # store first n eigenvectors
        self.linear_discriminants = eigenvectors[0 : self.n_components]
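        # Note: the eigenvectors of SW^-1 * SB with the largest eigenvalues
        # maximize the Fisher criterion (between-class scatter relative to
        # within-class scatter), so they define the most discriminative axes.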

    def transform(self, X):
        # project data
        return np.dot(X, self.linear_discriminants.T)

# Testing
if __name__ == "__main__":
    # Imports
    import matplotlib.pyplot as plt
    from sklearn import datasets

    data = datasets.load_iris()
    X, y = data.data, data.target

    # Project the data onto the 2 primary linear discriminants
    lda = LDA(2)
    lda.fit(X, y)
    X_projected = lda.transform(X)

    print("Shape of X:", X.shape)
    print("Shape of transformed X:", X_projected.shape)

    x1, x2 = X_projected[:, 0], X_projected[:, 1]

    plt.scatter(
        x1, x2, c=y, edgecolor="none", alpha=0.8, cmap=plt.cm.get_cmap("viridis", 3)
    )

    plt.xlabel("Linear Discriminant 1")
    plt.ylabel("Linear Discriminant 2")
    plt.colorbar()
    plt.show()
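
    # Optional sanity check -- a minimal sketch, assuming scikit-learn's
    # LinearDiscriminantAnalysis is available (sklearn is already imported
    # above for the dataset). It should also produce a (150, 2) projection
    # of the iris data; individual axes may differ in sign or scale, since
    # eigenvectors are only defined up to a scalar factor.
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    sk_lda = LinearDiscriminantAnalysis(n_components=2)
    X_sk = sk_lda.fit_transform(X, y)
    print("Shape of sklearn-transformed X:", X_sk.shape)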