-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsvm.py
121 lines (91 loc) · 3.3 KB
/
svm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn.svm as svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
'''
SVM: a supervised learning algorithm that can perform clean nonlinear classification
'''
# Load the wine data set. This assumes the standard UCI `wine.data` layout:
# no header row, class label in column 0, features in the remaining columns.
# header=None stops pandas from consuming the first sample as column names.
file = pd.read_csv('wine.data', header=None)
data = file.values
y_data = data[:, 0]
x_data = data[:, 1:]

# Shuffle samples and labels with one shared permutation so they stay aligned.
shuffle = np.random.permutation(x_data.shape[0])
x_data = x_data[shuffle]
y_data = y_data[shuffle]

# 80 / 20 train-test split on the shuffled data.
total = x_data.shape[0]
train_end = int(total * 0.8)
x_train = x_data[0:train_end, :]
y_train = y_data[0:train_end]
x_test = x_data[train_end:, :]
y_test = y_data[train_end:]
print(x_train.shape)

# Standardize features. The scaler is fitted on the training split only and
# the same mean / variance is then applied to both splits.
scaler = StandardScaler()
scaler.fit(x_train)
x_train_std = scaler.transform(x_train)
x_test_std = scaler.transform(x_test)
def PCA(x, k=2, fit_data=None):
    '''
    Project samples onto the leading singular directions of a covariance matrix.

    x: 2D array, size [samples, features], the data to project
    k: number of leading directions to keep (default 2)
    fit_data: optional 2D array, size [samples, features], used to estimate
        the covariance matrix. Defaults to x itself. Pass the training set
        here when projecting test data so that both sets are expressed in
        the same coordinate system.

    return: 2D array, size [samples, k]

    Note: np.cov centres the data internally, but x is multiplied by the
    directions as given (it is not re-centred here).
    '''
    basis_source = x if fit_data is None else fit_data
    # atleast_2d: np.cov collapses to a 0-d array when there is one feature.
    cov_x = np.atleast_2d(np.cov(basis_source.T))
    u, _, _ = np.linalg.svd(cov_x)
    proj = u[:, :k]
    return np.matmul(x, proj)
# Derive the 2-D projection from the training set only and reuse it for the
# test set. Calling PCA(x_test_std) on its own would fit a separate basis to
# the test samples, whose axes (and signs) need not match the ones the
# classifier is trained on.
train_cov = np.cov(x_train_std.T)
directions, _, _ = np.linalg.svd(train_cov)
projection = directions[:, :2]
x_test_std = np.matmul(x_test_std, projection)
x_train_std = np.matmul(x_train_std, projection)

# NOTE: this rebinding shadows the imported sklearn.svm module. The name is
# kept because other code in this file refers to the fitted model as `svm`.
svm = svm.SVC(kernel='linear', probability=True)
svm.fit(x_train_std, y_train)

# Evaluate on the held-out split.
predict_y = svm.predict(x_test_std)
label_y = y_test
print(predict_y)
print(label_y)
correct = (label_y == predict_y).astype(int)
correct_rate = np.mean(correct)
print('Correct test rate', correct_rate)
def plot_decision_boundary(X, y, clf, test_ind=None, resolution=0.02):
    '''
    Draw a classifier's predicted regions over a 2-D feature plane together
    with the labelled samples.

    X: 2D array, size [batch, features], features = 2
    y: 1D array of class labels, one per row of X
    clf: fitted classifier exposing predict(). If the argument has no
        predict attribute (older call sites pass a placeholder such as
        True), the module-level `svm` model is used instead.
    test_ind: truthy flag; when set, the module-level projected test set
        `x_test_std` is overlaid as hollow circles
    resolution: step of the grid on which the classifier is evaluated
    '''
    model = clf if hasattr(clf, 'predict') else svm

    markers = ('s', 'x', 'v')  # markers for plot
    colors = ('red', 'green', 'blue', 'gray')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # Evaluate the classifier on a dense grid spanning the data range.
    x1min, x1max = X[:, 0].min(), X[:, 0].max()
    x2min, x2max = X[:, 1].min(), X[:, 1].max()
    xx, yy = np.meshgrid(
        np.arange(x1min, x1max, resolution),
        np.arange(x2min, x2max, resolution),
    )
    grid_point = np.c_[xx.ravel(), yy.ravel()]  # [samples, features]
    z = model.predict(grid_point).reshape(xx.shape)
    plt.contour(xx, yy, z, alpha=0.4, cmap=cmap)
    plt.xlim(x1min, x1max)
    plt.ylim(x2min, x2max)

    # Plot the data points of each class separately.
    for idx, c1 in enumerate(classes):
        mask = y == c1
        plt.scatter(
            x=X[mask, 0],
            y=X[mask, 1],
            # `color` (not `c`) so a single RGBA tuple is not mistaken for
            # per-point values to be colour-mapped.
            color=cmap(idx),
            alpha=0.4,
            edgecolor='black',
            # Cycle so more classes than markers does not raise IndexError.
            marker=markers[idx % len(markers)],
        )

    # Highlight test samples.
    if test_ind:
        plt.scatter(
            # Use the projected test set: the raw x_test columns are not in
            # the same 2-D space as X.
            x=x_test_std[:, 0],
            y=x_test_std[:, 1],
            facecolors='none',  # hollow markers
            alpha=1.0,  # opacity of marker
            marker='o',
            edgecolor='black',
            linewidths=2,
            s=55,  # size of marker
        )
# Hand the fitted classifier to the plot as its `clf` argument and show the
# training samples in the 2-D projected space.
plot_decision_boundary(x_train_std, y_train, svm)
plt.xlabel('component 1')
plt.ylabel('component 2')
plt.title('Test accuracy: %s' % correct_rate)
plt.show()