-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathsvm.py
36 lines (27 loc) · 997 Bytes
/
svm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import json
import os
import numpy as np
from nltk.corpus import reuters as rt
from sklearn.svm import SVC
from utils import OUTPUTS_DIR, TESTING_SET, TRAINING_SET
def _single_label_docs(doc_ids):
    """Return ``(doc_id, label)`` pairs for documents with exactly one category.

    Documents that the Reuters corpus reader assigns zero or several
    categories to are dropped, so each remaining document has one
    unambiguous class label.
    """
    labelled = []
    for doc_id in doc_ids:
        # Query the corpus once per document and reuse the result for both
        # the single-label test and the label itself.
        categories = rt.categories(doc_id)
        if len(categories) == 1:
            labelled.append((doc_id, categories[0]))
    return labelled


def _load_features(doc_ids):
    """Load the JSON file stored under OUTPUTS_DIR for each document id.

    Returns a NumPy array with one row per document, in the order given.
    Each file is presumably a numeric feature vector written by an earlier
    pipeline step -- TODO confirm against the code that fills OUTPUTS_DIR.
    """
    vectors = []
    for doc_id in doc_ids:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(os.path.join(OUTPUTS_DIR, doc_id), encoding="utf-8") as f:
            vectors.append(json.load(f))
    return np.array(vectors)


svmmodel = SVC(kernel='linear')

# Remove documents that belong to multiple classes (or to none): only
# single-label documents are used for training and evaluation.
_train_docs = _single_label_docs(TRAINING_SET)
_test_docs = _single_label_docs(TESTING_SET)
TRAINING_SET = [doc_id for doc_id, _ in _train_docs]
TESTING_SET = [doc_id for doc_id, _ in _test_docs]

# Training data: X is the feature matrix, y the label vector. The lowercase
# "y" is intentional (matrix upper case, vector lower case).
X = _load_features(TRAINING_SET)
y = np.array([label for _, label in _train_docs])
svmmodel.fit(X, y)

# Evaluation data, loaded the same way as the training features.
Z = _load_features(TESTING_SET)
expected = [label for _, label in _test_docs]

# One boolean per test document: True where the prediction matches the label.
total = [
    predicted == actual
    for predicted, actual in zip(svmmodel.predict(Z), expected)
]

# Accuracy = fraction of correctly classified test documents.
print(sum(total) / len(total))