# classification.py -- node classification evaluation script.
# Forked from mengzaiqiao/CAN.
from __future__ import division
from __future__ import print_function
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC,SVC
from sklearn.metrics import *
def multiclass_node_classification_eval(X, y, ratio=0.5, rnd=2018):
    """Train an SVC on one random split and score it on the held-out part.

    Args:
        X: Feature matrix handed to ``train_test_split`` (one row per sample).
        y: Labels aligned with the rows of ``X``.
        ratio: Forwarded as ``test_size`` to ``train_test_split``, i.e. the
            share of the data that is held out for testing.
        rnd: Forwarded as ``random_state`` so a given split is reproducible.

    Returns:
        A ``(macro_f1, micro_f1)`` pair computed on the test portion.
    """
    split = train_test_split(X, y, test_size=ratio, random_state=rnd)
    train_features, test_features, train_labels, test_labels = split

    # SVC is used with scikit-learn's default settings; ``fit`` returns the
    # estimator itself, so training and prediction can be chained.
    predicted = SVC().fit(train_features, train_labels).predict(test_features)

    # Same order as the documented return value: macro first, then micro.
    return tuple(
        f1_score(test_labels, predicted, average=mode)
        for mode in ("macro", "micro")
    )
def node_classification_F1(Embeddings, y, ratio, n_runs=10):
    """Print and return macro/micro F1 averaged over repeated random splits.

    Each run draws a fresh seed from NumPy's global random generator and
    delegates to ``multiclass_node_classification_eval`` with it, so results
    vary between calls unless the caller seeds ``np.random`` beforehand.

    Args:
        Embeddings: Feature matrix passed through to the evaluation helper.
        y: Labels aligned with the rows of ``Embeddings``.
        ratio: Passed through to the evaluation helper, which uses it as the
            test-set fraction.
        n_runs: Number of random splits to average over. Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        A ``(macro_f1_avg, micro_f1_avg)`` pair. The same two numbers are
        also printed, as before.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        # Fail early instead of dividing by zero after the loop.
        raise ValueError("n_runs must be a positive integer")

    macro_total = 0.0
    micro_total = 0.0
    for _ in range(n_runs):
        # Seeds are drawn from [0, 2018), matching the original script.
        seed = np.random.randint(2018)
        macro_f1, micro_f1 = multiclass_node_classification_eval(
            Embeddings, y, ratio, seed)
        macro_total += macro_f1
        micro_total += micro_f1

    macro_f1_avg = macro_total / n_runs
    micro_f1_avg = micro_total / n_runs
    print("Macro_f1: " + str(macro_f1_avg))
    print("Micro_f1: " + str(micro_f1_avg))
    return macro_f1_avg, micro_f1_avg
def read_label(inputFileName):
    """Read one integer label per line from a text file.

    Blank lines at the end of the file are ignored. A blank or non-numeric
    line anywhere else raises, because silently dropping it would shift
    every later label onto the wrong row.

    Args:
        inputFileName: Path of the label file to read.

    Returns:
        A one-dimensional NumPy integer array with one entry per label line,
        in file order. An empty file yields an empty array.

    Raises:
        ValueError: If a line (other than trailing blank lines) cannot be
            parsed as an integer; the message names the file and line.
    """
    # The context manager closes the file even if reading raises.
    with open(inputFileName, "r") as label_file:
        entries = [line.strip("\n\r") for line in label_file]

    # Editors often leave empty lines at the end of a file; drop only those.
    while entries and not entries[-1].strip():
        entries.pop()

    labels = []
    for line_no, entry in enumerate(entries, 1):
        try:
            labels.append(int(entry))
        except ValueError:
            raise ValueError(
                "%s:%d: expected an integer label, got %r"
                % (inputFileName, line_no, entry)
            )
    return np.array(labels, dtype=int)
# Driver: evaluate the saved node embeddings of every selected dataset.
# Only 'cora' is active; the author's original trailing comment also listed
# 'citeseer', 'pubmed' and 'BlogCatalog' as dataset names.
datasets = ['cora']
for datasetname in datasets:
    # A single test ratio is evaluated per dataset.
    for ratio in (0.2,):
        print('dataset:', datasetname, ',ratio:', ratio)

        # Input locations are derived from the dataset name.
        embedding_node_result_file = "result/AGAE_{}_n_mu.emb.npy".format(
            datasetname)
        label_file = "data/" + datasetname + ".label"

        # Labels are read before the embeddings are loaded.
        y = read_label(label_file)
        Embeddings = np.load(embedding_node_result_file)

        node_classification_F1(Embeddings, y, ratio)