-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: utils_ensemble.py
122 lines (104 loc) · 4.03 KB
/
utils_ensemble.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import pandas as pd
import numpy as np
from sklearn.metrics import *
import math, os
from sklearn.preprocessing import label_binarize
def getfile(filename):
    """Load an ensemble-probability CSV and derive ground-truth labels.

    Parameters
    ----------
    filename : str
        CSV of per-sample class scores; the '.csv' extension is optional.

    Returns
    -------
    df : ndarray of shape (n_samples, n_classes)
        The CSV contents as a numpy array.
    labels : ndarray of shape (n_samples,)
        Integer label per sample, derived from the directory layout under
        'data/val/<class_name>/' (one label index per class directory,
        repeated once per file in that directory).
    """
    root = ''
    file = root + filename
    # Accept the name with or without the .csv extension.
    if '.csv' not in filename:
        file = file + '.csv'
    df = np.asarray(pd.read_csv(file, header=None))
    labels = []
    # sorted() pins the class -> index mapping: raw os.listdir order is
    # filesystem-dependent, which made label assignment non-deterministic.
    # (Sorted order also matches torchvision ImageFolder's class indexing.)
    for i, c in enumerate(sorted(os.listdir(root + "data/val/"))):
        n_files = len(os.listdir(root + "data/val/" + c))
        labels.extend([i] * n_files)
    labels = np.asarray(labels)
    return df, labels
#ROC-AUC
from sklearn.metrics import roc_curve,auc
import matplotlib.pyplot as plt
def plot_roc(val_label, decision_val, caption='ROC Curve'):
    """Plot ROC curve(s) for binary or multi-class labels and save to disk.

    The figure is written to '<n_classes>.png' at 300 dpi in the current
    working directory.

    NOTE(review): decision_val is passed through label_binarize before
    roc_curve, so it is treated as DISCRETE predicted labels, not continuous
    scores/probabilities. If callers pass probabilities the binarization
    collapses them and the ROC becomes degenerate (a single threshold) —
    confirm intent against the call sites.
    """
    num_classes = np.unique(val_label).shape[0]
    # classes = [0, 1, ..., num_classes-1]: labels are assumed to be
    # zero-based consecutive integers — TODO confirm with callers.
    classes = []
    for i in range(num_classes):
        classes.append(i)
    plt.figure()
    # One-hot encode the (presumed) predicted labels; shape (n, num_classes)
    # for multi-class, (n, 1) for binary.
    decision_val = label_binarize(decision_val, classes=classes)
    if num_classes != 2:
        # Compute ROC curve and ROC area for each class (one-vs-rest).
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(num_classes):
            # NOTE(review): y_val is recomputed identically every iteration;
            # it only depends on val_label and could be hoisted.
            y_val = label_binarize(val_label, classes=classes)
            fpr[i], tpr[i], _ = roc_curve(y_val[:, i], decision_val[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        for i in range(num_classes):
            plt.plot(fpr[i], tpr[i], label='ROC curve of class {0} (area = {1:0.2f})'
                     ''.format(i+1, roc_auc[i]))
    else:
        # Binary case: a single ROC curve; AUC is reported as a percentage
        # here (multiplied by 100), unlike the multi-class branch.
        fpr, tpr, _ = roc_curve(val_label, decision_val, pos_label=1)
        roc_auc = auc(fpr, tpr)*100
        plt.plot(fpr, tpr, label='ROC curve (AUC=%0.2f)'%roc_auc)
    # Chance diagonal for reference.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(caption)
    plt.legend(loc="lower right")
    # Saved filename is just the class count, e.g. '3.png'.
    plt.savefig(str(len(classes))+'.png', dpi=300)
def predicting(ensemble_prob):
    """Return the predicted class index for every sample (row).

    For each row of `ensemble_prob`, the index of the maximum score is
    chosen; ties resolve to the lowest index. The result is a float array,
    matching the original implementation's output dtype.
    """
    return np.argmax(ensemble_prob, axis=1).astype(float)
def metrics(labels, predictions, classes):
    """Print an evaluation summary: sklearn classification report, confusion
    matrix, per-class accuracy, and balanced accuracy.

    Parameters
    ----------
    labels : array-like
        Ground-truth class indices.
    predictions : array-like
        Predicted class indices.
    classes : sequence of str
        Human-readable class names for the report.
    """
    report = classification_report(labels, predictions, target_names = classes, digits = 4)
    print("Classification Report:")
    print(report)
    matrix = confusion_matrix(labels, predictions)
    print("Confusion matrix:")
    print(matrix)
    # Per-class recall: diagonal (correct) over row sum (true count per class).
    classwise = matrix.diagonal() / matrix.sum(axis = 1)
    print("\nClasswise Accuracy :{}".format(classwise))
    balanced = balanced_accuracy_score(labels, predictions)
    print("\nBalanced Accuracy Score: ", balanced)
def fuzzy_rank(CF, top):
    """Convert classifier confidences to fuzzy ranks via the Gompertz function.

    Parameters
    ----------
    CF : ndarray of shape (n_classifiers, n_samples, n_classes)
        Confidence scores from each classifier.
    top : int
        Number of top-ranked (lowest-rank-value) classes per sample that keep
        their true fuzzy rank; every other class gets the penalty value.

    Returns
    -------
    K_L : ndarray, same shape as CF
        Fuzzy ranks with penalties applied.
    """
    # Gompertz function, vectorized (replaces a triple Python loop applying
    # math.exp element-wise). Higher confidence -> smaller rank value.
    R_L = 1 - np.exp(-np.exp(-2.0 * CF))
    # Initialize everything to the penalty value 0.632, i.e. 1 - e**-1: the
    # rank a zero-confidence prediction would receive.
    K_L = np.full(R_L.shape, 0.632)
    for clf in range(R_L.shape[0]):
        for sample in range(R_L.shape[1]):
            ranks = R_L[clf][sample]  # hoisted: was recomputed per k
            for k in range(top):
                # Keep the true rank for every class tied at the k-th
                # smallest rank value; all others stay at the penalty.
                idx = np.where(ranks == np.partition(ranks, k)[k])
                K_L[clf][sample][idx] = ranks[idx]
    return K_L
def CFS_func(CF, K_L):
    """Compute the complement-of-confidence factor sum for each sample.

    Confidences whose fuzzy rank carries the penalty value (0.632, meaning
    the class was outside the top-k for that classifier) are ignored
    (treated as zero); the rest are averaged over classifiers and
    complemented.

    Parameters
    ----------
    CF : ndarray of shape (n_classifiers, n_samples, n_classes)
        Confidence scores. NOT modified (the original implementation zeroed
        entries of the caller's array in place — a hidden side effect).
    K_L : ndarray, same shape
        Fuzzy ranks with penalties, as returned by fuzzy_rank.

    Returns
    -------
    CFS : ndarray of shape (n_samples, n_classes)
        1 - (sum of non-penalized confidences) / n_classifiers.
    """
    H = CF.shape[0]  # number of classifiers
    # Mask out penalized entries without mutating the caller's array.
    # (Exact comparison to 0.632 matches fuzzy_rank's penalty constant.)
    kept = np.where(K_L == 0.632, 0, CF)
    CFS = 1 - np.sum(kept, axis=0) / H
    return CFS
def Gompertz(top = 2, *argv):
    """Fuse classifier score matrices with Gompertz-function fuzzy ranking.

    Parameters
    ----------
    top : int, default 2
        Number of top-ranked classes per sample that keep their true fuzzy
        rank; the rest are penalized (see fuzzy_rank).
    *argv : ndarray
        One (n_samples, n_classes) score matrix per classifier, all of the
        same shape.

    Returns
    -------
    ndarray of shape (n_samples,)
        Predicted class index per sample (lowest fused score wins).

    Raises
    ------
    ValueError
        If no classifier matrices are supplied (was an opaque NameError).
    """
    L = len(argv)  # number of classifiers; replaces the manual counting loop
    if L == 0:
        raise ValueError("Gompertz() requires at least one classifier score matrix")
    # Stack the per-classifier matrices into one (L, n_samples, n_classes)
    # array. The original wrote CF[:][:][i] = arg, which (confusingly)
    # indexes the FIRST axis because each [:] returns the whole array;
    # CF[i] is the explicit, equivalent form.
    CF = np.zeros(shape=(L,) + argv[0].shape)
    for i, arg in enumerate(argv):
        CF[i] = arg
    R_L = fuzzy_rank(CF, top)   # fuzzy ranks with penalties applied
    RS = np.sum(R_L, axis=0)    # rank sum across classifiers
    CFS = CFS_func(CF, R_L)     # complement-of-confidence factor
    FS = RS * CFS               # fused score: lower means stronger agreement
    predictions = np.argmin(FS, axis=1)
    return predictions