-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVisualizer.py
135 lines (126 loc) · 5.38 KB
/
Visualizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import confusion_matrix
import seaborn as sns
import warnings
class Visualizer:
    '''
    Plotting helpers that render a figure and save it to disk.

    Every method writes to a relative path (mostly under ``graphs/``).
    No directory is created by this class.
    '''

    @staticmethod
    def _drop_columns(data, columns):
        '''
        Return data without the given columns.
        :param data: object exposing a pandas-style ``drop(columns=..., errors=...)``
        :param columns: iterable of column labels (converted with ``str``) or None
        :return: data itself when there is nothing to drop, otherwise the result of ``drop``
        '''
        if columns is None:
            return data
        labels = [str(column) for column in columns]
        if not labels:
            return data
        # Keyword form: the positional ``axis`` argument is not accepted by
        # recent pandas releases. Labels that are absent are skipped.
        return data.drop(columns=labels, errors='ignore')

    def make_euclidean_scatter_plot(self, estimator_name, data, param, predicted=None, centers=None):
        '''
        Form a scatter graph and save it to ``graphs/{estimator_name}/{param}``.
        :param estimator_name: part of path. Using as directory name
        :param data: points coordinates to make graph; indexed as ``data[:, 0]`` and ``data[:, 1]``
        :param param: estimator parameters. Use as graph name
        :param predicted: use as color sequence
        :param centers: centers of clusters, indexed like data; None or empty draws no centers
        :return: None
        '''
        fig = plt.figure()
        try:
            plt.scatter(data[:, 0], data[:, 1], c=predicted, s=50, cmap='viridis')
            # Test for presence of centers, not for the truthiness of their
            # coordinates: a center lying on an axis is still a valid center.
            if centers is not None and len(centers) > 0:
                plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
            fig.savefig(f'graphs/{estimator_name}/{param}')
        finally:
            plt.close(fig)

    def make_compare_plot(self, points_x, points_y, name):
        '''
        Form graphs from lists in points_y, one stacked subplot per position
        inside the inner lists, and save them to ``graphs/{name}.png``.
        :param points_x: points in ox. Same for all lists from points_y
        :param points_y: list of point lists.
        :param name: name of graph
        :return: None
        :raises ValueError: if points_y is empty
        '''
        if len(points_y) == 0:
            raise ValueError('points_y must contain at least one list of points')
        number_of_graphics = max(map(len, points_y))
        fig = plt.figure()
        try:
            gs = gridspec.GridSpec(number_of_graphics, 1, figure=fig)
            for i in range(number_of_graphics):
                axes = fig.add_subplot(gs[i, 0])
                # i-th value of every inner list forms the i-th curve
                axes.plot(points_x, [series[i] for series in points_y])
            fig.savefig(f'graphs/{name}.png')
        finally:
            # Release the figure so repeated calls do not accumulate open figures
            plt.close(fig)

    def make_simple_graph(self, points_x, i):
        '''
        Form a simple graph and save it to
        ``graphs/normal_analysis/intervals with step {i}.png``.
        :param points_x: values for graph
        :param i: parameter use as number for graph name
        :return: None
        '''
        fig = plt.figure()
        try:
            plt.plot(list(points_x))
            fig.savefig(f'graphs/normal_analysis/intervals with step {i}.png')
        finally:
            plt.close(fig)

    def make_heatmap(self, data, ages=None, heatmap_name=None, cluster_n=None):
        '''
        Form a heatmap of ``data.corr()``.
        :param data: data for heatmap
        :param ages: ages columns for drop; None keeps every column
        :param heatmap_name: first part of the output file name
        :param cluster_n: second part of the output file name
        :return: None
        '''
        # Warnings are deliberately not escalated to errors here: a global
        # 'error' filter would stay active for the rest of the process.
        data = self._drop_columns(data, ages)
        plt.figure(figsize=(20, 20))
        try:
            sns.heatmap(data.corr(), annot=True, linewidths=.3, cbar=False, square=True)
            # NOTE(review): directory is spelled 'clasification_heatmap' here but
            # 'classification_heatmap' in make_confusion_matrix - confirm which is intended.
            plt.savefig(f'graphs/clasification_heatmap/{heatmap_name}_{cluster_n}.png')
        finally:
            plt.close('all')

    def make_pairplot(self, data, ages, name='new_pairplot'):
        '''
        Form a plot in features space, colored by the ``clusters`` column.
        :param data: data for graph
        :param ages: columns for drop; they are dropped only when the first one is present in data
        :param name: string use as path of filename
        :return: None
        '''
        if ages is not None and len(ages) > 0 and str(ages[0]) in data.columns:
            data = self._drop_columns(data, ages)
        try:
            sns.pairplot(data, hue='clusters', diag_kind='hist')
            plt.savefig(f'graphs/pairplots/pairplot_{name}.png')
        finally:
            plt.close('all')

    def make_overfit_check_plot(self, train_sc, test_sc, ranging, clname, cmp_flag=False):
        '''
        Form a plot for train and test distributions.
        :param train_sc: points in train distribution
        :param test_sc: points in test distribution
        :param ranging: x values. Same for train and test lists.
        :param clname: string use as path of filename
        :param cmp_flag: currently unused
        :return: None
        '''
        # Own figure, so the curves never land on a figure left open elsewhere
        plt.figure()
        try:
            plt.plot(ranging, train_sc, label='train distr')
            plt.plot(ranging, test_sc, label='test distr')
            plt.legend()
            plt.savefig(f'graphs/{clname}.png')
        finally:
            plt.close('all')

    def make_confusion_matrix(self, true_labels, predicted_labels, train_l):
        '''
        Form a confusion matrix, save it as a heatmap image and as text.
        :param true_labels: true labels from dataset
        :param predicted_labels: predicted labels from dataset
        :param train_l: length of train distribution. Use as part of filename
        :return: None
        '''
        confusion = confusion_matrix(true_labels, predicted_labels)
        plt.figure(figsize=(15, 15))
        try:
            sns.heatmap(confusion, annot=True, square=True)
            plt.savefig(f'graphs/classification_heatmap/heatmap_{train_l}.png', bbox_inches='tight')
        finally:
            plt.close('all')
        with open(f'heatmaps/heatmap{train_l}.txt', 'w') as hmp:
            hmp.write(str(confusion))

    def distribution_hist(self, predictor_val, cluster_n, predictor_name, predictor_prob=None):
        '''
        Form a kernel density plot of the predictor values.
        :param predictor_val: list of predictor values
        :param cluster_n: cluster number
        :param predictor_name: name of predictor
        :param predictor_prob: currently unused
        :return: None
        '''
        sorted_features = sorted(predictor_val)
        # Own figure, so the curve never lands on a figure left open elsewhere
        plt.figure()
        try:
            sns.kdeplot(sorted_features)
            plt.savefig(f'graphs/predictor_per_cluster_distribution/distribution_in_{cluster_n}_for_{predictor_name}.png')
        finally:
            plt.close('all')