-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathprocessing.py
108 lines (83 loc) · 3.71 KB
/
processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from tensorflow import keras
from tensorflow_docs import plots as tfplots
import utils
def preprocess_dataset(name, path, classes=None, shuffle=True, expand_dims=False, one_hot_encode=False, verbose=2):
    """Load, filter and optionally shuffle/reshape/one-hot-encode a dataset split.

    Parameters
    ----------
    name : str
        Split name; files 'X_{name}.npy' and 'y_{name}.npy' are loaded from `path`.
    path : str
        Directory containing the .npy files (passed to utils.load_dataset).
    classes : iterable or None
        Labels to keep; forwarded to utils.extract_subset_by_classes.
    shuffle : bool
        If True, shuffle X and y in unison after filtering.
    expand_dims : bool
        If True, append a trailing singleton axis to X
        (shape becomes (X.shape[0], X.shape[1], 1)).
    one_hot_encode : bool
        If True, remap labels to contiguous integers and one-hot encode y.
    verbose : int
        0 = silent, 1 = print shapes, 2 = also print progress notes.

    Returns
    -------
    [X, y] when one_hot_encode is False, otherwise the tuple
    (X, y_one_hot, y_mapping) where y_mapping maps original label -> index.
    """
    X = utils.load_dataset('X_{}.npy'.format(name), path)
    y = utils.load_dataset('y_{}.npy'.format(name), path)
    if verbose > 0:
        print('\n> Input ({}) data:'.format(name))
        print(' - X shape: {}\n - Y shape: {}'.format(X.shape, y.shape))
    X, y = utils.extract_subset_by_classes(X, y, classes)
    num_classes = len(set(y))
    if verbose > 1:
        print('> Extracted {} classes'.format(num_classes))
    if verbose > 0:
        print('\n> Filtered ({}) data:'.format(name))
        print(' - X shape: {}\n - Y shape: {}'.format(X.shape, y.shape))
    if shuffle:
        X, y = utils.shuffle(X, y)
        if verbose > 1:
            print('> Shuffling dataset')
    if expand_dims:
        # Reshape X to: (X.shape[0], X.shape[1], 1)
        X = np.expand_dims(X, axis=-1)
        if verbose > 1:
            print('> Expanding dimensions')
    if not one_hot_encode:
        return [X, y]
    # One-hot encoding procedure.
    # BUG FIX: iterate labels in sorted order so the label -> index mapping is
    # deterministic across runs (plain `set` iteration order is arbitrary,
    # which made saved models and mappings irreproducible).
    y_mapping = {k: i for i, k in enumerate(sorted(set(y)))}
    y = list(map(lambda label: y_mapping[label], y))
    y = keras.utils.to_categorical(y, num_classes)
    if verbose > 1:
        print('> Applying one-hot encoding to y values')
    return X, y, y_mapping
def grid_search_summary(grid):
    """Print the best result and per-candidate CV scores of a fitted search.

    `grid` is any fitted search object exposing `best_score_`, `best_params_`
    and `cv_results_` (e.g. sklearn's GridSearchCV).
    """
    print('\n - Best parameters set found on development set:')
    print(grid.best_score_, grid.best_params_)
    print('\n - Grid scores on development set:')
    results = grid.cv_results_
    rows = zip(results['mean_test_score'], results['std_test_score'], results['params'])
    for avg, sigma, candidate in rows:
        # Report mean score +/- two standard deviations for each candidate.
        print('{:0.3f} (+/-{:0.03}) for {}'.format(avg, sigma * 2, candidate))
def save_history(model, history, output, persist_model=True):
    """Persist a trained Keras model and save accuracy/loss training plots.

    Parameters
    ----------
    model : keras model
        Trained model; saved to '<output>/model/model.h5' plus weights.
    history : keras History
        Training history consumed by tensorflow_docs HistoryPlotter.
    output : str
        Output directory; plot PDFs are written directly inside it.
    persist_model : bool
        If False, skip saving the model/weights and only write the plots.
    """
    if persist_model:
        print('\n - Saving model and weights to file')
        model_dir = os.path.join(output, 'model')
        # BUG FIX: model.save() raises if the target directory is missing;
        # create it (idempotently) before writing.
        os.makedirs(model_dir, exist_ok=True)
        model.save(os.path.join(model_dir, 'model.h5'))
        model.save_weights(os.path.join(model_dir, 'model_weights.h5'))
    print(' - Saving network training plots')
    histories = {'': history}
    accuracy_plotter = tfplots.HistoryPlotter(metric='accuracy', smoothing_std=0)
    loss_plotter = tfplots.HistoryPlotter(metric='loss', smoothing_std=0)
    plt.figure(figsize=(15, 10))
    accuracy_plotter.plot(histories)
    plt.savefig(os.path.join(output, 'accuracy_plot.pdf'))
    plt.close()  # FIX: release the figure so repeated calls don't leak memory
    plt.figure(figsize=(15, 10))
    loss_plotter.plot(histories)
    plt.savefig(os.path.join(output, 'loss_plot.pdf'))
    plt.close()
def performance_summary(y_test, y_predicted, output, y_mapping=None, y_labels=None):
    """Print/save classification metrics, a detailed report and a confusion matrix.

    Parameters
    ----------
    y_test, y_predicted : sequences of labels
        Ground-truth and predicted labels (same length).
    output : str
        Directory where 'classification_report.txt' and
        'confusion_matrix.pdf' are written.
    y_mapping : dict, callable or None
        Optional label remapping applied to both y_test and y_predicted
        before the detailed report (presumably index -> readable label;
        verify against the caller).
    y_labels : sequence or None
        Labels forwarded to utils.plot_confusion_matrix.
    """
    # Macro averaging weights every class equally regardless of support.
    scores = {
        'Accuracy': accuracy_score(y_test, y_predicted),
        'Precision': precision_score(y_test, y_predicted, average='macro'),
        'Recall': recall_score(y_test, y_predicted, average='macro'),
        'F1': f1_score(y_test, y_predicted, average='macro'),
    }
    print(scores)
    print(' - Detailed classification report:')
    if y_mapping is not None:
        # BUG FIX: preprocess_dataset returns y_mapping as a dict, which is
        # not callable, so map(y_mapping, ...) raised TypeError. Accept either
        # a dict (lookup by key) or a callable.
        remap = y_mapping.__getitem__ if isinstance(y_mapping, dict) else y_mapping
        y_test = list(map(remap, y_test))
        y_predicted = list(map(remap, y_predicted))
    detailed_report = classification_report(y_test, y_predicted)
    print(detailed_report, end='\n')
    with open(os.path.join(output, 'classification_report.txt'), 'w') as out:
        # write() is clearer than writelines() for a single string
        out.write(detailed_report)
    print(' - Saving confusion matrix')
    utils.plot_confusion_matrix(
        y_test,
        y_predicted,
        labels=y_labels,
        output=os.path.join(output, 'confusion_matrix.pdf')
    )