forked from yaoyao-liu/mini-imagenet-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmini_imagenet_dataloader.py
209 lines (184 loc) · 9.83 KB
/
mini_imagenet_dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Yaoyao Liu
## NUS School of Computing
## Email: [email protected]
## Copyright (c) 2019
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import os
import random
import numpy as np
import scipy.misc as scm
from tqdm import trange
class MiniImageNetDataLoader(object):
def __init__(self, shot_num, way_num, episode_test_sample_num):
self.shot_num = shot_num
self.way_num = way_num
self.episode_test_sample_num = episode_test_sample_num
self.num_samples_per_class = episode_test_sample_num + shot_num
metatrain_folder = './processed_images/train'
metaval_folder = './processed_images/val'
metatest_folder = './processed_images/test'
npy_dir = './episode_filename_list/'
if not os.path.exists(npy_dir):
os.mkdir(npy_dir)
self.npy_base_dir = npy_dir + str(self.shot_num) + 'shot_' + str(self.way_num) + 'way/'
if not os.path.exists(self.npy_base_dir):
os.mkdir(self.npy_base_dir)
self.metatrain_folders = [os.path.join(metatrain_folder, label) \
for label in os.listdir(metatrain_folder) \
if os.path.isdir(os.path.join(metatrain_folder, label)) \
]
self.metaval_folders = [os.path.join(metaval_folder, label) \
for label in os.listdir(metaval_folder) \
if os.path.isdir(os.path.join(metaval_folder, label)) \
]
self.metatest_folders = [os.path.join(metatest_folder, label) \
for label in os.listdir(metatest_folder) \
if os.path.isdir(os.path.join(metatest_folder, label)) \
]
def get_images(self, paths, labels, nb_samples=None, shuffle=True):
if nb_samples is not None:
sampler = lambda x: random.sample(x, nb_samples)
else:
sampler = lambda x: x
images = [(i, os.path.join(path, image)) \
for i, path in zip(labels, paths) \
for image in sampler(os.listdir(path))]
if shuffle:
random.shuffle(images)
return images
def generate_data_list(self, phase='train', episode_num=None):
if phase=='train':
folders = self.metatrain_folders
if episode_num is None:
episode_num = 20000
if not os.path.exists(self.npy_base_dir+'/train_filenames.npy'):
print('Generating train filenames')
all_filenames = []
for _ in trange(episode_num):
sampled_character_folders = random.sample(folders, self.way_num)
random.shuffle(sampled_character_folders)
labels_and_images = self.get_images(sampled_character_folders, range(self.way_num), nb_samples=self.num_samples_per_class, shuffle=False)
labels = [li[0] for li in labels_and_images]
filenames = [li[1] for li in labels_and_images]
all_filenames.extend(filenames)
np.save(self.npy_base_dir+'/train_labels.npy', labels)
np.save(self.npy_base_dir+'/train_filenames.npy', all_filenames)
print('Train filename and label lists are saved')
elif phase=='val':
folders = self.metaval_folders
if episode_num is None:
episode_num = 600
if not os.path.exists(self.npy_base_dir+'/val_filenames.npy'):
print('Generating val filenames')
all_filenames = []
for _ in trange(episode_num):
sampled_character_folders = random.sample(folders, self.way_num)
random.shuffle(sampled_character_folders)
labels_and_images = self.get_images(sampled_character_folders, range(self.way_num), nb_samples=self.num_samples_per_class, shuffle=False)
labels = [li[0] for li in labels_and_images]
filenames = [li[1] for li in labels_and_images]
all_filenames.extend(filenames)
np.save(self.npy_base_dir+'/val_labels.npy', labels)
np.save(self.npy_base_dir+'/val_filenames.npy', all_filenames)
print('Val filename and label lists are saved')
elif phase=='test':
folders = self.metatest_folders
if episode_num is None:
episode_num = 600
if not os.path.exists(self.npy_base_dir+'/test_filenames.npy'):
print('Generating test filenames')
all_filenames = []
for _ in trange(episode_num):
sampled_character_folders = random.sample(folders, self.way_num)
random.shuffle(sampled_character_folders)
labels_and_images = self.get_images(sampled_character_folders, range(self.way_num), nb_samples=self.num_samples_per_class, shuffle=False)
labels = [li[0] for li in labels_and_images]
filenames = [li[1] for li in labels_and_images]
all_filenames.extend(filenames)
np.save(self.npy_base_dir+'/test_labels.npy', labels)
np.save(self.npy_base_dir+'/test_filenames.npy', all_filenames)
print('Test filename and label lists are saved')
else:
print('Please select vaild phase')
def load_list(self, phase='train'):
if phase=='train':
self.train_filenames = np.load(self.npy_base_dir + 'train_filenames.npy').tolist()
self.train_labels = np.load(self.npy_base_dir + 'train_labels.npy').tolist()
elif phase=='val':
self.val_filenames = np.load(self.npy_base_dir + 'val_filenames.npy').tolist()
self.val_labels = np.load(self.npy_base_dir + 'val_labels.npy').tolist()
elif phase=='test':
self.test_filenames = np.load(self.npy_base_dir + 'test_filenames.npy').tolist()
self.test_labels = np.load(self.npy_base_dir + 'test_labels.npy').tolist()
elif phase=='all':
self.train_filenames = np.load(self.npy_base_dir + 'train_filenames.npy').tolist()
self.train_labels = np.load(self.npy_base_dir + 'train_labels.npy').tolist()
self.val_filenames = np.load(self.npy_base_dir + 'val_filenames.npy').tolist()
self.val_labels = np.load(self.npy_base_dir + 'val_labels.npy').tolist()
self.test_filenames = np.load(self.npy_base_dir + 'test_filenames.npy').tolist()
self.test_labels = np.load(self.npy_base_dir + 'test_labels.npy').tolist()
else:
print('Please select vaild phase')
def process_batch(self, input_filename_list, input_label_list, batch_sample_num, reshape_with_one=True):
new_path_list = []
new_label_list = []
for k in range(batch_sample_num):
class_idxs = range(0, self.way_num)
random.shuffle(class_idxs)
for class_idx in class_idxs:
true_idx = class_idx*batch_sample_num + k
new_path_list.append(input_filename_list[true_idx])
new_label_list.append(input_label_list[true_idx])
img_list = []
for filepath in new_path_list:
this_img = scm.imread(filepath)
this_img = this_img / 255.0
img_list.append(this_img)
if reshape_with_one:
img_array = np.array(img_list)
label_array = one_hot(np.array(new_label_list)).reshape([1, self.way_num*batch_sample_num, -1])
else:
img_array = np.array(img_list)
label_array = self.one_hot(np.array(new_label_list)).reshape([self.way_num*batch_sample_num, -1])
return img_array, label_array
def one_hot(self, inp):
n_class = inp.max() + 1
n_sample = inp.shape[0]
out = np.zeros((n_sample, n_class))
for idx in range(n_sample):
out[idx, inp[idx]] = 1
return out
def get_batch(self, phase='train', idx=0):
if phase=='train':
all_filenames = self.train_filenames
labels = self.train_labels
elif phase=='val':
all_filenames = self.val_filenames
labels = self.val_labels
elif phase=='test':
all_filenames = self.test_filenames
labels = self.test_labels
else:
print('Please select vaild phase')
one_episode_sample_num = self.num_samples_per_class*self.shot_num
this_task_filenames = all_filenames[idx*one_episode_sample_num:(idx+1)*one_episode_sample_num]
epitr_sample_num = self.shot_num
epite_sample_num = self.episode_test_sample_num
this_task_tr_filenames = []
this_task_tr_labels = []
this_task_te_filenames = []
this_task_te_labels = []
for class_k in range(self.way_num):
this_class_filenames = this_task_filenames[class_k*self.num_samples_per_class:(class_k+1)*self.num_samples_per_class]
this_class_label = labels[class_k*self.num_samples_per_class:(class_k+1)*self.num_samples_per_class]
this_task_tr_filenames += this_class_filenames[0:epitr_sample_num]
this_task_tr_labels += this_class_label[0:epitr_sample_num]
this_task_te_filenames += this_class_filenames[epitr_sample_num:]
this_task_te_labels += this_class_label[epitr_sample_num:]
this_inputa, this_labela = self.process_batch(this_task_tr_filenames, this_task_tr_labels, epitr_sample_num, reshape_with_one=False)
this_inputb, this_labelb = self.process_batch(this_task_te_filenames, this_task_te_labels, epite_sample_num, reshape_with_one=False)
return this_inputa, this_labela, this_inputb, this_labelb