build_records.py
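"""Build HDF5 records for a YOLO-style traffic sign detector.

Parses the LISA annotation CSV, splits the image paths into train/test
sets, resizes each image, and encodes every ground-truth box into the
grid cell / best-matching anchor slot expected by the loss function.
"""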
# import the required libraries
import os

import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from config import lisa_config as config
from hdf5datasetwriter import HDF5DatasetWriter
from utils import BoundingBox, bbox_iou
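
# A typical annotation row (hypothetical values shown) looks like:
#   path/to/image0.png;stop;862.0;104.0;916.0;158.0;0
# i.e. image path, class label, box corners (startX, startY, endX, endY),
# and a trailing field that we ignore, all separated by ";"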
def parse_annotations():
    # initialize a data dictionary used to map each image filename to all
    # bounding boxes associated with the image, then load the contents of
    # the annotation file
    D = {}
    rows = open(config.ANNOT_PATH).read().strip().split("\n")

    # loop over the individual rows, skipping the header
    for row in rows[1:]:
        # the annotation format mixes ";"-separated fields with extra
        # ","-separated metadata, so keep only the part before the first
        # comma, then break the row into its seven components
        row = row.split(",")[0].split(";")
        (imagePath, label, startX, startY, endX, endY, _) = row
        (startX, startY) = (float(startX), float(startY))
        (endX, endY) = (float(endX), float(endY))

        # if we are not interested in the label, ignore it
        if label not in config.CLASSES:
            continue

        # build the path to the input image, then grab any other
        # bounding boxes + labels associated with that image path
        p = os.path.sep.join([config.BASE_PATH, imagePath])
        b = D.get(p, [])

        # build a tuple consisting of the label and bounding box,
        # then update the list and store it in the dictionary
        b.append((label, (startX, startY, endX, endY)))
        D[p] = b

    return D


def create_dataset(D):
    # take the parsed annotation dictionary and create the training and
    # testing splits from its keys (the image paths)
    (trainKeys, testKeys) = train_test_split(list(D.keys()),
        test_size=config.TEST_SIZE, random_state=42)

    # initialize the data split files
    datasets = [
        ("train", trainKeys, config.TRAIN_DATA),
        ("test", testKeys, config.TEST_DATA)
    ]

    return datasets


def best_anchor_box(box):
    # find the anchor that best predicts this box
    best_anchor = -1
    max_iou = -1

    # center the box at the origin so the IoU depends only on width and
    # height, then build one origin-centered box per anchor
    shifted_box = BoundingBox(0, 0, box[2], box[3])
    anchors = [BoundingBox(0, 0, config.ANCHORS[2 * i], config.ANCHORS[2 * i + 1])
        for i in range(len(config.ANCHORS) // 2)]

    # keep the anchor with the highest IoU against the ground-truth box
    for i in range(len(anchors)):
        anchor = anchors[i]
        iou = bbox_iou(shifted_box, anchor)

        if max_iou < iou:
            best_anchor = i
            max_iou = iou

    return best_anchor
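
# For example, with hypothetical ANCHORS = [1.0, 1.0, 2.0, 4.0] (two
# anchors stored as flat width/height pairs, in grid-cell units), a box
# of width 1.9 and height 3.8 grid cells overlaps anchor 1 far more than
# anchor 0, so best_anchor_box returns 1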


def build_dataset():
    print("inside build_dataset")
    D = parse_annotations()
    datasets = create_dataset(D)

    # loop over the datasets
    for (dType, keys, outputPath) in datasets:
        # define the size of the arrays
        x_shape = (len(keys), config.IMAGE_H, config.IMAGE_W, 3)
        y_shape = (len(keys), config.GRID_S, config.GRID_S, config.BOXES,
            4 + 1 + config.NUM_CLASSES)

        # array to store all images
        x = np.zeros(x_shape)

        # array to store all grid + bb coords + labels; the last axis
        # holds [c_x, c_y, c_w, c_h, objectness, one-hot class scores]
        y = np.zeros(y_shape)

        # initialize the writer and initialize the total number of
        # examples written to file
        print("[INFO] processing '{}'...".format(dType))
        writer = HDF5DatasetWriter(x_shape, y_shape, outputPath)
        total = 0

        # loop over all the keys in the current set
        for i, k in tqdm(enumerate(keys), total=len(keys)):
            # load the input image from disk
            image = cv2.imread(k)
            (h, w, c) = image.shape

            # if the image is grayscale, convert it to BGR -- it will
            # still look gray, but we need 3 channels to feed our model
            if c == 1:
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

            # resize the image (e.g. 416 x 416 for YOLO) and store it in
            # x; note that cv2.resize expects (width, height)
            img = cv2.resize(image, (config.IMAGE_W, config.IMAGE_H),
                interpolation=cv2.INTER_AREA)
            x[i] = img

            # calculate the ratios of the resized to original dimensions
            w_ratio = config.IMAGE_W / w
            h_ratio = config.IMAGE_H / h

            # calculate the grid cell width and height
            grid_cell_w = config.IMAGE_W / config.GRID_S
            grid_cell_h = config.IMAGE_H / config.GRID_S

            # loop over the bounding boxes + labels associated with the
            # image
            for (label, (startX, startY, endX, endY)) in D[k]:
                # transform the bb coordinates according to the resized
                # image
                startX *= w_ratio
                startY *= h_ratio
                endX *= w_ratio
                endY *= h_ratio

                # transform the bb coordinates to center, width, height
                c_x = (startX + endX) * 0.5
                c_y = (startY + endY) * 0.5
                c_w = abs(endX - startX)
                c_h = abs(endY - startY)

                # scale the coordinates to grid cell units
                c_x_grid = c_x / grid_cell_w
                c_y_grid = c_y / grid_cell_h
                c_w_grid = c_w / grid_cell_w
                c_h_grid = c_h / grid_cell_h
                box = [c_x_grid, c_y_grid, c_w_grid, c_h_grid]

                # determine the grid cell to place the center coordinates
                # in
                grid_x = int(c_x_grid)
                grid_y = int(c_y_grid)

                # increment the total number of examples (bounding boxes)
                total += 1

                # find the most suitable anchor box
                anchor_box = best_anchor_box(box)

                # figure out the index of the label
                label_index = config.CLASSES[label]

                # fill in the y array: box coordinates, objectness score,
                # then the one-hot class scores (which start at index 5)
                y[i, grid_x, grid_y, anchor_box, :4] = box
                y[i, grid_x, grid_y, anchor_box, 4] = 1
                y[i, grid_x, grid_y, anchor_box, 5 + label_index] = 1

                """
                # display a few images to visually check the coordinate
                # transformations
                cv2.rectangle(img, (int(startX), int(startY)),
                    (int(endX), int(endY)), (0, 255, 0), 2)
                cv2.imshow('Image', img)
                cv2.waitKey(0)
                """

        # add the images and labels to the HDF5 file
        writer.add(x, y)

        # close the writer and print diagnostic information for the user
        writer.close()
        print("[INFO] {} examples (objects) saved for '{}'".format(total, dType))


if __name__ == '__main__':
    build_dataset()
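
# e.g. run from the project root (assuming the paths in lisa_config are
# set up for your machine):
#   python build_records.py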