-
-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathresize_corners.cpp
443 lines (360 loc) · 11.8 KB
/
resize_corners.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
/* DarkHelp - C++ helper class for Darknet's C API.
* Copyright 2019-2024 Stephane Charette <[email protected]>
* MIT license applies. See "license.txt" for details.
*/
#include <filesystem>
#include <fstream>
#include <map>
#include <regex>
#include <set>
#include <sstream>
#include <string>
#include <opencv2/opencv.hpp>
/** @file
* This simple tool looks for classes named "TL", "TR", "BL", and "BR". This is typically used to indicate the
* corners of objects, where "TL" is "top-left", "BR" is "bottom-right", etc. If it finds any of these classes, the
* tool will then read through all of the existing annotations, resize the corners to a specific size, and re-write
* the annotation files with the new sizes.
*/
/** Width and height, in pixels, to give every corner annotation. This size is in pixels **AFTER** the image has
 * been resized to the neural network dimensions, which is why the .cfg file must be parsed for the width and height
 * of the network before any annotation can be converted between normalized and pixel coordinates.
 */
const int corner_size = 16;
/** Corner class name to class index, such as "tl" -> 0, "tr" -> 1, ...  The names are stored in lowercase and the
 * index is the zero-based line number of the class in the .names file. Populated by @ref parse_names_file().
 */
std::map<std::string, int> indexes;
/** Class index to corner class name, such as 0 -> "tl", 1 -> "tr", ...  This is the reverse of @ref indexes and is
 * what @ref process_images() uses to decide whether an annotation is a corner and which kind of corner it is.
 */
std::map<int, std::string> corners;
/** The size of the network, parsed from the width=... and height=... lines in the .cfg file. Remains at (-1, -1)
 * until @ref parse_cfg_file() has found both values.
 */
cv::Size network_dimensions(-1, -1);
/** These are all of the images with non-empty annotation files that we need to process. The list is rebuilt and
 * sorted by @ref find_all_images().
 */
std::vector<std::string> annotated_image_filenames;
/** Return a copy of @p raw in which every character has been passed through @p std::tolower(). Each character is
 * converted to @p unsigned @p char first so negative @p char values are never handed to @p std::tolower().
 */
std::string lowercase(const std::string & raw)
{
	std::string result;
	result.reserve(raw.size());

	for (const unsigned char ch : raw)
	{
		result.push_back(static_cast<char>(std::tolower(ch)));
	}

	return result;
}
/** Read the .names file and find all of the corner indexes we need to resize. The corner classes are the ones named
 * @p TL, @p TR, @p BR, and @p BL. The comparison is case-insensitive and ignores whitespace surrounding the name,
 * including the @p \\r left behind when a .names file with Windows line endings is read on another platform.
 *
 * Only exact names are accepted. A prefix match would also pick up unrelated classes such as "truck" or "blue", and
 * @ref process_images() rejects anything that is not exactly one of the four corner names.
 *
 * The results are stored in the global variables @ref indexes and @ref corners, where the index of a class is its
 * zero-based line number in the .names file.
 *
 * @param [in] names_file Path to the Darknet/YOLO .names file.
 *
 * @throws std::runtime_error if the file cannot be opened.
 * @throws std::logic_error if the number of corner classes found is neither 2 nor 4.
 */
void parse_names_file(const std::filesystem::path & names_file)
{
	std::cout << "Input .names file .. " << names_file.string() << std::endl;

	// the only class names process_images() knows how to resize
	const std::set<std::string> corner_names = {"tl", "tr", "br", "bl"};
	const char * const whitespace = " \t\r\n";

	std::ifstream ifs(names_file);
	if (not ifs)
	{
		throw std::runtime_error("failed to open the .names file " + names_file.string());
	}

	std::string line;
	int idx = 0;
	while (std::getline(ifs, line))
	{
		// strip leading and trailing whitespace so "TL\r" or " tl " are recognized as "tl"
		std::string name;
		const auto first = line.find_first_not_of(whitespace);
		if (first != std::string::npos)
		{
			const auto last = line.find_last_not_of(whitespace);
			name = lowercase(line.substr(first, last - first + 1));
		}

		if (corner_names.count(name) > 0)
		{
			// this annotation is a corner we need to resize
			indexes[name] = idx;
			corners[idx] = name;
			std::cout << "-> #" << idx << " = " << name << std::endl;
		}

		// every line counts as a class, whether or not it is a corner
		idx ++;
	}

	/* We typically have either 2 classes such as TL and TR, or we have the full 4 classes including BR and BL.
	 * This is not a hard requirement, but I've not yet run into a case where we have just 1 or 3. If this
	 * situation ever comes up then we can get rid of this check.
	 */
	if (indexes.size() != 2 and indexes.size() != 4)
	{
		throw std::logic_error("expected either 2 or 4 corner type indexes, but found " + std::to_string(indexes.size()));
	}

	return;
}
/** Find the width=... and height=... values in the .cfg file and store them in @ref network_dimensions.
 *
 * Each line is lowercased and matched against a simple "key = value" pattern in which @p # ends the value. Reading
 * stops as soon as both dimensions are positive.
 *
 * @param [in] cfg_file Path to the Darknet/YOLO .cfg file.
 *
 * @throws std::runtime_error if either dimension ends up smaller than 32, which includes the case where it was
 * never found. @p std::stoi() may also throw if a width or height value is not numeric.
 */
void parse_cfg_file(const std::filesystem::path & cfg_file)
{
	std::cout << "Input .cfg file .... " << cfg_file.string() << std::endl;

	const std::regex key_value_rx(
		"^[ \t]*"			// start of line, optional whitespace
		"([^#= \t]+)"		// group #1:  key (everything up to #, =, or whitespace)
		"[ \t]*=[ \t]*"		// equal sign surrounded by optional whitespace
		"([^#]+)"			// group #2:  value (everything up to #)
		);

	std::ifstream input(cfg_file);
	for (std::string text; std::getline(input, text); )
	{
		const std::string lowered = lowercase(text);

		std::smatch parts;
		if (not std::regex_search(lowered, parts, key_value_rx))
		{
			// not a key-value pair, such as a [section] header, a comment, or a blank line
			continue;
		}

		const std::string name = parts.str(1);
		const std::string value = parts.str(2);

		if (name == "width")
		{
			network_dimensions.width = std::stoi(value);
		}
		else if (name == "height")
		{
			network_dimensions.height = std::stoi(value);
		}

		const bool have_both = (network_dimensions.width > 0 and network_dimensions.height > 0);
		if (have_both)
		{
			// nothing else in the .cfg file is of interest
			break;
		}
	}

	std::cout << "Network dimensions . " << network_dimensions.width << " x " << network_dimensions.height << std::endl;

	const bool too_small = (network_dimensions.width < 32 or network_dimensions.height < 32);
	if (too_small)
	{
		throw std::runtime_error("invalid network dimensions");
	}
}
/** Recursively walk @p root_directory looking for .png, .jpg, and .jpeg files (extension compared in lowercase).
 * Anything whose path contains "darkmark_image_cache" is skipped. Of the images found, only those with a non-empty
 * .txt annotation file beside them are kept; images with an empty .txt file are counted as negative samples, and
 * images without a .txt file are ignored.
 *
 * The sorted results replace the content of the global variable @ref annotated_image_filenames.
 *
 * @param [in] root_directory Directory at which the recursive search starts.
 */
void find_all_images(const std::filesystem::path & root_directory)
{
	namespace fs = std::filesystem;

	std::cout << "Search directory ... " << root_directory.string() << std::endl;

	// phase 1:  collect every image, regardless of whether or not it is annotated
	std::vector<std::string> candidates;
	for (const auto & item : fs::recursive_directory_iterator(root_directory))
	{
		const std::string full_path = item.path().string();

		const bool in_darkmark_cache = (full_path.find("darkmark_image_cache") != std::string::npos);
		if (in_darkmark_cache)
		{
			continue;
		}

		// might need to expand the set of extensions we look for (only need lowercase)
		const std::string suffix = lowercase(item.path().extension().string());
		const bool is_image = (suffix == ".png" or suffix == ".jpg" or suffix == ".jpeg");
		if (is_image)
		{
			candidates.push_back(full_path);
		}
	}

	// phase 2:  only keep the images that have annotations
	annotated_image_filenames.clear();
	size_t empty_annotation_files = 0;
	for (const auto & image_path : candidates)
	{
		const fs::path txt_path = fs::path(image_path).replace_extension(".txt");
		if (not fs::exists(txt_path))
		{
			continue;
		}

		if (fs::file_size(txt_path) > 0)
		{
			annotated_image_filenames.push_back(image_path);
		}
		else
		{
			empty_annotation_files ++;
		}
	}

	std::cout
		<< "Total images ....... " << candidates.size()					<< std::endl
		<< "Negative samples ... " << empty_annotation_files			<< std::endl
		<< "Annotated images ... " << annotated_image_filenames.size()	<< std::endl;

	std::sort(annotated_image_filenames.begin(), annotated_image_filenames.end());
}
/// Loop through all of the images and resize the corner annotations.
void process_images()
{
std::cout << "Resize corners to .. " << corner_size << " x " << corner_size << std::endl;
size_t rewritten_files = 0;
size_t unmodified_files = 0;
const float images_to_process = annotated_image_filenames.size();
float images_processed = 0.0f;
// keep track of the corners that we end up modifying
std::map<std::string, size_t> count_modified_corners;
for (auto iter : corners)
{
count_modified_corners[iter.second] = 0;
}
for (const auto & fn : annotated_image_filenames)
{
images_processed ++;
std::cout
<< "\rProcessing images .. "
<< static_cast<int>(std::round(images_processed * 100.0f / images_to_process))
<< "% " << std::flush;
const auto annotation_filename = std::filesystem::path(fn).replace_extension(".txt");
#if 0 // don't bother reading the image, use the network dimensions instead
cv::Mat mat = cv::imread(fn);
if (mat.empty())
{
throw std::runtime_error("failed to read the image " + fn);
}
const double width = mat.cols;
const double height = mat.rows;
#else
const double width = network_dimensions.width;
const double height = network_dimensions.height;
#endif
std::stringstream ss;
ss << std::fixed << std::setprecision(10);
bool modified = false;
std::ifstream ifs(annotation_filename);
while (ifs.good())
{
int idx = -1;
double cx = -1.0;
double cy = -1.0;
double w = -1.0;
double h = -1.0;
ifs >> idx >> cx >> cy >> w >> h;
if (not ifs.good())
{
break;
}
if (corners.count(idx) and
cx > 0.0 and
cy > 0.0 and
w > 0.0 and
h > 0.0)
{
int im_x = std::round(width * (cx - w / 2.0));
int im_y = std::round(height * (cy - h / 2.0));
int im_w = std::round(width * w);
int im_h = std::round(height * h);
if (im_w != corner_size or im_h != corner_size)
{
if (corners[idx] == "tl")
{
// leave the X and Y coordinates unchanged
im_w = corner_size;
im_h = corner_size;
count_modified_corners["tl"] ++;
}
else if (corners[idx] == "tr")
{
// move the X, leave the Y unchanged
im_x += (im_w - corner_size);
im_w = corner_size;
im_h = corner_size;
count_modified_corners["tr"] ++;
}
else if (corners[idx] == "br")
{
// move both X and Y
im_x += (im_w - corner_size);
im_y += (im_h - corner_size);
im_w = corner_size;
im_h = corner_size;
count_modified_corners["br"] ++;
}
else if (corners[idx] == "bl")
{
// move the Y, leave the X unchanged
im_y += (im_h - corner_size);
im_w = corner_size;
im_h = corner_size;
count_modified_corners["bl"] ++;
}
else
{
throw std::logic_error("corner type \"" + corners[idx] + "\" is unknown");
}
// now that we know the new image coordinates, calculate the new normalized coordinates
w = im_w / width;
h = im_h / height;
cx = (im_x + (im_w / 2.0)) / width;
cy = (im_y + (im_h / 2.0)) / height;
modified = true;
}
}
ss << idx << " " << cx << " " << cy << " " << w << " " << h << std::endl;
}
ifs.close();
if (not modified)
{
unmodified_files ++;
}
else
{
rewritten_files ++;
std::ofstream ofs(annotation_filename);
ofs << ss.str();
// if this file also has a DarkMark .json file associated with it,
// then it must be deleted to force DarkMark to re-import the .txt file
const auto json_filename = std::filesystem::path(fn).replace_extension(".json");
if (std::filesystem::exists(json_filename))
{
std::filesystem::remove(json_filename);
}
}
}
std::cout
<< "" << std::endl
<< "Unmodified files ... " << unmodified_files << std::endl
<< "Re-written files ... " << rewritten_files << std::endl;
for (const auto & [key, val] : count_modified_corners)
{
std::cout << "-> " << key << ": " << val << std::endl;
}
return;
}
/** Entry point. Expects exactly one parameter, the path to either the .cfg or the .names file of a Darknet/YOLO
 * project; the other file is found by swapping the extension. The directory that contains the .names file is then
 * searched for annotated images, and the corner annotations are resized.
 *
 * @return 0 on success, or 2 if anything threw an exception derived from @p std::exception (including the case
 * where the usage text is shown).
 */
int main(int argc, char * argv[])
{
	try
	{
		std::cout << "Resize Darknet/YOLO Corner Annotations (TL, TR, BL, BR)" << std::endl << std::endl;

		if (argc != 2)
		{
			std::cout
				<< "Usage:"																	<< std::endl
																							<< std::endl
				<< "\t" << argv[0] << " <filename>"											<< std::endl
																							<< std::endl
				<< "Specify either the .cfg or the .names file of the Darknet/YOLO project."	<< std::endl
				<< "(It is assumed the project uses .cfg and .names as file extensions.)"		<< std::endl
																							<< std::endl
				<< "WARNING:"																<< std::endl
																							<< std::endl
				<< "This tool will re-write your annotations!  Make sure"					<< std::endl
				<< "you have a backup of your data before you run it."						<< std::endl
																							<< std::endl;

			throw std::invalid_argument("invalid parameter");
		}

		// both files must exist and be regular files before we bother parsing anything
		const auto require_regular_file = [](const std::filesystem::path & candidate)
		{
			if (not std::filesystem::exists(candidate))
			{
				throw std::invalid_argument("expected file does not exist: " + candidate.string());
			}
			if (not std::filesystem::is_regular_file(candidate))
			{
				throw std::invalid_argument("expected file to be a regular file: " + candidate.string());
			}
		};

		const std::filesystem::path user_path(argv[1]);
		auto names_filename	= std::filesystem::path(user_path).replace_extension(".names");
		auto cfg_filename	= std::filesystem::path(user_path).replace_extension(".cfg");

		require_regular_file(names_filename);
		require_regular_file(cfg_filename);

		names_filename	= std::filesystem::canonical(names_filename);
		cfg_filename	= std::filesystem::canonical(cfg_filename);

		parse_names_file(names_filename);
		parse_cfg_file(cfg_filename);

		// the images are expected to be somewhere below the directory which contains the .names file
		find_all_images(names_filename.parent_path());
		process_images();

		std::cout << "Done!" << std::endl;
		return 0;
	}
	catch (const std::exception & e)
	{
		std::cout << "ERROR: " << e.what() << std::endl;
		return 2;
	}
}