-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmatlab1.m
279 lines (229 loc) · 9.57 KB
/
matlab1.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
% Load vehicle data set
data = load('fasterRCNNVehicleTrainingData.mat');
vehicleDataset = data.vehicleTrainingData;
% Display first few rows of the data set.
vehicleDataset(1:4,:)
%%
% Display one of the images from the data set to understand the type of
% images it contains.
% Add full path to the local vehicle data folder.
dataDir = fullfile(toolboxdir('vision'),'visiondata');
vehicleDataset.imageFilename = fullfile(dataDir, vehicleDataset.imageFilename);
% Read one of the images.
I = imread(vehicleDataset.imageFilename{10});
% Insert the ROI labels.
I = insertShape(I, 'Rectangle', vehicleDataset.vehicle{10});
% Resize and display image.
I = imresize(I, 3);
figure
imshow(I)
%%
% Split data into a training and test set.
idx = floor(0.6 * height(vehicleDataset));
trainingData = vehicleDataset(1:idx,:);
testData = vehicleDataset(idx:end,:);
%% Create a Convolutional Neural Network (CNN)
% Create image input layer.
inputLayer = imageInputLayer([32 32 3]);
%% middle layers of the network.
% Define the convolutional layer parameters.
filterSize = [3 3];
numFilters = 32;
% Create the middle layers.
middleLayers = [
convolution2dLayer(filterSize, numFilters, 'Padding', 1)
reluLayer()
convolution2dLayer(filterSize, numFilters, 'Padding', 1)
reluLayer()
maxPooling2dLayer(3, 'Stride',2)
];
%% The final layers of the CNN are typically composed of fully connected layers and a softmax loss layer.
finalLayers = [
% Add a fully connected layer with 64 output neurons. The output size
% of this layer will be an array with a length of 64.
fullyConnectedLayer(64)
% Add ReLU nonlinearity.
reluLayer
% Add the last fully connected layer. At this point, the network must
% produce outputs that can be used to measure whether the input image
% belongs to one of the object classes or to the background. This
% measurement is made using the subsequent loss layers.
fullyConnectedLayer(width(vehicleDataset))
% Add the softmax loss layer and classification layer.
softmaxLayer
classificationLayer
];
%%
% Combine the input, middle, and final layers.
layers = [
inputLayer
middleLayers
finalLayers
]
%% Configure Training Options
% |trainFasterRCNNObjectDetector| trains the detector in four steps. The first
% two steps train the region proposal and detection networks used in Faster
% R-CNN. The final two steps combine the networks from the first two steps
% such that a single network is created for detection [1]. Each training
% step can have different convergence rates, so it is beneficial to specify
% independent training options for each step. To specify the network
% training options use |trainingOptions| from Deep Learning Toolbox(TM).
% Options for step 1.
optionsStage1 = trainingOptions('sgdm', ...
'MaxEpochs', 10, ...
'MiniBatchSize', 1, ...
'InitialLearnRate', 1e-3, ...
'CheckpointPath', tempdir);
% Options for step 2.
optionsStage2 = trainingOptions('sgdm', ...
'MaxEpochs', 10, ...
'MiniBatchSize', 1, ...
'InitialLearnRate', 1e-3, ...
'CheckpointPath', tempdir);
% Options for step 3.
optionsStage3 = trainingOptions('sgdm', ...
'MaxEpochs', 10, ...
'MiniBatchSize', 1, ...
'InitialLearnRate', 1e-3, ...
'CheckpointPath', tempdir);
% Options for step 4.
optionsStage4 = trainingOptions('sgdm', ...
'MaxEpochs', 10, ...
'MiniBatchSize', 1, ...
'InitialLearnRate', 1e-3, ...
'CheckpointPath', tempdir);
options = [
optionsStage1
optionsStage2
optionsStage3
optionsStage4
];
%%
% Here, the learning rate for the first two steps is set higher than the
% last two steps. Because the last two steps are fine-tuning steps, the
% network weights can be modified more slowly than in the first two steps.
% The mini-batch size must be 1 for Faster R-CNN training, which processes
% multiple image regions from one training image every iteration.
%
% In addition, |'CheckpointPath'| is set to a temporary location for all
% the training options. This name-value pair enables the saving of
% partially trained detectors during the training process. If training is
% interrupted, such as from a power outage or system failure, you can
% resume training from the saved checkpoint.
%% Train Faster R-CNN
% Now that the CNN and training options are defined, you can train the
% detector using |trainFasterRCNNObjectDetector|.
%
% During training, multiple image regions are processed from the training
% images. The number of image regions per image is controlled by
% |'NumRegionsToSample'|. The |'PositiveOverlapRange'| and
% |'NegativeOverlapRange'| name-value pairs control which image regions are
% used for training. Positive training samples are those that overlap with
% the ground truth boxes by 0.6 to 1.0, as measured by the bounding box
% intersection over union metric. Negative training samples are those that
% overlap by 0 to 0.3. The best values for these parameters should be
% chosen by testing the trained detector on a validation set. To choose the
% best values for these name-value pairs, test the trained detector on a
% validation set.
%
% For Faster R-CNN training, *the use of a parallel pool of MATLAB workers
% is highly recommended to reduce training time*.
% |trainFasterRCNNObjectDetector| automatically creates and uses a parallel
% pool based on your parallel preferences defined in
% <docid:vision_gs#bugsb2y-1 Computer Vision System Toolbox Preferences>. Ensure that the
% use of the parallel pool is enabled prior to training.
%
% A CUDA-capable NVIDIA(TM) GPU with compute capability 3.0 or higher is
% highly recommended for training.
%
% To save time while running this example, a pretrained network is loaded
% from disk. To train the network yourself, set the |doTrainingAndEval|
% variable shown here to true.
% A trained network is loaded from disk to save time when running the
% example. Set this flag to true to train the network.
doTrainingAndEval = false;
if doTrainingAndEval
% Set random seed to ensure example training reproducibility.
rng(0);
% Train Faster R-CNN detector. Select a BoxPyramidScale of 1.2 to allow
% for finer resolution for multiscale object detection.
detector = trainFasterRCNNObjectDetector(trainingData, layers, options, ...
'NegativeOverlapRange', [0 0.3], ...
'PositiveOverlapRange', [0.5 1], ...
'NumRegionsToSample', [256 128 256 128], ...
'BoxPyramidScale', 1.2);
else
% Load pretrained detector for the example.
detector = data.detector;
end
%%
% To quickly verify the training, run the detector on a test image.
% Read a test image.
I = imread(testData.imageFilename{1});
% Run the detector.
[bboxes, scores] = detect(detector, I);
% Annotate detections in the image.
I = insertObjectAnnotation(I, 'rectangle', bboxes, scores);
figure
imshow(I)
%% Evaluate Detector Using Test Set
% Testing a single image showed promising results. To fully evaluate the
% detector, testing it on a larger set of images is recommended. Computer
% Vision System Toolbox(TM) provides object detector evaluation functions
% to measure common metrics such as average precision
% (|evaluateDetectionPrecision|) and log-average miss rates
% (|evaluateDetectionMissRate|). Here, the average precision metric is
% used. The average precision provides a single number that incorporates
% the ability of the detector to make correct classifications (precision)
% and the ability of the detector to find all relevant objects (recall).
%
% The first step for detector evaluation is to collect the detection
% results by running the detector on the test set. To avoid long evaluation
% time, the results are loaded from disk. Set the |doTrainingAndEval| flag
% from the previous section to true to execute the evaluation locally.
if doTrainingAndEval
% Run detector on each image in the test set and collect results.
resultsStruct = struct([]);
for i = 1:height(testData)
% Read the image.
I = imread(testData.imageFilename{i});
% Run the detector.
[bboxes, scores, labels] = detect(detector, I);
% Collect the results.
resultsStruct(i).Boxes = bboxes;
resultsStruct(i).Scores = scores;
resultsStruct(i).Labels = labels;
end
% Convert the results into a table.
results = struct2table(resultsStruct);
else
results = data.results;
end
% Extract expected bounding box locations from test data.
expectedResults = testData(:, 2:end);
% Evaluate the object detector using average precision metric.
[ap, recall, precision] = evaluateDetectionPrecision(results, expectedResults);
%%
% The precision/recall (PR) curve highlights how precise a detector is at
% varying levels of recall. Ideally, the precision would be 1 at all recall
% levels. In this example, the average precision is 0.6. The use of
% additional layers in the network can help improve the average precision,
% but might require additional training data and longer training time.
% Plot precision/recall curve
figure
plot(recall, precision)
xlabel('Recall')
ylabel('Precision')
grid on
title(sprintf('Average Precision = %.2f', ap))
%% Summary
% This example showed how to train a vehicle detector using deep learning.
% You can follow similar steps to train detectors for traffic signs,
% pedestrians, or other objects.
%
% To learn more about deep learning, see <docid:vision_doccenter#bvd8yot-1
% Deep Learning for Computer Vision>.
%% References
% [1] Ren, Shaoqing, et al. "Faster R-CNN: Towards Real-Time Object
% detection with Region Proposal Networks." _Advances in Neural Information
% Processing Systems._ 2015.