forked from openvinotoolkit/model_server
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmulti_inputs.py
290 lines (250 loc) · 10.4 KB
/
multi_inputs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
#
# Copyright (c) 2019-2020 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Multi-threaded sample to run a RMNet & SSDMobilenet v2 that will
# detect only person, bike and vehicle (change the output parsing
# for more classes)
#
# Example usage:
# RMNet: python3.6 multi_inputs.py -n "RMNet" -l "data" -o "detection_out"
# -d 1024 -i 127.0.0.1 -p 9001 -c 1
# -f /var/repos/github/sample-videos/person-bicycle-car-detection.mp4
# SSDMobileNet: python3.6 multi_inputs.py -n "SSDMobileNet" -l "image_tensor"
# -o "DetectionOutput" -d 300 -i 127.0.0.1 -p 9001 -c 1
# -f /var/repos/github/sample-videos/person-bicycle-car-detection.mp4
from __future__ import print_function
from argparse import ArgumentParser, SUPPRESS
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc
from time import time, sleep
import sys
import os
import cv2
import grpc
import threading
import logging as log
from tensorflow import make_tensor_proto, make_ndarray
# global data (shared between threads & main)
CLASSES = ["None", "Pedestrian", "Vehicle", "Bike", "Other"]
COLORS = [(255, 255, 255), (255, 0, 0), (0, 255, 0), (0, 0, 255), (128, 128, 128)]
SRC_TYPE = ["Camera", "Video"]
exit_ok = False # manage thread loop
CAM_WIDTH = 640 # camera width
CAM_HEIGHT = 480 # camera height
CAM_FPS = 30 # camera speed
CONFIDENCE_THRESHOLD = 0.75 # detection confidence
#####################################################################################
def build_argparser():
parser = ArgumentParser(add_help=False)
args = parser.add_argument_group('Options')
args.add_argument('-h', '--help', action='help', default=SUPPRESS,
help='Show this help message and exit.')
args.add_argument('-n', '--network_name', required=True,
type=str, help='Network name')
args.add_argument('-l', '--input_layer', required=True,
type=str, help='Input layer name')
args.add_argument('-o', '--output_layer', required=True,
type=str, help='Output layer name')
args.add_argument('-d', '--frame_size', required=True,
type=int, help='Input frame width and height that matches used model')
args.add_argument('-c', '--num_cameras', help='Number of cameras to be used',
required=False, type=int, default=1)
args.add_argument('-f', '--file', help='Path to the video file',
required=False, type=str)
args.add_argument('-i', '--ip', help='ip address of the ovms', required=True)
args.add_argument('-p', '--port', help='port of the ovms', required=True)
return parser
# Decoding idea based on the link below. Not very accurate. So pls implement yours
# https://github.com/opencv/open_model_zoo/blob/master/intel_models/\
# person-vehicle-bike-detection-crossroad-0078/\
# description/person-vehicle-bike-detection-crossroad-0078.md
def parse_output(thr_id, res, frame):
for batch, data in enumerate(res):
pred = data[0]
for values in enumerate(pred):
# tuple
index = values[0]
l_pred = values[1]
# actual predictions
img_id = l_pred[0]
label = l_pred[1]
conf = l_pred[2]
x_min = l_pred[3]
y_min = l_pred[4]
x_max = l_pred[5]
y_max = l_pred[6]
# preventing any wrong array indexing (for RMNet)
if label > 4:
# Unsupported class label detected. Change to `other`.
label = 4
# Do you want confidence level to be passed from command line?
if img_id != -1 and conf >= CONFIDENCE_THRESHOLD:
# draw the bounding boxes on the frame
height, width = frame.shape[:2]
cv2.rectangle(frame, (int(width * x_min), int(height * y_min)),
(int(width * x_max), int(height * y_max)), COLORS[int(label)], 2)
cv2.putText(frame, str(CLASSES[int(label)]), (int(width * x_min)-10,
int(height * y_min)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
COLORS[int(label)], 2)
return frame
# This is common for both the camera & video files
def thread_function(thr_id, network_name, input_layer, output_layer, input_dimension,
ip, port, disp_buf, src_type, src_name):
if src_type == "Camera":
# UVC camera init - camera threads always come first and we use it
# to generate the camera indexes
cam = cv2.VideoCapture(thr_id)
if not (cam.isOpened()):
log.error("Failed to open the UVC camera {}".format(thr_id))
return
cam.set(cv2.CAP_PROP_FRAME_WIDTH, CAM_WIDTH)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, CAM_HEIGHT)
# not all UVC cameras honor below request
cam.set(cv2.CAP_PROP_FPS, CAM_FPS)
# If your camera sends other than MJPEG, change below
cam.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"MJPG"))
elif src_type == "Video":
# Assumption: src_name will be valid
cam = cv2.VideoCapture(src_name)
# inference stats
fps = 0 # camera fps
inf_fps = 0 # inference fps
dropped_fps = 0 # dropped frame fps
cam_start_time = time()
# ovms connection
channel = grpc.insecure_channel("{}:{}".format(ip, port))
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
request = predict_pb2.PredictRequest()
# Note: Pls maintain the same name while launching ovms docker container
request.model_spec.name = network_name
global exit_ok
while exit_ok == False:
ret, frame = cam.read()
if src_type == "Video":
# restart the video file when it reaches the end
if not ret:
cam.set(cv2.CAP_PROP_POS_FRAMES, 0)
continue
# normalize the video frame dimension to that of the camera
else:
# to maintain the frame inferencing parity with the cameras, lets sleep
# here to maintain cam_fps speed
sleep((1000 / CAM_FPS) / 1000)
# enable below line to keep video file & camera output window dimensions the same
# frame = cv2.resize(frame, (CAM_WIDTH, CAM_HEIGHT))
fps = fps + 1
if (time() - cam_start_time) * 1000 >= 1000:
log.warning('{}{} fps: {}, Inf fps: {}, dropped fps: {}'
.format(src_type, thr_id, fps, inf_fps, dropped_fps))
fps = 0
inf_fps = 0
dropped_fps = 0
cam_start_time = time()
# resize the frame to what network input layer expects it to be
image = cv2.resize(frame, (input_dimension, input_dimension))
image = image.transpose(2, 0, 1).reshape(1, 3, input_dimension, input_dimension)
image = image.astype('float32')
inf_time = time()
# send the input as protobuf
request.inputs[input_layer].CopyFrom(
make_tensor_proto(image, shape=None))
try:
result = stub.Predict(request, 10.0)
except Exception as e:
log.error('Caught exception {}'.format(e))
cam.release()
return
duration = time() - inf_time
# decode the received output as protobuf
res = make_ndarray(result.outputs[output_layer])
if not res.any():
log.error('Thr{}: Predictions came back with wrong output layer name'.format(thr_id))
dropped_fps = dropped_fps + 1
disp_buf[thr_id] = frame
else:
log.debug('Predictions came back fine')
inf_fps = inf_fps + 1
disp_buf[thr_id] = parse_output(thr_id, res, frame)
# while exit_ok == False
cam.release()
log.warning('Exiting thread {}'.format(thr_id))
#####################################################################################
def main():
log.basicConfig(format="[$(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
args = build_argparser().parse_args()
num_cam = args.num_cameras if (args.num_cameras) else 0
vid_src = args.file
network_name = args.network_name
input_layer = args.input_layer
output_layer = args.output_layer
input_dimension = args.frame_size
ip_addr = args.ip
port_no = args.port
if not args.file and not args.num_cameras:
log.error('Please supply either the camera or the video file. Try -f for options')
return
if not ip_addr or not port_no:
log.error('Please supply valid IP and/or port number of OVMS server')
return
video_files = []
if vid_src:
if os.path.isdir(vid_src):
for r, d, f in os.walk(vid_src):
for f_ in f:
# only mp4 files supported as of now
if '.mp4' in f_:
video_files.append(r + f_)
elif os.path.isfile(vid_src):
if '.mp4' in vid_src:
video_files.append(vid_src)
# thread management
thr = [None] * (num_cam + len(video_files))
# display buffers shared between camera threads
disp_buf = {}
# Known issue: Depending on the USB enumeration, camera nodes need not be
# in sequence. Pls pass the device node info through a file or command line
# if it happens in your system
for i in range(num_cam):
disp_buf[i] = None
thr[i] = threading.Thread(target=thread_function,
args=(i, network_name, input_layer, output_layer, input_dimension,
ip_addr, port_no, disp_buf, SRC_TYPE[0], None))
thr[i].start()
for i in range(num_cam, num_cam + len(video_files)):
disp_buf[i] = None
thr[i] = threading.Thread(target=thread_function,
args=(i, network_name, input_layer, output_layer, input_dimension,
ip_addr, port_no, disp_buf, SRC_TYPE[1], video_files[i - num_cam]))
thr[i].start()
# For whatever reasons, cv2.imshow() doesnt work from threads. Hence we shove the
# infered data to the main thread to display.
global exit_ok
while exit_ok == False:
for i in range(num_cam + len(video_files)):
if disp_buf[i] is not None:
cv2.imshow('Predictions {}'.format(i), disp_buf[i])
disp_buf[i] = None
# exit the program if 'q' is pressed on any window
if cv2.waitKey(1) == ord('q'):
exit_ok = True
break
# wait for all the threads to join
for i in range(num_cam):
thr[i].join()
# close all open windows
cv2.destroyAllWindows()
log.warning('Good Bye!')
if __name__ == '__main__':
sys.exit(main() or 0)