from imports import *
import streamlit as st


class DepthEstimator:
    def __init__(self, model_path, model_type="dpt_large", optimize=True):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.optimize = optimize
        if model_type == "dpt_large":  # DPT-Large
            self.model = DPTDepthModel(
                path=model_path,
                backbone="vitl16_384",
                non_negative=True,
            )
            self.net_w, self.net_h = 384, 384
            self.resize_mode = "minimal"
            self.normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        elif model_type == "dpt_hybrid":  # DPT-Hybrid
            self.model = DPTDepthModel(
                path=model_path,
                backbone="vitb_rn50_384",
                non_negative=True,
            )
            self.net_w, self.net_h = 384, 384
            self.resize_mode = "minimal"
            self.normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        elif model_type == "midas_v21":
            self.model = MidasNet(model_path, non_negative=True)
            self.net_w, self.net_h = 384, 384
            self.resize_mode = "upper_bound"
            self.normalization = NormalizeImage(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )
        elif model_type == "midas_v21_small":
            self.model = MidasNet_small(
                model_path,
                features=64,
                backbone="efficientnet_lite3",
                exportable=True,
                non_negative=True,
                blocks={"expand": True},
            )
            self.net_w, self.net_h = 256, 256
            self.resize_mode = "upper_bound"
            self.normalization = NormalizeImage(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )
        else:
            raise ValueError(
                f"model_type '{model_type}' not implemented; use one of: "
                "dpt_large, dpt_hybrid, midas_v21, midas_v21_small"
            )
        self.transform = Compose(
            [
                Resize(
                    self.net_w,
                    self.net_h,
                    resize_target=None,
                    keep_aspect_ratio=True,
                    ensure_multiple_of=32,
                    resize_method=self.resize_mode,
                    image_interpolation_method=cv2.INTER_CUBIC,
                ),
                self.normalization,
                PrepareForNet(),
            ]
        )
        self.model.eval()
        if self.optimize and self.device == torch.device("cuda"):
            # channels-last memory layout and fp16 weights speed up CUDA inference
            self.model = self.model.to(memory_format=torch.channels_last)
            self.model = self.model.half()
        self.model.to(self.device)

    def read_image(self, path):
        """Read an image from disk and return it as RGB floats in [0, 1]."""
        img = cv2.imread(path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        if img.ndim == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
        return img

    def post_process_depth(self, depth, bits=2):
        """Rescale a depth map to the full range of an 8*bits unsigned integer."""
        depth_min = depth.min()
        depth_max = depth.max()
        max_val = (2 ** (8 * bits)) - 1
        if depth_max - depth_min > np.finfo("float").eps:
            out = max_val * (depth - depth_min) / (depth_max - depth_min)
        else:
            out = np.zeros(depth.shape, dtype=depth.dtype)
        return out

    def run(self, input_path):
        """Run depth estimation on a single image and return the raw prediction."""
        img = self.read_image(input_path)
        img_input = self.transform({"image": img})["image"]
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(self.device).unsqueeze(0)
            if self.optimize and self.device == torch.device("cuda"):
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()
            prediction = self.model.forward(sample)
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )
        # prediction = self.post_process_depth(prediction)
        return prediction
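
# A minimal usage sketch for DepthEstimator. The weight path below is only an
# example; any of the checkpoint files listed in DepthCalibrationPipeline's
# default_models dict works, paired with the matching model_type:
#
#     estimator = DepthEstimator("weights/dpt_hybrid-midas-501f0c75.pt",
#                                model_type="dpt_hybrid")
#     pred = estimator.run("scene.jpg")                        # H x W float inverse-depth map
#     depth_img = estimator.post_process_depth(pred, bits=1)   # rescaled to 0..255
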

def read_pfm(path):
    """Read a PFM file.

    Args:
        path (str): path to file

    Returns:
        tuple: (data, scale)
    """
    with open(path, "rb") as file:
        header = file.readline().rstrip()
        if header.decode("ascii") == "PF":
            color = True
        elif header.decode("ascii") == "Pf":
            color = False
        else:
            raise Exception("Not a PFM file: " + path)

        dim_match = re.match(r"^(\d+)\s(\d+)\s$", file.readline().decode("ascii"))
        if dim_match:
            width, height = list(map(int, dim_match.groups()))
        else:
            raise Exception("Malformed PFM header.")

        scale = float(file.readline().decode("ascii").rstrip())
        if scale < 0:
            # little-endian
            endian = "<"
            scale = -scale
        else:
            # big-endian
            endian = ">"

        data = np.fromfile(file, endian + "f")
        shape = (height, width, 3) if color else (height, width)
        data = np.reshape(data, shape)
        # PFM stores rows bottom-to-top, so flip vertically
        data = np.flipud(data)
        return data, scale

def write_pfm(path, image, scale=1):
    """Write a PFM file.

    Args:
        path (str): path to file
        image (array): data
        scale (int, optional): Scale. Defaults to 1.
    """
    with open(path, "wb") as file:
        if image.dtype.name != "float32":
            raise Exception("Image dtype must be float32.")

        # PFM stores rows bottom-to-top, so flip vertically before writing
        image = np.flipud(image)

        if len(image.shape) == 3 and image.shape[2] == 3:  # color image
            color = True
        elif len(image.shape) == 2 or (len(image.shape) == 3 and image.shape[2] == 1):  # greyscale
            color = False
        else:
            raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")

        file.write(("PF\n" if color else "Pf\n").encode())
        file.write(("%d %d\n" % (image.shape[1], image.shape[0])).encode())

        # a negative scale marks little-endian data
        endian = image.dtype.byteorder
        if endian == "<" or (endian == "=" and sys.byteorder == "little"):
            scale = -scale
        file.write(("%f\n" % scale).encode())

        image.tofile(file)
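
# Round-trip sketch for the PFM helpers; "depth.pfm" is a hypothetical path:
#
#     depth = np.random.rand(480, 640).astype(np.float32)
#     write_pfm("depth.pfm", depth)
#     data, scale = read_pfm("depth.pfm")
#     assert data.shape == (480, 640) and scale == 1.0
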

class DepthCalibrationPipeline:
    def __init__(self, segmentation_model_path, depth_model_base_path,
                 depth_model_type="dpt_hybrid", depth_optimize=True):
        self.segmentation_model_path = segmentation_model_path
        self.ins = instanceSegmentation()
        self.ins.load_model(self.segmentation_model_path)
        self.depth_model_base_path = depth_model_base_path
        self.depth_model_type = depth_model_type
        self.depth_optimize = depth_optimize
        default_models = {
            "midas_v21_small": os.path.join(depth_model_base_path, "midas_v21_small-70d6b9c8.pt"),
            "midas_v21": os.path.join(depth_model_base_path, "midas_v21-f6b98070.pt"),
            "dpt_large": os.path.join(depth_model_base_path, "dpt_large-midas-2f21e586.pt"),
            "dpt_hybrid": os.path.join(depth_model_base_path, "dpt_hybrid-midas-501f0c75.pt"),
        }
        self.depth_model_weights = default_models[depth_model_type]
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True
        self.depth_model = DepthEstimator(self.depth_model_weights,
                                          model_type=depth_model_type,
                                          optimize=depth_optimize)
        self.segmentation_results = None
        self.depth_map = None
        self.depth_scale = None
        self.scaled_depth_map = None
        self.obj_req_data = {'id': None, 'name': None, 'box': None, 'mask': None}

    def segmentor(self, img_path):
        return self.ins.segmentImage(img_path, show_bboxes=True, extract_segmented_objects=True)

    def get_depth_map(self, img_path):
        predictions = self.depth_model.run(img_path)
        # min-max normalise to [0, 1], then invert: MiDaS predicts inverse depth
        predictions_minmax = (predictions - np.min(predictions)) / (np.max(predictions) - np.min(predictions))
        depth_map = 1 - predictions_minmax
        return predictions, depth_map

    def get_scaled_depthmap_v1(self, segmentation_results, depth_map, ref_obj_name, ref_gt_depth):
        # reference-object based scaling; ref_obj_name (e.g. "bottle") must be
        # present in class_names of the segmentation results
        ref_obj_idx = segmentation_results['class_names'].index(ref_obj_name)
        ref_obj_mask = segmentation_results['masks'][:, :, ref_obj_idx]
        ref_obj_depth_map = ref_obj_mask * depth_map
        ref_obj_area, ref_depth_area = np.sum(ref_obj_mask), np.sum(ref_obj_depth_map)
        ref_avg_depth = ref_depth_area / ref_obj_area
        scale = ref_gt_depth / ref_avg_depth
        scaled_depth_map = scale * depth_map
        return scaled_depth_map

    def get_scaled_depthmap(self, gt_background_depth, margin=50, centered=1):
        # background based scaling
        def compute_area(xl, yl, xr, yr):
            return abs((xr - xl) * (yr - yl))

        H, W, C = self.segmentation_img.shape
        xc, yc = W // 2, H // 2
        min_area = float('inf')
        for i, box in enumerate(self.segmentation_results['boxes']):
            xl, yl, xr, yr = box
            area = compute_area(xl, yl, xr, yr)
            if centered:
                # keep the smallest box that either contains the image centre or
                # matches a previously selected object id
                if (((xl <= xc <= xr) and (yl <= yc <= yr)) or
                        ((self.obj_req_data['id'] is not None) and
                         (self.obj_req_data['id'] == self.segmentation_results['class_ids'][i]))) and \
                        (area < min_area):
                    self.obj_req_data['id'] = self.segmentation_results['class_ids'][i]
                    self.obj_req_data['name'] = self.segmentation_results['class_names'][i]
                    self.obj_req_data['box'] = box
                    self.obj_req_data['mask'] = self.segmentation_results['masks'][:, :, i]
                    min_area = area
            else:
                self.obj_req_data['id'] = self.segmentation_results['class_ids'][i]
                self.obj_req_data['name'] = self.segmentation_results['class_names'][i]
                self.obj_req_data['box'] = box
                self.obj_req_data['mask'] = self.segmentation_results['masks'][:, :, i]
                min_area = area
        xl, yl, xr, yr = self.obj_req_data['box']
        # xml, yml, xmr, ymr = max(xl-margin,0), max(yl-margin,0), min(xr+margin,W-1), min(yr+margin,H-1)
        xml, yml, xmr, ymr = max(xl - 2 * margin, 0), max(yl - 2 * margin, 0), min(xr - margin, W - 1), min(yr - margin, H - 1)
        background_mask = np.zeros((H, W))
        background_mask[yml:ymr, xml:xmr] = 1
        background_mask[yl:yr, xl:xr] = 0
        background_depth_map = background_mask * self.depth_map
        area_background, area_obj = compute_area(xml, yml, xmr, ymr), compute_area(xl, yl, xr, yr)
        area_margin = area_background - area_obj
        total_background_depth = np.sum(background_depth_map)
        avg_background_depth = total_background_depth / area_margin
        self.depth_scale = gt_background_depth / avg_background_depth
        self.scaled_depth_map = self.depth_scale * self.depth_map
        return self.depth_scale, self.scaled_depth_map

    def obj2depth(self, scaled_depth_map, results, obj_name):
        obj_idx = results['class_names'].index(obj_name)
        obj_mask = results['masks'][:, :, obj_idx]
        obj_scaled_depth_map = obj_mask * scaled_depth_map
        obj_area = np.sum(obj_mask)
        depth_area = np.sum(obj_scaled_depth_map)
        avg_depth = depth_area / obj_area
        return avg_depth

    def run_base_block(self, img_path, depth_save_dir=None):
        self.img_path = img_path
        self.depth_save_dir = depth_save_dir
        segmentor_output = self.segmentor(img_path)
        self.segmentation_results = segmentor_output[0]
        self.segmentation_img = segmentor_output[1]
        self.midas_pred, self.depth_map = self.get_depth_map(img_path)
        if depth_save_dir is not None:
            processed_depth_map = self.depth_model.post_process_depth(self.midas_pred, bits=1)
            # depth_img = Image.fromarray(255-np.uint8(self.depth_map*255)).convert('RGB')
            fname = img_path.split('/')[-1]
            cv2.imwrite(f'./{depth_save_dir}/{fname}', processed_depth_map.astype("uint8"))
            write_pfm(f"./{depth_save_dir}/{fname.split('.')[0]}" + '.pfm', self.midas_pred.astype(np.float32))

    def run_scaler(self, ref_obj_name, ref_gt_depth):
        self.scaled_depth_map = self.get_scaled_depthmap_v1(self.segmentation_results, self.depth_map, ref_obj_name, ref_gt_depth)

    def run_without_base(self, obj_name, ref_obj_name, ref_gt_depth):
        self.scaled_depth_map = self.get_scaled_depthmap_v1(self.segmentation_results, self.depth_map, ref_obj_name, ref_gt_depth)
        return self.obj2depth(self.scaled_depth_map, self.segmentation_results, obj_name)

    def run_full_pipeline(self, img_path, obj_name, ref_obj_name, ref_gt_depth):
        self.run_base_block(img_path)
        return self.run_without_base(obj_name, ref_obj_name, ref_gt_depth)

    def run_only_on_obj(self, obj_name):
        return self.obj2depth(self.scaled_depth_map, self.segmentation_results, obj_name)
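
# End-to-end sketch for DepthCalibrationPipeline. The model paths and the
# ground-truth distance are placeholders, not values shipped with this file;
# both "cup" and "bottle" must appear in the segmentation class_names:
#
#     pipeline = DepthCalibrationPipeline("models/pointrend_resnet50.pkl", "weights")
#     pipeline.run_base_block("scene.jpg", depth_save_dir="depths")
#     # reference-object scaling: the bottle is known to be 0.5 m from the camera
#     avg_cup_depth = pipeline.run_without_base("cup", "bottle", 0.5)
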

def id2pc(img_path, depth_pfm_path, cam_mat_path, cloud_save_dir, depth_scale):
    img = cv2.imread(img_path)
    color = o3d.geometry.Image(img)
    if cam_mat_path is not None:
        cm_file = cv2.FileStorage()
        cm_file.open(cam_mat_path, cv2.FileStorage_READ)
        camera_intrinsic_matrix = cm_file.getNode('intrinsic').mat()
        camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(width=640, height=480,
                                                             fx=camera_intrinsic_matrix[0][0],
                                                             fy=camera_intrinsic_matrix[1][1],
                                                             cx=camera_intrinsic_matrix[0][-1],
                                                             cy=camera_intrinsic_matrix[1][-1])
    else:
        camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault)
    # st.write(camera_intrinsic.intrinsic_matrix)
    # invert the normalised MiDaS prediction to get a relative depth map
    idepth = read_pfm(depth_pfm_path)[0]
    idepth = idepth - np.amin(idepth)
    idepth /= np.amax(idepth)
    depth = (1 - idepth) + 1e-6
    depth = o3d.geometry.Image(depth)
    idepth_scale = 1 / depth_scale
    rgbdi = o3d.geometry.RGBDImage.create_from_color_and_depth(color, depth, depth_scale=idepth_scale, depth_trunc=1000)
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbdi, camera_intrinsic, project_valid_depth_only=True)
    # pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
    if cloud_save_dir is not None:
        if not os.path.exists(cloud_save_dir):
            os.makedirs(cloud_save_dir)
        filename = img_path.split('/')[-1].split('.')[0] + '.ply'
        o3d.io.write_point_cloud(os.path.join(cloud_save_dir, filename), pcd,
                                 write_ascii=False, compressed=False, print_progress=False)
    return pcd
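
# Sketch: turn a saved inverse-depth PFM into a point cloud. The paths below
# are illustrative; pass None as cam_mat_path to fall back to the PrimeSense
# default intrinsics:
#
#     pcd = id2pc("scene.jpg", "depths/scene.pfm", None, "pointclouds", depth_scale=0.8)
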

def obj2cloud(depth_calibration_pipeline, gt_background_depth, intrinsics_mat_path='cameraIntrinsic.xml', cloud_save_dir='pointclouds', margin=50, centered=1):
    img_path = depth_calibration_pipeline.img_path
    fname = img_path.split('/')[-1]
    depth_pfm_path = f"./{depth_calibration_pipeline.depth_save_dir}/{fname.split('.')[0]}" + '.pfm'
    depth_scale, _ = depth_calibration_pipeline.get_scaled_depthmap(gt_background_depth=gt_background_depth, margin=margin, centered=centered)
    pcd = id2pc(img_path, depth_pfm_path, intrinsics_mat_path, cloud_save_dir, depth_scale)
    point_cloud_flat = np.asarray(pcd.points)
    # select only the points belonging to the segmented object of interest;
    # this assumes every pixel projected to a valid point (true here, since
    # id2pc produces strictly positive depths for all pixels)
    obj_mask = depth_calibration_pipeline.obj_req_data['mask']
    obj_scaled_depth_map = obj_mask * depth_calibration_pipeline.depth_map
    obj_mask = (np.reshape(
        obj_scaled_depth_map, (obj_scaled_depth_map.shape[0] * obj_scaled_depth_map.shape[1])) > 0
    )
    obj_points = point_cloud_flat[obj_mask, :]
    obj_points_df = pd.DataFrame(obj_points, columns=['x', 'y', 'z'])
    obj_cloud = PyntCloud(obj_points_df)
    transformed_cloud_np = obj_cloud.points.to_numpy()
    # drop NaN points before handing the cloud back to Open3D
    transformed_cloud_np = transformed_cloud_np[~np.isnan(transformed_cloud_np).any(axis=1), :]
    transformed_cloud_o3d = o3d.geometry.PointCloud()
    transformed_cloud_o3d.points = o3d.utility.Vector3dVector(transformed_cloud_np)
    # flip the cloud into a y-up, z-forward frame for viewing
    transformed_cloud_o3d.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
    return transformed_cloud_o3d
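
# Sketch: extract the point cloud of the auto-selected centred object. This
# assumes run_base_block was called with a depth_save_dir, since obj2cloud
# reads the PFM written there; the 2.0 m background distance is illustrative:
#
#     obj_pcd = obj2cloud(pipeline, gt_background_depth=2.0,
#                         intrinsics_mat_path=None, margin=50, centered=1)
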

def pc_post_process(pcd, nb_neighbors=15, std_ratio=1.1, voxel_size=5e-3):
    # statistical outlier removal followed by voxel down-sampling
    cl, ind = pcd.remove_statistical_outlier(nb_neighbors=nb_neighbors,
                                             std_ratio=std_ratio)
    pcd_clean = pcd.select_by_index(ind)
    voxel_pcd = pcd_clean.voxel_down_sample(voxel_size=voxel_size)
    return voxel_pcd


def compute_volume(pcd):
    from scipy.spatial import ConvexHull
    # volume of the convex hull spanned by the point cloud
    hull = ConvexHull(np.asarray(pcd.points))
    return hull.volume
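
# Typical clean-up before measuring, with illustrative parameters, continuing
# from the obj2cloud sketch above:
#
#     clean = pc_post_process(obj_pcd, nb_neighbors=15, std_ratio=1.1, voxel_size=5e-3)
#     print("convex-hull volume:", compute_volume(clean))
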