-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscene_summary.py
125 lines (109 loc) · 3.6 KB
/
scene_summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""
implement an algorithm for scene understanding
divide segmentation result by grids and describe objects in each grid
"""
import os
import sys
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
def get_names():
    """
    build the lookup table from class index to class name
    return:
        names -- dict, {class index: class name}, indexes start at 1
    """
    labels = (
        'wall',
        'floor',
        'plant',
        'ceiling',
        'furniture',
        'person',
        'door',
        'objects',
    )
    # a fresh dict is built on every call, keys follow the order above
    return dict(enumerate(labels, start=1))
def get_small_portion(h, w, grid=(2, 3)):
    """
    compute how many pixels count as "small" inside one grid cell
    small = fewer than 5% of the pixels of a single cell
    the intermediate numbers are printed to stdout
    input:
        h, w -- height and width of the full image
        grid -- tuple, (# row grid, # col grid)
    return:
        threshold -- int, 5% of the cell area, truncated
    """
    rows, cols = grid
    # cell side lengths are truncated before they are multiplied
    cell_area = int(h / rows) * int(w / cols)
    cutoff = int(cell_area * 0.05)
    report = (
        '(h = {}, w= {}), grid = ({})'.format(h, w, grid),
        'grid size = {}'.format(cell_area),
        'threshold = {}'.format(cutoff),
    )
    for line in report:
        print(line)
    return cutoff
def create_grid(h, w, grid=(2, 3)):
    """
    create a numpy array of shape (h, w) divided into grid cells, every
    cell filled with its own code (a multiple of 10)
    cells are numbered from left to right, top to bottom
    the last row and the last column of cells absorb the leftover pixels
    when h or w is not evenly divisible
    input:
        h, w -- height and width of the array
        grid -- tuple, (# row grid, # col grid); not limited to (2, 3)
    return:
        mat -- numpy array of shape (h, w); dtype is uint8 as long as the
               codes fit, otherwise the smallest unsigned type that holds them
    """
    grid_r, grid_c = grid
    grid_h = int(h / grid_r)
    grid_w = int(w / grid_c)
    # Cell numbers advance by row_stride per row. The stride never drops
    # below 3 so that every grid the former fixed table supported (up to
    # 2 x 3) keeps exactly the same codes: 0, 10, 20 / 30, 40, 50.
    row_stride = max(grid_c, 3)
    # Largest code in the array, plus headroom of 9 so that a one-digit class
    # index added on top (as scene_summarize does) cannot overflow the dtype.
    max_code = 10 * ((grid_r - 1) * row_stride + (grid_c - 1)) + 9
    mat = np.zeros(shape=(h, w), dtype=np.min_scalar_type(max_code))
    for i in range(grid_r):
        # the bottom row of cells runs to the end of the array
        row_end = h if i == grid_r - 1 else (i + 1) * grid_h
        for j in range(grid_c):
            # the rightmost column of cells runs to the end of the array
            col_end = w if j == grid_c - 1 else (j + 1) * grid_w
            mat[i * grid_h:row_end, j * grid_w:col_end] = 10 * (i * row_stride + j)
    return mat
def filter_idxs(idxs, counts, threshold=900):
    """
    keep only the idxs whose count is strictly above threshold
    input:
        idxs, counts -- paired sequences, e.g. the output of
                        np.unique(. , return_counts = True)
        threshold -- an idx is dropped when its count <= threshold
    return:
        new_idxs -- list of the surviving idxs, original order preserved
    """
    kept = []
    for label, n_pixels in zip(idxs, counts):
        if n_pixels > threshold:
            kept.append(label)
    return kept
def scene_summarize(pred_idx, mat, names, threshold=900):
    """
    list the objects found in every grid cell
    input:
        pred_idx -- predicted class indexes, added element-wise to mat
                    (presumably a per-pixel array shaped like mat -- confirm
                    against the caller)
        mat -- grid array, e.g. the output of create_grid
        names -- dict of names mapping from index
        threshold -- combined codes with count <= threshold are ignored
    return:
        grid_dict: {grid_idx: names}
    """
    # adding the grid code to the class index gives one combined code per
    # element: tens and above hold the grid, the last digit holds the class
    combined = pred_idx + mat
    codes, pixel_counts = np.unique(combined, return_counts=True)
    # drop the combined codes that cover too few elements
    kept_codes = filter_idxs(codes, pixel_counts, threshold=threshold)
    return grid_name_mapping(kept_codes, names)
def grid_name_mapping(new_idxs, names):
    """
    group object names by the grid cell they were found in
    input:
        new_idxs -- list of combined indexes (grid code + class index)
                    left after count filtering
        names -- dict of names mapping from index
    output:
        grid_dict: defaultdict(list), {grid_idx: names}
    """
    per_grid = defaultdict(list)
    for code in new_idxs:
        # tens and above select the grid cell, the last digit selects the class
        cell, cls = divmod(code, 10)
        # names is keyed from 1 while the last digit starts at 0
        label = names[cls + 1]
        per_grid[cell].append(label)
    return per_grid
def test_create_grid():
    """
    visualize the created numpy grid, check the shape, dtype and filling
    return:
        mat -- the grid array built by create_grid
    """
    h = 240
    w = 484
    grid = (2, 3)
    # pass the same grid the expected cell count is derived from, so the two
    # cannot drift apart
    mat = create_grid(h, w, grid=grid)
    n = grid[0] * grid[1]
    mat_h, mat_w = mat.shape
    assert h == mat_h and w == mat_w, 'WRONG SHAPE'
    assert mat.dtype == np.uint8, 'WRONG DTYPE'
    # every grid cell carries its own value, so n distinct values are expected
    assert len(np.unique(mat)) == n, 'WRONG FILLING'
    # show the grid for a visual check
    plt.imshow(mat)
    plt.show()
    return mat