-- extract-features-and-opticalflow-from-detections.lua
-- Third-party packages: Torch core, nn modules, ffmpeg video access, liuflow
-- optical flow, loadcaffe model import and matio (.mat file I/O).
require 'torch'
require 'nn'
require 'ffmpeg'
require 'liuflow'
-- Make FloatTensor the default tensor type for tensors created from here on.
torch.setdefaulttensortype('torch.FloatTensor')
require 'loadcaffe'
-- matio is only referenced by the commented-out example at the end of this file.
local matio = require 'matio'
matio.use_lua_strings = true
-- Executes a helper script by absolute path; presumably this is what defines
-- processImage(), which extractFeatures() calls but this file does not define.
-- NOTE(review): `image` (image.crop) is used below but never required in this
-- file; it is presumably made global by one of the packages above or by this
-- helper script -- confirm.
dofile('/local/nrakover/meng/load-and-process-img.lua')
--- Build the 2-D running-sum table of a matrix.
-- Despite the name, the recurrence used here produces a summed-area table
-- (integral image): entry (r, c) of the result is the sum of every A[i][j]
-- with i <= r and j <= c.
-- @param A  2-D tensor (anything indexable as A[r][c] that offers :size()).
-- @return   a new torch.FloatTensor with the same size as A.
function distanceTransform( A )
    local n_rows, n_cols = A:size(1), A:size(2)
    local sums = torch.FloatTensor(A:size())
    -- Single row-major pass: every cell only depends on cells above / to the left,
    -- which have already been written by the time it is reached.
    for r = 1, n_rows do
        for c = 1, n_cols do
            if r == 1 and c == 1 then
                -- Origin: nothing to accumulate yet
                sums[1][1] = A[1][1]
            elseif c == 1 then
                -- First column: accumulate downwards only
                sums[r][1] = sums[r-1][1] + A[r][1]
            elseif r == 1 then
                -- First row: accumulate rightwards only
                sums[1][c] = sums[1][c-1] + A[1][c]
            else
                -- Interior: inclusion-exclusion over the three neighbours
                sums[r][c] = sums[r-1][c] + sums[r][c-1] - sums[r-1][c-1] + A[r][c]
            end
        end
    end
    return sums
end
--- Subtract fixed per-channel means from an image.
-- The three constants are applied to slices 1, 2 and 3 of the first dimension.
-- NOTE(review): the values look like the ImageNet channel means commonly used
-- with VGG models on a 0-255 scale; confirm that processImage() delivers that
-- range and channel order.
-- @param im  tensor with at least 3 dimensions and at least 3 slices along the
--            first one (e.g. a 3 x H x W image). It is not modified.
-- @return    a new torch.FloatTensor of the same size; slices 1..3 are shifted,
--            any further slices are copied through unchanged.
function normalizeImage(im)
    assert(im:dim() >= 3 and im:size(1) >= 3,
           'normalizeImage expects an image tensor with at least 3 channels')
    local channel_means = {123.68, 116.779, 103.939}
    -- Copy into a fresh FloatTensor: converts the element type if needed and
    -- guarantees the caller's tensor is left untouched. Unlike adding an
    -- uninitialised "mean image", no garbage can leak into extra channels.
    local shifted = torch.FloatTensor(im:size()):copy(im)
    for c = 1, #channel_means do
        -- shifted[c] is a view, so the in-place add writes into `shifted`
        shifted[c]:add(-channel_means[c])
    end
    return shifted
end
-- Size argument handed to processImage() when preparing a crop for the network.
-- NOTE(review): 224 presumably matches the network's expected input resolution -- confirm.
local IMG_DIM = 224
-- Index (as used by net:get) of the module whose output extractFeatures() returns.
local LAYER_TO_EXTRACT = 43
--- Run one image through a network and return the activations of one layer.
-- @param img  image to describe; it is passed to processImage() with IMG_DIM
--             and then mean-shifted by normalizeImage().
-- @param net  network object offering :forward() and :get().
-- @return     the output of module LAYER_TO_EXTRACT, passed through nn.View(1),
--             squeezed, and converted to a DoubleTensor.
function extractFeatures(img, net)
    -- Prepare the network input
    local net_input = normalizeImage(processImage(img, IMG_DIM))
    net:forward(net_input)
    -- Take a private copy of the layer output before reshaping it
    local activations = net:get(LAYER_TO_EXTRACT).output:clone()
    local reshaped = nn.View(1):forward(activations)
    local flattened = torch.squeeze(reshaped)
    return flattened:double()
end
-- Load the VGG-19 Caffe model (prototxt + caffemodel) with the 'nn' backend.
-- This runs once, when the script is loaded; the resulting network is the one
-- the extraction functions below pass to extractFeatures().
local net = loadcaffe.load('/local/nrakover/meng/networks/VGG/VGG_ILSVRC_19_layers_deploy.prototxt', '/local/nrakover/meng/networks/VGG/VGG_ILSVRC_19_layers.caffemodel', 'nn')
--- Extract network features for every detection box of every video frame and,
-- optionally, an optical-flow summary per frame.
--
-- @param detectionsByFrame  structure with fields `width`, `height`, `fps` and
--        `length` (each read as field[1][1]) and `detections`, indexed as
--        detections[frame][detection][1..4] = x_min, y_min, x_max, y_max.
--        The number of frames processed is detections:size(1).
-- @param video_filepath     path given to ffmpeg.Video.
-- @param compute_opticalflow  optional boolean; nil means true.
-- @return featuresByFrame   featuresByFrame[frame][detection] is the feature
--         vector of that box; an all-zero box yields a zero DoubleTensor(4096).
-- @return opticalflowByFrame  for frames 2..N (when enabled) a table
--         {flow_x=..., flow_y=...} holding distanceTransform() of the squeezed
--         fx / fy results of liuflow.infer({previous, current}). Index 1 is
--         never set, so do not rely on `#` for this table.
-- Raises the error from image.crop (after printing the offending box) when a
-- box cannot be cropped.
function extractFeaturesAndOpticalFlow(detectionsByFrame, video_filepath, compute_opticalflow)
    if compute_opticalflow == nil then
        compute_opticalflow = true
    end

    local w = detectionsByFrame.width[1][1]
    local h = detectionsByFrame.height[1][1]
    local frameRate = detectionsByFrame.fps[1][1]
    local duration = detectionsByFrame.length[1][1]
    local numFrames = detectionsByFrame.detections:size(1)

    local vid = ffmpeg.Video{path=video_filepath, height=h, width=w, fps=frameRate, length=duration, silent=true}
    local videoFrames = vid:totensor(1,1,numFrames)
    print('Num frames: '..numFrames)

    local featuresByFrame = {}
    local opticalflowByFrame = {}
    local prevFrame = nil

    -- Iterate over frames
    for frameIndx = 1, numFrames do
        local frameDetections = detectionsByFrame.detections[frameIndx]
        local frame = videoFrames[frameIndx]
        featuresByFrame[frameIndx] = {}

        if frameIndx ~= 1 and compute_opticalflow then
            -- Only the x / y flow components (4th and 5th results) are used
            local _, _, _, fx, fy = liuflow.infer({prevFrame, frame})
            print('Optical flow computed for frame '..frameIndx)
            opticalflowByFrame[frameIndx] = {
                flow_x = distanceTransform(torch.squeeze(fx)),
                flow_y = distanceTransform(torch.squeeze(fy)),
            }
        end

        -- Iterate over detections
        -- (disabled alternative start index:
        --  detectionsByFrame.person_detector_indices[frameIndx][1])
        local numDetections = frameDetections:size(1)
        for detIndx = 1, numDetections do
            local box = frameDetections[detIndx]
            -- Clamp the box so that it stays inside the frame
            local x_min = math.min(w-2, box[1])
            local y_min = math.min(h-2, box[2])
            local x_max = math.min(w-1, box[3])
            local y_max = math.min(h-1, box[4])

            if x_min == 0 and x_max == 0 and y_min == 0 and y_max == 0 then
                -- Null detection: store a zero placeholder instead of running the net.
                -- NOTE(review): 4096 presumably equals the length of the vector
                -- extractFeatures() returns -- confirm if LAYER_TO_EXTRACT changes.
                featuresByFrame[frameIndx][detIndx] = torch.DoubleTensor(4096):fill(0)
            else
                x_min = math.max(1, x_min)
                y_min = math.max(1, y_min)
                -- Widen zero-extent boxes by one pixel where the frame allows it
                if y_max == y_min then y_max = math.min(h-1, y_max + 1) end
                if x_max == x_min then x_max = math.min(w-1, x_max + 1) end

                local ok, frame_region = pcall(image.crop, frame, x_min, y_min, x_max, y_max)
                if not ok then
                    -- Print the offending box, then abort with the original crop
                    -- error (on failure `frame_region` holds the error value).
                    print('\nERROR')
                    print(x_min, y_min, x_max, y_max, w, h)
                    print(frame:size())
                    error(frame_region, 0)
                end

                -- Compute features
                local frame_region_features = extractFeatures(frame_region, net)
                featuresByFrame[frameIndx][detIndx] = frame_region_features:clone()
            end

            -- Show progress on the current frame
            io.write((' '..(100 * detIndx / numDetections))..'%', '\r'); io.flush();
        end

        -- The previous frame is only needed as optical-flow input
        if compute_opticalflow then
            prevFrame = frame:clone()
        end
        print('Done with frame '..frameIndx)
    end

    return featuresByFrame, opticalflowByFrame
end
--- Compute network features for a single box inside a single frame.
-- No clamping to the frame is performed here; the bounds are used as given.
-- @param bounds  indexable with 1..4 = x_min, y_min, x_max, y_max.
-- @param frame   image passed to image.crop.
-- @return        result of extractFeatures() on the cropped region, using the
--                file-level network.
function extractFeaturesGivenFrameAndBounds(bounds, frame)
    local left, top, right, bottom = bounds[1], bounds[2], bounds[3], bounds[4]
    -- A box whose two edges coincide is widened by one on that axis
    if right == left then right = right + 1 end
    if bottom == top then bottom = bottom + 1 end
    local patch = image.crop(frame, left, top, right, bottom)
    return extractFeatures(patch, net)
end
-- Example: run on test data (left commented out; uncomment to execute)
-- local detectionsByFrame = matio.load('script_in/nico2.mat' , 'detections_by_frame')
-- local features, opticalflow = extractFeaturesAndOpticalFlow(detectionsByFrame, 'script_in/nico2.avi')
-- torch.save('script_in/nico2_features.t7', features)
-- torch.save('script_in/nico2_opticalflow.t7', opticalflow)