/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15
16 #include <math.h>
17
18 #include "tensorflow/examples/android/jni/object_tracking/geom.h"
19 #include "tensorflow/examples/android/jni/object_tracking/image-inl.h"
20 #include "tensorflow/examples/android/jni/object_tracking/image.h"
21 #include "tensorflow/examples/android/jni/object_tracking/time_log.h"
22 #include "tensorflow/examples/android/jni/object_tracking/utils.h"
23
24 #include "tensorflow/examples/android/jni/object_tracking/config.h"
25 #include "tensorflow/examples/android/jni/object_tracking/flow_cache.h"
26 #include "tensorflow/examples/android/jni/object_tracking/frame_pair.h"
27 #include "tensorflow/examples/android/jni/object_tracking/image_data.h"
28 #include "tensorflow/examples/android/jni/object_tracking/keypoint.h"
29 #include "tensorflow/examples/android/jni/object_tracking/keypoint_detector.h"
30 #include "tensorflow/examples/android/jni/object_tracking/optical_flow.h"
31
32 namespace tf_tracking {
33
// Stores the tracker configuration and working image size; frame pointers
// start NULL until NextFrame() supplies image data.
OpticalFlow::OpticalFlow(const OpticalFlowConfig* const config)
    : config_(config),
      frame1_(NULL),
      frame2_(NULL),
      working_size_(config->image_size) {}
39
40
NextFrame(const ImageData * const image_data)41 void OpticalFlow::NextFrame(const ImageData* const image_data) {
42 // Special case for the first frame: make sure the image ends up in
43 // frame1_ so that keypoint detection can be done on it if desired.
44 frame1_ = (frame1_ == NULL) ? image_data : frame2_;
45 frame2_ = image_data;
46 }
47
48
49 // Static heart of the optical flow computation.
50 // Lucas Kanade algorithm.
bool OpticalFlow::FindFlowAtPoint_LK(const Image<uint8_t>& img_I,
                                     const Image<uint8_t>& img_J,
                                     const Image<int32_t>& I_x,
                                     const Image<int32_t>& I_y, const float p_x,
                                     const float p_y, float* out_g_x,
                                     float* out_g_y) {
  // out_g_x/out_g_y are in-out: they carry the initial flow guess in and the
  // refined flow out. Work on local copies during the iteration.
  float g_x = *out_g_x;
  float g_y = *out_g_y;
  // Get values for frame 1. They remain constant through the inner
  // iteration loop.
  float vals_I[kFlowArraySize];
  float vals_I_x[kFlowArraySize];
  float vals_I_y[kFlowArraySize];

  // Square patch of side (2 * window + 1) centered on the point.
  const int kPatchSize = 2 * kFlowIntegrationWindowSize + 1;
  const float kWindowSizeFloat = static_cast<float>(kFlowIntegrationWindowSize);

#if USE_FIXED_POINT_FLOW
  // Interpolation bounds in 16:16 fixed point (inclusive maxima).
  const int fixed_x_max = RealToFixed1616(img_I.width_less_one_) - 1;
  const int fixed_y_max = RealToFixed1616(img_I.height_less_one_) - 1;
#else
  // EPSILON keeps bilinear interpolation strictly inside the image.
  const float real_x_max = I_x.width_less_one_ - EPSILON;
  const float real_y_max = I_x.height_less_one_ - EPSILON;
#endif

  // Get the window around the original point.
  const float src_left_real = p_x - kWindowSizeFloat;
  const float src_top_real = p_y - kWindowSizeFloat;
  float* vals_I_ptr = vals_I;
  float* vals_I_x_ptr = vals_I_x;
  float* vals_I_y_ptr = vals_I_y;
#if USE_FIXED_POINT_FLOW
  // Source integer coordinates.
  const int src_left_fixed = RealToFixed1616(src_left_real);
  const int src_top_fixed = RealToFixed1616(src_top_real);

  // Sample intensity and both gradient images over the source patch.
  // Coordinates are clipped, so an off-image patch repeats border samples.
  for (int y = 0; y < kPatchSize; ++y) {
    const int fp_y = Clip(src_top_fixed + (y << 16), 0, fixed_y_max);

    for (int x = 0; x < kPatchSize; ++x) {
      const int fp_x = Clip(src_left_fixed + (x << 16), 0, fixed_x_max);

      *vals_I_ptr++ = img_I.GetPixelInterpFixed1616(fp_x, fp_y);
      *vals_I_x_ptr++ = I_x.GetPixelInterpFixed1616(fp_x, fp_y);
      *vals_I_y_ptr++ = I_y.GetPixelInterpFixed1616(fp_x, fp_y);
    }
  }
#else
  for (int y = 0; y < kPatchSize; ++y) {
    const float y_pos = Clip(src_top_real + y, 0.0f, real_y_max);

    for (int x = 0; x < kPatchSize; ++x) {
      const float x_pos = Clip(src_left_real + x, 0.0f, real_x_max);

      *vals_I_ptr++ = img_I.GetPixelInterp(x_pos, y_pos);
      *vals_I_x_ptr++ = I_x.GetPixelInterp(x_pos, y_pos);
      *vals_I_y_ptr++ = I_y.GetPixelInterp(x_pos, y_pos);
    }
  }
#endif

  // Compute the spatial gradient matrix about point p.
  float G[] = { 0, 0, 0, 0 };
  CalculateG(vals_I_x, vals_I_y, kFlowArraySize, G);

  // Find the inverse of G.
  float G_inv[4];
  if (!Invert2x2(G, G_inv)) {
    // Singular gradient matrix (e.g. textureless patch): flow is unsolvable
    // here, so report failure without touching the output.
    return false;
  }

#if NORMALIZE
  // Source-patch statistics for brightness/contrast normalization; computed
  // once since the source patch is fixed.
  const float mean_I = ComputeMean(vals_I, kFlowArraySize);
  const float std_dev_I = ComputeStdDev(vals_I, kFlowArraySize, mean_I);
#endif

  // Iterate kNumIterations times or until we converge.
  for (int iteration = 0; iteration < kNumIterations; ++iteration) {
    // Get values for frame 2.
    float vals_J[kFlowArraySize];

    // Get the window around the destination point.
    const float left_real = p_x + g_x - kWindowSizeFloat;
    const float top_real = p_y + g_y - kWindowSizeFloat;
    float* vals_J_ptr = vals_J;
#if USE_FIXED_POINT_FLOW
    // The top-left sub-pixel is set for the current iteration (in 16:16
    // fixed). This is constant over one iteration.
    const int left_fixed = RealToFixed1616(left_real);
    const int top_fixed = RealToFixed1616(top_real);

    for (int win_y = 0; win_y < kPatchSize; ++win_y) {
      const int fp_y = Clip(top_fixed + (win_y << 16), 0, fixed_y_max);
      for (int win_x = 0; win_x < kPatchSize; ++win_x) {
        const int fp_x = Clip(left_fixed + (win_x << 16), 0, fixed_x_max);
        *vals_J_ptr++ = img_J.GetPixelInterpFixed1616(fp_x, fp_y);
      }
    }
#else
    for (int win_y = 0; win_y < kPatchSize; ++win_y) {
      const float y_pos = Clip(top_real + win_y, 0.0f, real_y_max);
      for (int win_x = 0; win_x < kPatchSize; ++win_x) {
        const float x_pos = Clip(left_real + win_x, 0.0f, real_x_max);
        *vals_J_ptr++ = img_J.GetPixelInterp(x_pos, y_pos);
      }
    }
#endif

#if NORMALIZE
    const float mean_J = ComputeMean(vals_J, kFlowArraySize);
    const float std_dev_J = ComputeStdDev(vals_J, kFlowArraySize, mean_J);

    // TODO(andrewharp): Probably better to completely detect and handle the
    // "corner case" where the patch is fully outside the image diagonally.
    // Guard against divide-by-zero when the destination patch is flat.
    const float std_dev_ratio = std_dev_J > 0.0f ? std_dev_I / std_dev_J : 1.0f;
#endif

    // Compute image mismatch vector.
    float b_x = 0.0f;
    float b_y = 0.0f;

    // Rewind the patch pointers for a fresh pass over the arrays.
    vals_I_ptr = vals_I;
    vals_J_ptr = vals_J;
    vals_I_x_ptr = vals_I_x;
    vals_I_y_ptr = vals_I_y;

    for (int win_y = 0; win_y < kPatchSize; ++win_y) {
      for (int win_x = 0; win_x < kPatchSize; ++win_x) {
#if NORMALIZE
        // Normalized Image difference.
        const float dI =
            (*vals_I_ptr++ - mean_I) - (*vals_J_ptr++ - mean_J) * std_dev_ratio;
#else
        const float dI = *vals_I_ptr++ - *vals_J_ptr++;
#endif
        // Accumulate gradient-weighted residual: b += dI * grad(I).
        b_x += dI * *vals_I_x_ptr++;
        b_y += dI * *vals_I_y_ptr++;
      }
    }

    // Optical flow... solve n = G^-1 * b
    const float n_x = (G_inv[0] * b_x) + (G_inv[1] * b_y);
    const float n_y = (G_inv[2] * b_x) + (G_inv[3] * b_y);

    // Update best guess with residual displacement from this level and
    // iteration.
    g_x += n_x;
    g_y += n_y;

    // LOGV("Iteration %d: delta (%.3f, %.3f)", iteration, n_x, n_y);

    // Abort early if we're already below the threshold.
    if (Square(n_x) + Square(n_y) < Square(kTrackingAbortThreshold)) {
      break;
    }
  }  // Iteration.

  // Copy value back into output.
  *out_g_x = g_x;
  *out_g_y = g_y;
  return true;
}
213
214
215 // Pointwise flow using translational 2dof ESM.
bool OpticalFlow::FindFlowAtPoint_ESM(
    const Image<uint8_t>& img_I, const Image<uint8_t>& img_J,
    const Image<int32_t>& I_x, const Image<int32_t>& I_y,
    const Image<int32_t>& J_x, const Image<int32_t>& J_y, const float p_x,
    const float p_y, float* out_g_x, float* out_g_y) {
  // out_g_x/out_g_y are in-out: initial flow guess in, refined flow out.
  float g_x = *out_g_x;
  float g_y = *out_g_y;
  // Reciprocal of patch area, used to turn pixel-difference sums into means.
  const float area_inv = 1.0f / static_cast<float>(kFlowArraySize);

  // Get values for frame 1. They remain constant through the inner
  // iteration loop.
  uint8_t vals_I[kFlowArraySize];
  uint8_t vals_J[kFlowArraySize];
  int16_t src_gradient_x[kFlowArraySize];
  int16_t src_gradient_y[kFlowArraySize];

  // TODO(rspring): try out the IntegerPatchAlign() method once
  // the code for that is in ../common.
  const float wsize_float = static_cast<float>(kFlowIntegrationWindowSize);
  const int src_left_fixed = RealToFixed1616(p_x - wsize_float);
  const int src_top_fixed = RealToFixed1616(p_y - wsize_float);
  const int patch_size = 2 * kFlowIntegrationWindowSize + 1;

  // Create the keypoint template patch from a subpixel location.
  // Fails (returns false) if the patch cannot be extracted, in which case the
  // output flow is left untouched.
  if (!img_I.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
                                             patch_size, patch_size, vals_I) ||
      !I_x.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
                                           patch_size, patch_size,
                                           src_gradient_x) ||
      !I_y.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
                                           patch_size, patch_size,
                                           src_gradient_y)) {
    return false;
  }

  // Additive brightness difference between the target and template patches,
  // updated each iteration when kDoBrightnessNormalize is set.
  int bright_offset = 0;
  int sum_diff = 0;

  // The top-left sub-pixel is set for the current iteration (in 16:16 fixed).
  // This is constant over one iteration.
  int left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
  int top_fixed = RealToFixed1616(p_y + g_y - wsize_float);

  // The truncated version gives the most top-left pixel that is used.
  int left_trunc = left_fixed >> 16;
  int top_trunc = top_fixed >> 16;

  // Compute an initial brightness offset.
  // Only done when the whole target patch lies strictly inside img_J, so the
  // raw row pointers below are safe.
  if (kDoBrightnessNormalize &&
      left_trunc >= 0 && top_trunc >= 0 &&
      (left_trunc + patch_size) < img_J.width_less_one_ &&
      (top_trunc + patch_size) < img_J.height_less_one_) {
    int templ_index = 0;
    const uint8_t* j_row = img_J[top_trunc] + left_trunc;

    const int j_stride = img_J.stride();

    // Mean difference J - I over the patch becomes the starting offset.
    for (int y = 0; y < patch_size; ++y, j_row += j_stride) {
      for (int x = 0; x < patch_size; ++x) {
        sum_diff += static_cast<int>(j_row[x]) - vals_I[templ_index++];
      }
    }

    bright_offset = static_cast<int>(static_cast<float>(sum_diff) * area_inv);
  }

  // Iterate kNumIterations times or until we go out of image.
  for (int iteration = 0; iteration < kNumIterations; ++iteration) {
    // jtj holds the symmetric 2x2 J^T*J (3 unique entries: xx, xy, yy);
    // jtr holds the 2-vector J^T*r.
    int jtj[3] = { 0, 0, 0 };
    int jtr[2] = { 0, 0 };
    sum_diff = 0;

    // Extract the target image values.
    // Extract the gradient from the target image patch and accumulate to
    // the gradient of the source image patch.
    if (!img_J.ExtractPatchAtSubpixelFixed1616(left_fixed, top_fixed,
                                               patch_size, patch_size,
                                               vals_J)) {
      break;
    }

    const uint8_t* templ_row = vals_I;
    const uint8_t* extract_row = vals_J;
    const int16_t* src_dx_row = src_gradient_x;
    const int16_t* src_dy_row = src_gradient_y;

    for (int y = 0; y < patch_size; ++y, templ_row += patch_size,
        src_dx_row += patch_size, src_dy_row += patch_size,
        extract_row += patch_size) {
      const int fp_y = top_fixed + (y << 16);
      for (int x = 0; x < patch_size; ++x) {
        const int fp_x = left_fixed + (x << 16);
        int32_t target_dx = J_x.GetPixelInterpFixed1616(fp_x, fp_y);
        int32_t target_dy = J_y.GetPixelInterpFixed1616(fp_x, fp_y);

        // Combine the two Jacobians.
        // Right-shift by one to account for the fact that we add
        // two Jacobians.
        int32_t dx = (src_dx_row[x] + target_dx) >> 1;
        int32_t dy = (src_dy_row[x] + target_dy) >> 1;

        // The current residual b - h(q) == extracted - (template + offset)
        int32_t diff = static_cast<int32_t>(extract_row[x]) -
                       static_cast<int32_t>(templ_row[x]) - bright_offset;

        jtj[0] += dx * dx;
        jtj[1] += dx * dy;
        jtj[2] += dy * dy;

        jtr[0] += dx * diff;
        jtr[1] += dy * diff;

        sum_diff += diff;
      }
    }

    const float jtr1_float = static_cast<float>(jtr[0]);
    const float jtr2_float = static_cast<float>(jtr[1]);

    // Add some baseline stability to the system.
    jtj[0] += kEsmRegularizer;
    jtj[2] += kEsmRegularizer;

    // 64-bit products avoid overflow when forming the determinant.
    const int64_t prod1 = static_cast<int64_t>(jtj[0]) * jtj[2];
    const int64_t prod2 = static_cast<int64_t>(jtj[1]) * jtj[1];

    // One ESM step.
    // jtj_1 is the adjugate of J^T*J; dividing by the determinant gives the
    // inverse. NOTE(review): no explicit zero-determinant check here —
    // presumably kEsmRegularizer keeps prod1 - prod2 nonzero; verify.
    const float jtj_1[4] = { static_cast<float>(jtj[2]),
                             static_cast<float>(-jtj[1]),
                             static_cast<float>(-jtj[1]),
                             static_cast<float>(jtj[0]) };
    const double det_inv = 1.0 / static_cast<double>(prod1 - prod2);

    g_x -= det_inv * (jtj_1[0] * jtr1_float + jtj_1[1] * jtr2_float);
    g_y -= det_inv * (jtj_1[2] * jtr1_float + jtj_1[3] * jtr2_float);

    if (kDoBrightnessNormalize) {
      // Fold this iteration's mean residual into the brightness offset
      // (+0.5f rounds to nearest).
      bright_offset +=
          static_cast<int>(area_inv * static_cast<float>(sum_diff) + 0.5f);
    }

    // Update top left position.
    left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
    top_fixed  = RealToFixed1616(p_y + g_y - wsize_float);

    left_trunc = left_fixed >> 16;
    top_trunc = top_fixed >> 16;

    // Abort iterations if we go out of borders.
    // NOTE(review): bounds mix J_x's width with J_y's height — assumes both
    // gradient images share the same dimensions; confirm upstream.
    if (left_trunc < 0 || top_trunc < 0 ||
        (left_trunc + patch_size) >= J_x.width_less_one_ ||
        (top_trunc + patch_size) >= J_y.height_less_one_) {
      break;
    }
  }  // Iteration.

  // Copy value back into output.
  *out_g_x = g_x;
  *out_g_y = g_y;
  return true;
}
377
378
FindFlowAtPointReversible(const int level,const float u_x,const float u_y,const bool reverse_flow,float * flow_x,float * flow_y) const379 bool OpticalFlow::FindFlowAtPointReversible(
380 const int level, const float u_x, const float u_y,
381 const bool reverse_flow,
382 float* flow_x, float* flow_y) const {
383 const ImageData& frame_a = reverse_flow ? *frame2_ : *frame1_;
384 const ImageData& frame_b = reverse_flow ? *frame1_ : *frame2_;
385
386 // Images I (prev) and J (next).
387 const Image<uint8_t>& img_I = *frame_a.GetPyramidSqrt2Level(level * 2);
388 const Image<uint8_t>& img_J = *frame_b.GetPyramidSqrt2Level(level * 2);
389
390 // Computed gradients.
391 const Image<int32_t>& I_x = *frame_a.GetSpatialX(level);
392 const Image<int32_t>& I_y = *frame_a.GetSpatialY(level);
393 const Image<int32_t>& J_x = *frame_b.GetSpatialX(level);
394 const Image<int32_t>& J_y = *frame_b.GetSpatialY(level);
395
396 // Shrink factor from original.
397 const float shrink_factor = (1 << level);
398
399 // Image position vector (p := u^l), scaled for this level.
400 const float scaled_p_x = u_x / shrink_factor;
401 const float scaled_p_y = u_y / shrink_factor;
402
403 float scaled_flow_x = *flow_x / shrink_factor;
404 float scaled_flow_y = *flow_y / shrink_factor;
405
406 // LOGE("FindFlowAtPoint level %d: %5.2f, %5.2f (%5.2f, %5.2f)", level,
407 // scaled_p_x, scaled_p_y, &scaled_flow_x, &scaled_flow_y);
408
409 const bool success = kUseEsm ?
410 FindFlowAtPoint_ESM(img_I, img_J, I_x, I_y, J_x, J_y,
411 scaled_p_x, scaled_p_y,
412 &scaled_flow_x, &scaled_flow_y) :
413 FindFlowAtPoint_LK(img_I, img_J, I_x, I_y,
414 scaled_p_x, scaled_p_y,
415 &scaled_flow_x, &scaled_flow_y);
416
417 *flow_x = scaled_flow_x * shrink_factor;
418 *flow_y = scaled_flow_y * shrink_factor;
419
420 return success;
421 }
422
423
FindFlowAtPointSingleLevel(const int level,const float u_x,const float u_y,const bool filter_by_fb_error,float * flow_x,float * flow_y) const424 bool OpticalFlow::FindFlowAtPointSingleLevel(
425 const int level,
426 const float u_x, const float u_y,
427 const bool filter_by_fb_error,
428 float* flow_x, float* flow_y) const {
429 if (!FindFlowAtPointReversible(level, u_x, u_y, false, flow_x, flow_y)) {
430 return false;
431 }
432
433 if (filter_by_fb_error) {
434 const float new_position_x = u_x + *flow_x;
435 const float new_position_y = u_y + *flow_y;
436
437 float reverse_flow_x = 0.0f;
438 float reverse_flow_y = 0.0f;
439
440 // Now find the backwards flow and confirm it lines up with the original
441 // starting point.
442 if (!FindFlowAtPointReversible(level, new_position_x, new_position_y,
443 true,
444 &reverse_flow_x, &reverse_flow_y)) {
445 LOGE("Backward error!");
446 return false;
447 }
448
449 const float discrepancy_length =
450 sqrtf(Square(*flow_x + reverse_flow_x) +
451 Square(*flow_y + reverse_flow_y));
452
453 const float flow_length = sqrtf(Square(*flow_x) + Square(*flow_y));
454
455 return discrepancy_length <
456 (kMaxForwardBackwardErrorAllowed * flow_length);
457 }
458
459 return true;
460 }
461
462
463 // An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm.
464 // See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for details.
FindFlowAtPointPyramidal(const float u_x,const float u_y,const bool filter_by_fb_error,float * flow_x,float * flow_y) const465 bool OpticalFlow::FindFlowAtPointPyramidal(const float u_x, const float u_y,
466 const bool filter_by_fb_error,
467 float* flow_x, float* flow_y) const {
468 const int max_level = MAX(kMinNumPyramidLevelsToUseForAdjustment,
469 kNumPyramidLevels - kNumCacheLevels);
470
471 // For every level in the pyramid, update the coordinates of the best match.
472 for (int l = max_level - 1; l >= 0; --l) {
473 if (!FindFlowAtPointSingleLevel(l, u_x, u_y,
474 filter_by_fb_error, flow_x, flow_y)) {
475 return false;
476 }
477 }
478
479 return true;
480 }
481
482 } // namespace tf_tracking
483