1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 // Various keypoint detecting functions.
17
18 #include <float.h>
19
20 #include "tensorflow/examples/android/jni/object_tracking/image-inl.h"
21 #include "tensorflow/examples/android/jni/object_tracking/image.h"
22 #include "tensorflow/examples/android/jni/object_tracking/time_log.h"
23 #include "tensorflow/examples/android/jni/object_tracking/utils.h"
24
25 #include "tensorflow/examples/android/jni/object_tracking/config.h"
26 #include "tensorflow/examples/android/jni/object_tracking/keypoint.h"
27 #include "tensorflow/examples/android/jni/object_tracking/keypoint_detector.h"
28
29 namespace tf_tracking {
30
GetDistSquaredBetween(const int * vec1,const int * vec2)31 static inline int GetDistSquaredBetween(const int* vec1, const int* vec2) {
32 return Square(vec1[0] - vec2[0]) + Square(vec1[1] - vec2[1]);
33 }
34
ScoreKeypoints(const ImageData & image_data,const int num_candidates,Keypoint * const candidate_keypoints)35 void KeypointDetector::ScoreKeypoints(const ImageData& image_data,
36 const int num_candidates,
37 Keypoint* const candidate_keypoints) {
38 const Image<int>& I_x = *image_data.GetSpatialX(0);
39 const Image<int>& I_y = *image_data.GetSpatialY(0);
40
41 if (config_->detect_skin) {
42 const Image<uint8_t>& u_data = *image_data.GetU();
43 const Image<uint8_t>& v_data = *image_data.GetV();
44
45 static const int reference[] = {111, 155};
46
47 // Score all the keypoints.
48 for (int i = 0; i < num_candidates; ++i) {
49 Keypoint* const keypoint = candidate_keypoints + i;
50
51 const int x_pos = keypoint->pos_.x * 2;
52 const int y_pos = keypoint->pos_.y * 2;
53
54 const int curr_color[] = {u_data[y_pos][x_pos], v_data[y_pos][x_pos]};
55 keypoint->score_ =
56 HarrisFilter(I_x, I_y, keypoint->pos_.x, keypoint->pos_.y) /
57 GetDistSquaredBetween(reference, curr_color);
58 }
59 } else {
60 // Score all the keypoints.
61 for (int i = 0; i < num_candidates; ++i) {
62 Keypoint* const keypoint = candidate_keypoints + i;
63 keypoint->score_ =
64 HarrisFilter(I_x, I_y, keypoint->pos_.x, keypoint->pos_.y);
65 }
66 }
67 }
68
69
KeypointCompare(const void * const a,const void * const b)70 inline int KeypointCompare(const void* const a, const void* const b) {
71 return (reinterpret_cast<const Keypoint*>(a)->score_ -
72 reinterpret_cast<const Keypoint*>(b)->score_) <= 0 ? 1 : -1;
73 }
74
75
76 // Quicksorts detected keypoints by score.
SortKeypoints(const int num_candidates,Keypoint * const candidate_keypoints) const77 void KeypointDetector::SortKeypoints(const int num_candidates,
78 Keypoint* const candidate_keypoints) const {
79 qsort(candidate_keypoints, num_candidates, sizeof(Keypoint), KeypointCompare);
80
81 #ifdef SANITY_CHECKS
82 // Verify that the array got sorted.
83 float last_score = FLT_MAX;
84 for (int i = 0; i < num_candidates; ++i) {
85 const float curr_score = candidate_keypoints[i].score_;
86
87 // Scores should be monotonically increasing.
88 SCHECK(last_score >= curr_score,
89 "Quicksort failure! %d: %.5f > %d: %.5f (%d total)",
90 i - 1, last_score, i, curr_score, num_candidates);
91
92 last_score = curr_score;
93 }
94 #endif
95 }
96
97
SelectKeypointsInBox(const BoundingBox & box,const Keypoint * const candidate_keypoints,const int num_candidates,const int max_keypoints,const int num_existing_keypoints,const Keypoint * const existing_keypoints,Keypoint * const final_keypoints) const98 int KeypointDetector::SelectKeypointsInBox(
99 const BoundingBox& box,
100 const Keypoint* const candidate_keypoints,
101 const int num_candidates,
102 const int max_keypoints,
103 const int num_existing_keypoints,
104 const Keypoint* const existing_keypoints,
105 Keypoint* const final_keypoints) const {
106 if (max_keypoints <= 0) {
107 return 0;
108 }
109
110 // This is the distance within which keypoints may be placed to each other
111 // within this box, roughly based on the box dimensions.
112 const int distance =
113 MAX(1, MIN(box.GetWidth(), box.GetHeight()) * kClosestPercent / 2.0f);
114
115 // First, mark keypoints that already happen to be inside this region. Ignore
116 // keypoints that are outside it, however close they might be.
117 interest_map_->Clear(false);
118 for (int i = 0; i < num_existing_keypoints; ++i) {
119 const Keypoint& candidate = existing_keypoints[i];
120
121 const int x_pos = candidate.pos_.x;
122 const int y_pos = candidate.pos_.y;
123 if (box.Contains(candidate.pos_)) {
124 MarkImage(x_pos, y_pos, distance, interest_map_.get());
125 }
126 }
127
128 // Now, go through and check which keypoints will still fit in the box.
129 int num_keypoints_selected = 0;
130 for (int i = 0; i < num_candidates; ++i) {
131 const Keypoint& candidate = candidate_keypoints[i];
132
133 const int x_pos = candidate.pos_.x;
134 const int y_pos = candidate.pos_.y;
135
136 if (!box.Contains(candidate.pos_) ||
137 !interest_map_->ValidPixel(x_pos, y_pos)) {
138 continue;
139 }
140
141 if (!(*interest_map_)[y_pos][x_pos]) {
142 final_keypoints[num_keypoints_selected++] = candidate;
143 if (num_keypoints_selected >= max_keypoints) {
144 break;
145 }
146 MarkImage(x_pos, y_pos, distance, interest_map_.get());
147 }
148 }
149 return num_keypoints_selected;
150 }
151
152
SelectKeypoints(const std::vector<BoundingBox> & boxes,const Keypoint * const candidate_keypoints,const int num_candidates,FramePair * const curr_change) const153 void KeypointDetector::SelectKeypoints(
154 const std::vector<BoundingBox>& boxes,
155 const Keypoint* const candidate_keypoints,
156 const int num_candidates,
157 FramePair* const curr_change) const {
158 // Now select all the interesting keypoints that fall insider our boxes.
159 curr_change->number_of_keypoints_ = 0;
160 for (std::vector<BoundingBox>::const_iterator iter = boxes.begin();
161 iter != boxes.end(); ++iter) {
162 const BoundingBox bounding_box = *iter;
163
164 // Count up keypoints that have already been selected, and fall within our
165 // box.
166 int num_keypoints_already_in_box = 0;
167 for (int i = 0; i < curr_change->number_of_keypoints_; ++i) {
168 if (bounding_box.Contains(curr_change->frame1_keypoints_[i].pos_)) {
169 ++num_keypoints_already_in_box;
170 }
171 }
172
173 const int max_keypoints_to_find_in_box =
174 MIN(kMaxKeypointsForObject - num_keypoints_already_in_box,
175 kMaxKeypoints - curr_change->number_of_keypoints_);
176
177 const int num_new_keypoints_in_box = SelectKeypointsInBox(
178 bounding_box,
179 candidate_keypoints,
180 num_candidates,
181 max_keypoints_to_find_in_box,
182 curr_change->number_of_keypoints_,
183 curr_change->frame1_keypoints_,
184 curr_change->frame1_keypoints_ + curr_change->number_of_keypoints_);
185
186 curr_change->number_of_keypoints_ += num_new_keypoints_in_box;
187
188 LOGV("Selected %d keypoints!", curr_change->number_of_keypoints_);
189 }
190 }
191
192
193 // Walks along the given circle checking for pixels above or below the center.
194 // Returns a score, or 0 if the keypoint did not pass the criteria.
195 //
196 // Parameters:
197 // circle_perimeter: the circumference in pixels of the circle.
198 // threshold: the minimum number of contiguous pixels that must be above or
199 // below the center value.
200 // center_ptr: the location of the center pixel in memory
201 // offsets: the relative offsets from the center pixel of the edge pixels.
TestCircle(const int circle_perimeter,const int threshold,const uint8_t * const center_ptr,const int * offsets)202 inline int TestCircle(const int circle_perimeter, const int threshold,
203 const uint8_t* const center_ptr, const int* offsets) {
204 // Get the actual value of the center pixel for easier reference later on.
205 const int center_value = static_cast<int>(*center_ptr);
206
207 // Number of total pixels to check. Have to wrap around some in case
208 // the contiguous section is split by the array edges.
209 const int num_total = circle_perimeter + threshold - 1;
210
211 int num_above = 0;
212 int above_diff = 0;
213
214 int num_below = 0;
215 int below_diff = 0;
216
217 // Used to tell when this is definitely not going to meet the threshold so we
218 // can early abort.
219 int minimum_by_now = threshold - num_total + 1;
220
221 // Go through every pixel along the perimeter of the circle, and then around
222 // again a little bit.
223 for (int i = 0; i < num_total; ++i) {
224 // This should be faster than mod.
225 const int perim_index = i < circle_perimeter ? i : i - circle_perimeter;
226
227 // This gets the value of the current pixel along the perimeter by using
228 // a precomputed offset.
229 const int curr_value =
230 static_cast<int>(center_ptr[offsets[perim_index]]);
231
232 const int difference = curr_value - center_value;
233
234 if (difference > kFastDiffAmount) {
235 above_diff += difference;
236 ++num_above;
237
238 num_below = 0;
239 below_diff = 0;
240
241 if (num_above >= threshold) {
242 return above_diff;
243 }
244 } else if (difference < -kFastDiffAmount) {
245 below_diff += difference;
246 ++num_below;
247
248 num_above = 0;
249 above_diff = 0;
250
251 if (num_below >= threshold) {
252 return below_diff;
253 }
254 } else {
255 num_above = 0;
256 num_below = 0;
257 above_diff = 0;
258 below_diff = 0;
259 }
260
261 // See if there's any chance of making the threshold.
262 if (MAX(num_above, num_below) < minimum_by_now) {
263 // Didn't pass.
264 return 0;
265 }
266 ++minimum_by_now;
267 }
268
269 // Didn't pass.
270 return 0;
271 }
272
273
274 // Returns a score in the range [0.0, positive infinity) which represents the
275 // relative likelihood of a point being a corner.
HarrisFilter(const Image<int32_t> & I_x,const Image<int32_t> & I_y,const float x,const float y) const276 float KeypointDetector::HarrisFilter(const Image<int32_t>& I_x,
277 const Image<int32_t>& I_y, const float x,
278 const float y) const {
279 if (I_x.ValidInterpPixel(x - kHarrisWindowSize, y - kHarrisWindowSize) &&
280 I_x.ValidInterpPixel(x + kHarrisWindowSize, y + kHarrisWindowSize)) {
281 // Image gradient matrix.
282 float G[] = { 0, 0, 0, 0 };
283 CalculateG(kHarrisWindowSize, x, y, I_x, I_y, G);
284
285 const float dx = G[0];
286 const float dy = G[3];
287 const float dxy = G[1];
288
289 // Harris-Nobel corner score.
290 return (dx * dy - Square(dxy)) / (dx + dy + FLT_MIN);
291 }
292
293 return 0.0f;
294 }
295
296
AddExtraCandidatesForBoxes(const std::vector<BoundingBox> & boxes,const int max_num_keypoints,Keypoint * const keypoints) const297 int KeypointDetector::AddExtraCandidatesForBoxes(
298 const std::vector<BoundingBox>& boxes,
299 const int max_num_keypoints,
300 Keypoint* const keypoints) const {
301 int num_keypoints_added = 0;
302
303 for (std::vector<BoundingBox>::const_iterator iter = boxes.begin();
304 iter != boxes.end(); ++iter) {
305 const BoundingBox box = *iter;
306
307 for (int i = 0; i < kNumToAddAsCandidates; ++i) {
308 for (int j = 0; j < kNumToAddAsCandidates; ++j) {
309 if (num_keypoints_added >= max_num_keypoints) {
310 LOGW("Hit cap of %d for temporary keypoints!", max_num_keypoints);
311 return num_keypoints_added;
312 }
313
314 Keypoint& curr_keypoint = keypoints[num_keypoints_added++];
315 curr_keypoint.pos_ = Point2f(
316 box.left_ + box.GetWidth() * (i + 0.5f) / kNumToAddAsCandidates,
317 box.top_ + box.GetHeight() * (j + 0.5f) / kNumToAddAsCandidates);
318 curr_keypoint.type_ = KEYPOINT_TYPE_INTEREST;
319 }
320 }
321 }
322
323 return num_keypoints_added;
324 }
325
326
FindKeypoints(const ImageData & image_data,const std::vector<BoundingBox> & rois,const FramePair & prev_change,FramePair * const curr_change)327 void KeypointDetector::FindKeypoints(const ImageData& image_data,
328 const std::vector<BoundingBox>& rois,
329 const FramePair& prev_change,
330 FramePair* const curr_change) {
331 // Copy keypoints from second frame of last pass to temp keypoints of this
332 // pass.
333 int number_of_tmp_keypoints = CopyKeypoints(prev_change, tmp_keypoints_);
334
335 const int max_num_fast = kMaxTempKeypoints - number_of_tmp_keypoints;
336 number_of_tmp_keypoints +=
337 FindFastKeypoints(image_data, max_num_fast,
338 tmp_keypoints_ + number_of_tmp_keypoints);
339
340 TimeLog("Found FAST keypoints");
341
342 if (number_of_tmp_keypoints >= kMaxTempKeypoints) {
343 LOGW("Hit cap of %d for temporary keypoints (FAST)! %d keypoints",
344 kMaxTempKeypoints, number_of_tmp_keypoints);
345 }
346
347 if (kAddArbitraryKeypoints) {
348 // Add some for each object prior to scoring.
349 const int max_num_box_keypoints =
350 kMaxTempKeypoints - number_of_tmp_keypoints;
351 number_of_tmp_keypoints +=
352 AddExtraCandidatesForBoxes(rois, max_num_box_keypoints,
353 tmp_keypoints_ + number_of_tmp_keypoints);
354 TimeLog("Added box keypoints");
355
356 if (number_of_tmp_keypoints >= kMaxTempKeypoints) {
357 LOGW("Hit cap of %d for temporary keypoints (boxes)! %d keypoints",
358 kMaxTempKeypoints, number_of_tmp_keypoints);
359 }
360 }
361
362 // Score them...
363 LOGV("Scoring %d keypoints!", number_of_tmp_keypoints);
364 ScoreKeypoints(image_data, number_of_tmp_keypoints, tmp_keypoints_);
365 TimeLog("Scored keypoints");
366
367 // Now pare it down a bit.
368 SortKeypoints(number_of_tmp_keypoints, tmp_keypoints_);
369 TimeLog("Sorted keypoints");
370
371 LOGV("%d keypoints to select from!", number_of_tmp_keypoints);
372
373 SelectKeypoints(rois, tmp_keypoints_, number_of_tmp_keypoints, curr_change);
374 TimeLog("Selected keypoints");
375
376 LOGV("Picked %d (%d max) final keypoints out of %d potential.",
377 curr_change->number_of_keypoints_,
378 kMaxKeypoints, number_of_tmp_keypoints);
379 }
380
381
CopyKeypoints(const FramePair & prev_change,Keypoint * const new_keypoints)382 int KeypointDetector::CopyKeypoints(const FramePair& prev_change,
383 Keypoint* const new_keypoints) {
384 int number_of_keypoints = 0;
385
386 // Caching values from last pass, just copy and compact.
387 for (int i = 0; i < prev_change.number_of_keypoints_; ++i) {
388 if (prev_change.optical_flow_found_keypoint_[i]) {
389 new_keypoints[number_of_keypoints] =
390 prev_change.frame2_keypoints_[i];
391
392 new_keypoints[number_of_keypoints].score_ =
393 prev_change.frame1_keypoints_[i].score_;
394
395 ++number_of_keypoints;
396 }
397 }
398
399 TimeLog("Copied keypoints");
400 return number_of_keypoints;
401 }
402
403
404 // FAST keypoint detector.
FindFastKeypoints(const Image<uint8_t> & frame,const int quadrant,const int downsample_factor,const int max_num_keypoints,Keypoint * const keypoints)405 int KeypointDetector::FindFastKeypoints(const Image<uint8_t>& frame,
406 const int quadrant,
407 const int downsample_factor,
408 const int max_num_keypoints,
409 Keypoint* const keypoints) {
410 /*
411 // Reference for a circle of diameter 7.
412 const int circle[] = {0, 0, 1, 1, 1, 0, 0,
413 0, 1, 0, 0, 0, 1, 0,
414 1, 0, 0, 0, 0, 0, 1,
415 1, 0, 0, 0, 0, 0, 1,
416 1, 0, 0, 0, 0, 0, 1,
417 0, 1, 0, 0, 0, 1, 0,
418 0, 0, 1, 1, 1, 0, 0};
419 const int circle_offset[] =
420 {2, 3, 4, 8, 12, 14, 20, 21, 27, 28, 34, 36, 40, 44, 45, 46};
421 */
422
423 // Quick test of compass directions. Any length 16 circle with a break of up
424 // to 4 pixels will have at least 3 of these 4 pixels active.
425 static const int short_circle_perimeter = 4;
426 static const int short_threshold = 3;
427 static const int short_circle_x[] = { -3, 0, +3, 0 };
428 static const int short_circle_y[] = { 0, -3, 0, +3 };
429
430 // Precompute image offsets.
431 int short_offsets[short_circle_perimeter];
432 for (int i = 0; i < short_circle_perimeter; ++i) {
433 short_offsets[i] = short_circle_x[i] + short_circle_y[i] * frame.GetWidth();
434 }
435
436 // Large circle values.
437 static const int full_circle_perimeter = 16;
438 static const int full_threshold = 12;
439 static const int full_circle_x[] =
440 { -1, 0, +1, +2, +3, +3, +3, +2, +1, +0, -1, -2, -3, -3, -3, -2 };
441 static const int full_circle_y[] =
442 { -3, -3, -3, -2, -1, 0, +1, +2, +3, +3, +3, +2, +1, +0, -1, -2 };
443
444 // Precompute image offsets.
445 int full_offsets[full_circle_perimeter];
446 for (int i = 0; i < full_circle_perimeter; ++i) {
447 full_offsets[i] = full_circle_x[i] + full_circle_y[i] * frame.GetWidth();
448 }
449
450 const int scratch_stride = frame.stride();
451
452 keypoint_scratch_->Clear(0);
453
454 // Set up the bounds on the region to test based on the passed-in quadrant.
455 const int quadrant_width = (frame.GetWidth() / 2) - kFastBorderBuffer;
456 const int quadrant_height = (frame.GetHeight() / 2) - kFastBorderBuffer;
457 const int start_x =
458 kFastBorderBuffer + ((quadrant % 2 == 0) ? 0 : quadrant_width);
459 const int start_y =
460 kFastBorderBuffer + ((quadrant < 2) ? 0 : quadrant_height);
461 const int end_x = start_x + quadrant_width;
462 const int end_y = start_y + quadrant_height;
463
464 // Loop through once to find FAST keypoint clumps.
465 for (int img_y = start_y; img_y < end_y; ++img_y) {
466 const uint8_t* curr_pixel_ptr = frame[img_y] + start_x;
467
468 for (int img_x = start_x; img_x < end_x; ++img_x) {
469 // Only insert it if it meets the quick minimum requirements test.
470 if (TestCircle(short_circle_perimeter, short_threshold,
471 curr_pixel_ptr, short_offsets) != 0) {
472 // Longer test for actual keypoint score..
473 const int fast_score = TestCircle(full_circle_perimeter,
474 full_threshold,
475 curr_pixel_ptr,
476 full_offsets);
477
478 // Non-zero score means the keypoint was found.
479 if (fast_score != 0) {
480 uint8_t* const center_ptr = (*keypoint_scratch_)[img_y] + img_x;
481
482 // Increase the keypoint count on this pixel and the pixels in all
483 // 4 cardinal directions.
484 *center_ptr += 5;
485 *(center_ptr - 1) += 1;
486 *(center_ptr + 1) += 1;
487 *(center_ptr - scratch_stride) += 1;
488 *(center_ptr + scratch_stride) += 1;
489 }
490 }
491
492 ++curr_pixel_ptr;
493 } // x
494 } // y
495
496 TimeLog("Found FAST keypoints.");
497
498 int num_keypoints = 0;
499 // Loop through again and Harris filter pixels in the center of clumps.
500 // We can shrink the window by 1 pixel on every side.
501 for (int img_y = start_y + 1; img_y < end_y - 1; ++img_y) {
502 const uint8_t* curr_pixel_ptr = (*keypoint_scratch_)[img_y] + start_x;
503
504 for (int img_x = start_x + 1; img_x < end_x - 1; ++img_x) {
505 if (*curr_pixel_ptr >= kMinNumConnectedForFastKeypoint) {
506 Keypoint* const keypoint = keypoints + num_keypoints;
507 keypoint->pos_ = Point2f(
508 img_x * downsample_factor, img_y * downsample_factor);
509 keypoint->score_ = 0;
510 keypoint->type_ = KEYPOINT_TYPE_FAST;
511
512 ++num_keypoints;
513 if (num_keypoints >= max_num_keypoints) {
514 return num_keypoints;
515 }
516 }
517
518 ++curr_pixel_ptr;
519 } // x
520 } // y
521
522 TimeLog("Picked FAST keypoints.");
523
524 return num_keypoints;
525 }
526
FindFastKeypoints(const ImageData & image_data,const int max_num_keypoints,Keypoint * const keypoints)527 int KeypointDetector::FindFastKeypoints(const ImageData& image_data,
528 const int max_num_keypoints,
529 Keypoint* const keypoints) {
530 int downsample_factor = 1;
531 int num_found = 0;
532
533 // TODO(andrewharp): Get this working for multiple image scales.
534 for (int i = 0; i < 1; ++i) {
535 const Image<uint8_t>& frame = *image_data.GetPyramidSqrt2Level(i);
536 num_found += FindFastKeypoints(
537 frame, fast_quadrant_,
538 downsample_factor, max_num_keypoints, keypoints + num_found);
539 downsample_factor *= 2;
540 }
541
542 // Increment the current quadrant.
543 fast_quadrant_ = (fast_quadrant_ + 1) % 4;
544
545 return num_found;
546 }
547
548 } // namespace tf_tracking
549