/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/image_ops.cc

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/image/non_max_suppression_op.h"

#include <algorithm>
#include <cmath>
#include <functional>
#include <limits>
#include <queue>
#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {
namespace {

typedef Eigen::ThreadPoolDevice CPUDevice;

static inline void CheckScoreSizes(OpKernelContext* context, int num_boxes,
                                   const Tensor& scores) {
  // The shape of 'scores' is [num_boxes]
  OP_REQUIRES(context, scores.dims() == 1,
              errors::InvalidArgument(
                  "scores must be 1-D", scores.shape().DebugString(),
                  " (Shape must be rank 1 but is rank ", scores.dims(), ")"));
  OP_REQUIRES(
      context, scores.dim_size(0) == num_boxes,
      errors::InvalidArgument("scores has incompatible shape (Dimensions must "
                              "be equal, but are ",
                              num_boxes, " and ", scores.dim_size(0), ")"));
}

static inline void ParseAndCheckOverlapSizes(OpKernelContext* context,
                                             const Tensor& overlaps,
                                             int* num_boxes) {
  // The shape of 'overlaps' is [num_boxes, num_boxes]
  OP_REQUIRES(context, overlaps.dims() == 2,
              errors::InvalidArgument("overlaps must be 2-D",
                                      overlaps.shape().DebugString()));

  *num_boxes = overlaps.dim_size(0);
  OP_REQUIRES(context, overlaps.dim_size(1) == *num_boxes,
              errors::InvalidArgument("overlaps must be square",
                                      overlaps.shape().DebugString()));
}

static inline void ParseAndCheckBoxSizes(OpKernelContext* context,
                                         const Tensor& boxes, int* num_boxes) {
  // The shape of 'boxes' is [num_boxes, 4]
  OP_REQUIRES(context, boxes.dims() == 2,
              errors::InvalidArgument(
                  "boxes must be 2-D", boxes.shape().DebugString(),
                  " (Shape must be rank 2 but is rank ", boxes.dims(), ")"));
  *num_boxes = boxes.dim_size(0);
  OP_REQUIRES(context, boxes.dim_size(1) == 4,
              errors::InvalidArgument("boxes must have 4 columns (Dimension "
                                      "must be 4 but is ",
                                      boxes.dim_size(1), ")"));
}

static inline void CheckCombinedNMSScoreSizes(OpKernelContext* context,
                                              int num_boxes,
                                              const Tensor& scores) {
  // The shape of 'scores' is [batch_size, num_boxes, num_classes]
  OP_REQUIRES(context, scores.dims() == 3,
              errors::InvalidArgument("scores must be 3-D",
                                      scores.shape().DebugString()));
  OP_REQUIRES(context, scores.dim_size(1) == num_boxes,
              errors::InvalidArgument("scores has incompatible shape"));
}

static inline void ParseAndCheckCombinedNMSBoxSizes(OpKernelContext* context,
                                                    const Tensor& boxes,
                                                    int* num_boxes,
                                                    const int num_classes) {
  // The shape of 'boxes' is [batch_size, num_boxes, q, 4]
  OP_REQUIRES(context, boxes.dims() == 4,
              errors::InvalidArgument("boxes must be 4-D",
                                      boxes.shape().DebugString()));

  bool box_check = boxes.dim_size(2) == 1 || boxes.dim_size(2) == num_classes;
  OP_REQUIRES(context, box_check,
              errors::InvalidArgument(
                  "third dimension of boxes must be either 1 or num classes"));
  *num_boxes = boxes.dim_size(1);
  OP_REQUIRES(context, boxes.dim_size(3) == 4,
              errors::InvalidArgument("boxes must have 4 columns"));
}
// Return intersection-over-union overlap between boxes i and j
template <typename T>
static inline float IOU(typename TTypes<T, 2>::ConstTensor boxes, int i,
                        int j) {
  const float ymin_i = Eigen::numext::mini<float>(boxes(i, 0), boxes(i, 2));
  const float xmin_i = Eigen::numext::mini<float>(boxes(i, 1), boxes(i, 3));
  const float ymax_i = Eigen::numext::maxi<float>(boxes(i, 0), boxes(i, 2));
  const float xmax_i = Eigen::numext::maxi<float>(boxes(i, 1), boxes(i, 3));
  const float ymin_j = Eigen::numext::mini<float>(boxes(j, 0), boxes(j, 2));
  const float xmin_j = Eigen::numext::mini<float>(boxes(j, 1), boxes(j, 3));
  const float ymax_j = Eigen::numext::maxi<float>(boxes(j, 0), boxes(j, 2));
  const float xmax_j = Eigen::numext::maxi<float>(boxes(j, 1), boxes(j, 3));
  const float area_i = (ymax_i - ymin_i) * (xmax_i - xmin_i);
  const float area_j = (ymax_j - ymin_j) * (xmax_j - xmin_j);
  if (area_i <= 0 || area_j <= 0) {
    return 0.0;
  }
  const float intersection_ymin = Eigen::numext::maxi<float>(ymin_i, ymin_j);
  const float intersection_xmin = Eigen::numext::maxi<float>(xmin_i, xmin_j);
  const float intersection_ymax = Eigen::numext::mini<float>(ymax_i, ymax_j);
  const float intersection_xmax = Eigen::numext::mini<float>(xmax_i, xmax_j);
  const float intersection_area =
      Eigen::numext::maxi<float>(intersection_ymax - intersection_ymin, 0.0) *
      Eigen::numext::maxi<float>(intersection_xmax - intersection_xmin, 0.0);
  return intersection_area / (area_i + area_j - intersection_area);
}
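
// Worked example (illustrative values only): for boxes i = [0, 0, 1, 1] and
// j = [0, 0.5, 1, 1.5], each box has area 1.0 and the intersection is
// [0, 0.5, 1, 1] with area 0.5, so IOU = 0.5 / (1.0 + 1.0 - 0.5) = 1/3.
// Note the min/max normalization above means box corners may be given in
// either order.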

template <typename T>
static inline T Overlap(typename TTypes<T, 2>::ConstTensor overlaps, int i,
                        int j) {
  return overlaps(i, j);
}

template <typename T>
static inline std::function<float(int, int)> CreateIOUSimilarityFn(
    const Tensor& boxes) {
  typename TTypes<T, 2>::ConstTensor boxes_data = boxes.tensor<T, 2>();
  return std::bind(&IOU<T>, boxes_data, std::placeholders::_1,
                   std::placeholders::_2);
}

template <typename T>
static inline std::function<T(int, int)> CreateOverlapSimilarityFn(
    const Tensor& overlaps) {
  typename TTypes<T, 2>::ConstTensor overlaps_data =
      overlaps.tensor<float, 2>();
  return std::bind(&Overlap<T>, overlaps_data, std::placeholders::_1,
                   std::placeholders::_2);
}
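
// Note: CreateOverlapSimilarityFn is only instantiated with T = float in this
// file (see NonMaxSuppressionWithOverlapsOp below), which is why the overlaps
// tensor is read via tensor<float, 2> rather than tensor<T, 2>.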

template <typename T>
void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& scores,
                           int num_boxes, const Tensor& max_output_size,
                           const T similarity_threshold,
                           const T score_threshold, const T soft_nms_sigma,
                           const std::function<float(int, int)>& similarity_fn,
                           bool return_scores_tensor = false,
                           bool pad_to_max_output_size = false,
                           int* ptr_num_valid_outputs = nullptr) {
  const int output_size = max_output_size.scalar<int>()();
  OP_REQUIRES(context, output_size >= 0,
              errors::InvalidArgument("output size must be non-negative"));

  std::vector<T> scores_data(num_boxes);
  std::copy_n(scores.flat<T>().data(), num_boxes, scores_data.begin());

  // Data structure for a selection candidate in NMS.
  struct Candidate {
    int box_index;
    T score;
    int suppress_begin_index;
  };

  auto cmp = [](const Candidate bs_i, const Candidate bs_j) {
    return ((bs_i.score == bs_j.score) && (bs_i.box_index > bs_j.box_index)) ||
           bs_i.score < bs_j.score;
  };
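
  // `cmp` makes the priority queue below a max-heap on score; when scores are
  // equal, the candidate with the smaller box index is popped first, so the
  // selection order is deterministic.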
  std::priority_queue<Candidate, std::deque<Candidate>, decltype(cmp)>
      candidate_priority_queue(cmp);
  for (int i = 0; i < scores_data.size(); ++i) {
    if (scores_data[i] > score_threshold) {
      candidate_priority_queue.emplace(Candidate({i, scores_data[i], 0}));
    }
  }

  T scale = static_cast<T>(0.0);
  bool is_soft_nms = soft_nms_sigma > static_cast<T>(0.0);
  if (is_soft_nms) {
    scale = static_cast<T>(-0.5) / soft_nms_sigma;
  }

  auto suppress_weight = [similarity_threshold, scale,
                          is_soft_nms](const T sim) {
    const T weight = Eigen::numext::exp<T>(scale * sim * sim);
    return is_soft_nms || sim <= similarity_threshold ? weight
                                                      : static_cast<T>(0.0);
  };
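
  // `suppress_weight` applies a Gaussian penalty in the style of Soft-NMS
  // (Bodla et al., 2017): weight = exp(-sim^2 / (2 * soft_nms_sigma)).
  // Illustrative numbers: with soft_nms_sigma = 0.5 and sim (IOU) = 0.6,
  // scale = -1.0 and the weight is exp(-0.36) ~= 0.70, so the candidate keeps
  // about 70% of its score. With soft_nms_sigma = 0 (hard NMS), scale stays 0,
  // so the weight is 1 for sim <= similarity_threshold and 0 otherwise.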

  std::vector<int> selected;
  std::vector<T> selected_scores;
  float similarity;
  T original_score;
  Candidate next_candidate;

  while (selected.size() < output_size && !candidate_priority_queue.empty()) {
    next_candidate = candidate_priority_queue.top();
    original_score = next_candidate.score;
    candidate_priority_queue.pop();

    // Overlapping boxes are likely to have similar scores, therefore we
    // iterate through the previously selected boxes backwards in order to
    // see if `next_candidate` should be suppressed. We also enforce a property
    // that a candidate can be suppressed by another candidate no more than
    // once via `suppress_begin_index`, which tracks which previously selected
    // boxes have already been compared against `next_candidate` prior to a
    // given iteration. These previously selected boxes are then skipped over
    // in the following loop.
    bool should_hard_suppress = false;
    for (int j = static_cast<int>(selected.size()) - 1;
         j >= next_candidate.suppress_begin_index; --j) {
      similarity = similarity_fn(next_candidate.box_index, selected[j]);

      next_candidate.score *= suppress_weight(static_cast<T>(similarity));

      // First decide whether to perform hard suppression.
      if (!is_soft_nms && static_cast<T>(similarity) > similarity_threshold) {
        should_hard_suppress = true;
        break;
      }

      // If `next_candidate` survives hard suppression, apply soft suppression.
      if (next_candidate.score <= score_threshold) break;
    }
    // If `next_candidate.score` has not dropped below `score_threshold`
    // by this point, then we know that we went through all of the previous
    // selections and can safely update `suppress_begin_index` to
    // `selected.size()`. If, on the other hand, `next_candidate.score`
    // *has* dropped below the score threshold, then since `suppress_weight`
    // always returns values in [0, 1], further suppression by items that were
    // not covered in the above for loop would not have caused the algorithm
    // to select this item. We thus do the same update to
    // `suppress_begin_index`, but really, this element will not be added back
    // into the priority queue in the following.
    next_candidate.suppress_begin_index = selected.size();

    if (!should_hard_suppress) {
      if (next_candidate.score == original_score) {
        // Suppression has not occurred, so select `next_candidate`.
        selected.push_back(next_candidate.box_index);
        selected_scores.push_back(next_candidate.score);
        continue;
      }
      if (next_candidate.score > score_threshold) {
        // Soft suppression has occurred and the current score is still greater
        // than `score_threshold`; add `next_candidate` back onto the priority
        // queue.
        candidate_priority_queue.push(next_candidate);
      }
    }
  }

  int num_valid_outputs = selected.size();
  if (pad_to_max_output_size) {
    selected.resize(output_size, 0);
    selected_scores.resize(output_size, static_cast<T>(0));
  }
  if (ptr_num_valid_outputs) {
    *ptr_num_valid_outputs = num_valid_outputs;
  }

  // Allocate output tensors.
  Tensor* output_indices = nullptr;
  TensorShape output_shape({static_cast<int>(selected.size())});
  OP_REQUIRES_OK(context,
                 context->allocate_output(0, output_shape, &output_indices));
  TTypes<int, 1>::Tensor output_indices_data = output_indices->tensor<int, 1>();
  std::copy_n(selected.begin(), selected.size(), output_indices_data.data());

  if (return_scores_tensor) {
    Tensor* output_scores = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(1, output_shape, &output_scores));
    typename TTypes<T, 1>::Tensor output_scores_data =
        output_scores->tensor<T, 1>();
    std::copy_n(selected_scores.begin(), selected_scores.size(),
                output_scores_data.data());
  }
}

struct ResultCandidate {
  int box_index;
  float score;
  int class_idx;
  float box_coord[4];
};
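
// A box_index of -1 marks an unused slot: BatchedNonMaxSuppressionOp
// pre-fills result_candidate_vec with {-1, -1.0, -1, {0.0, 0.0, 0.0, 0.0}},
// and SelectResultPerBatch counts only entries with box_index > -1.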

void DoNMSPerClass(int batch_idx, int class_idx, const float* boxes_data,
                   const float* scores_data, int num_boxes, int q,
                   int num_classes, const int size_per_class,
                   const float score_threshold, const float iou_threshold,
                   std::vector<ResultCandidate>& result_candidate_vec) {
  std::vector<float> class_scores_data;
  class_scores_data.reserve(num_boxes);
  std::vector<float> class_boxes_data;
  class_boxes_data.reserve(num_boxes * 4);

  for (int box_idx = 0; box_idx < num_boxes; ++box_idx) {
    class_scores_data.push_back(scores_data[box_idx * num_classes + class_idx]);
    for (int cid = 0; cid < 4; ++cid) {
      if (q > 1) {
        class_boxes_data.push_back(
            boxes_data[(box_idx * q + class_idx) * 4 + cid]);
      } else {
        class_boxes_data.push_back(boxes_data[box_idx * 4 + cid]);
      }
    }
  }

  // Do NMS and collect the candidate indices in the form of a vector<int>.
  // Data structure for a selection candidate in NMS.
  struct Candidate {
    int box_index;
    float score;
  };
  auto cmp = [](const Candidate bs_i, const Candidate bs_j) {
    return bs_i.score < bs_j.score;
  };
  std::priority_queue<Candidate, std::vector<Candidate>, decltype(cmp)>
      candidate_priority_queue(cmp);
  for (int i = 0; i < num_boxes; ++i) {
    if (class_scores_data[i] > score_threshold) {
      candidate_priority_queue.emplace(Candidate({i, class_scores_data[i]}));
    }
  }

  std::vector<int> selected;
  std::vector<float> selected_boxes;
  Candidate next_candidate;

  // Wrap class_boxes_data in a 2-D tensor view.
  Eigen::array<Eigen::DenseIndex, 2> boxesShape = {num_boxes, 4};
  typename TTypes<float, 2>::ConstTensor boxes_data_t(class_boxes_data.data(),
                                                      boxesShape);
  float iou;
  while (selected.size() < size_per_class &&
         !candidate_priority_queue.empty()) {
    next_candidate = candidate_priority_queue.top();
    candidate_priority_queue.pop();
    // Overlapping boxes are likely to have similar scores,
    // therefore we iterate through the previously selected boxes backwards
    // in order to see if `next_candidate` should be suppressed.
    bool should_select = true;
    for (int j = selected.size() - 1; j >= 0; --j) {
      iou = IOU<float>(boxes_data_t, next_candidate.box_index, selected[j]);
      if (iou > iou_threshold) {
        should_select = false;
        break;
      }
    }

    if (should_select) {
      // Add the selected box to the result candidates, which are sorted by
      // score.
      int id = next_candidate.box_index;
      result_candidate_vec[selected.size() + size_per_class * class_idx] = {
          next_candidate.box_index,
          next_candidate.score,
          class_idx,
          {boxes_data_t(id, 0), boxes_data_t(id, 1), boxes_data_t(id, 2),
           boxes_data_t(id, 3)}};
      selected.push_back(next_candidate.box_index);
    }
  }
}

void SelectResultPerBatch(std::vector<float>& nmsed_boxes,
                          std::vector<float>& nmsed_scores,
                          std::vector<float>& nmsed_classes,
                          std::vector<ResultCandidate>& result_candidate_vec,
                          std::vector<int>& final_valid_detections,
                          const int batch_idx, int total_size_per_batch,
                          bool pad_per_class, int max_size_per_batch,
                          bool clip_boxes, int per_batch_size) {
  auto rc_cmp = [](const ResultCandidate rc_i, const ResultCandidate rc_j) {
    return rc_i.score > rc_j.score;
  };
  std::sort(result_candidate_vec.begin(), result_candidate_vec.end(), rc_cmp);

  int max_detections = 0;
  int result_candidate_size =
      std::count_if(result_candidate_vec.begin(), result_candidate_vec.end(),
                    [](ResultCandidate rc) { return rc.box_index > -1; });
  // If pad_per_class is false, we always pad to max_total_size
  if (!pad_per_class) {
    max_detections = std::min(result_candidate_size, total_size_per_batch);
  } else {
    max_detections = std::min(per_batch_size, result_candidate_size);
  }

  final_valid_detections[batch_idx] = max_detections;

  int curr_total_size = max_detections;
  int result_idx = 0;
  // Pick the top max_detections values
  while (curr_total_size > 0 && result_idx < result_candidate_vec.size()) {
    ResultCandidate next_candidate = result_candidate_vec[result_idx++];
    // Add to final output vectors
    if (clip_boxes) {
      const float box_min = 0.0;
      const float box_max = 1.0;
      nmsed_boxes.push_back(
          std::max(std::min(next_candidate.box_coord[0], box_max), box_min));
      nmsed_boxes.push_back(
          std::max(std::min(next_candidate.box_coord[1], box_max), box_min));
      nmsed_boxes.push_back(
          std::max(std::min(next_candidate.box_coord[2], box_max), box_min));
      nmsed_boxes.push_back(
          std::max(std::min(next_candidate.box_coord[3], box_max), box_min));
    } else {
      nmsed_boxes.push_back(next_candidate.box_coord[0]);
      nmsed_boxes.push_back(next_candidate.box_coord[1]);
      nmsed_boxes.push_back(next_candidate.box_coord[2]);
      nmsed_boxes.push_back(next_candidate.box_coord[3]);
    }
    nmsed_scores.push_back(next_candidate.score);
    nmsed_classes.push_back(next_candidate.class_idx);
    curr_total_size--;
  }

  nmsed_boxes.resize(per_batch_size * 4, 0);
  nmsed_scores.resize(per_batch_size, 0);
  nmsed_classes.resize(per_batch_size, 0);
}

void BatchedNonMaxSuppressionOp(
    OpKernelContext* context, const Tensor& inp_boxes, const Tensor& inp_scores,
    int num_boxes, const int max_size_per_class, const int total_size_per_batch,
    const float score_threshold, const float iou_threshold,
    bool pad_per_class = false, bool clip_boxes = true) {
  const int num_batches = inp_boxes.dim_size(0);
  int num_classes = inp_scores.dim_size(2);
  int q = inp_boxes.dim_size(2);

  const float* scores_data =
      const_cast<float*>(inp_scores.flat<float>().data());
  const float* boxes_data = const_cast<float*>(inp_boxes.flat<float>().data());

  int boxes_per_batch = num_boxes * q * 4;
  int scores_per_batch = num_boxes * num_classes;
  const int size_per_class = std::min(max_size_per_class, num_boxes);
  std::vector<std::vector<ResultCandidate>> result_candidate_vec(
      num_batches,
      std::vector<ResultCandidate>(size_per_class * num_classes,
                                   {-1, -1.0, -1, {0.0, 0.0, 0.0, 0.0}}));

  // [num_batches, per_batch_size * 4]
  std::vector<std::vector<float>> nmsed_boxes(num_batches);
  // [num_batches, per_batch_size]
  std::vector<std::vector<float>> nmsed_scores(num_batches);
  // [num_batches, per_batch_size]
  std::vector<std::vector<float>> nmsed_classes(num_batches);
  // [num_batches]
  std::vector<int> final_valid_detections(num_batches);

  auto shard_nms = [&](int begin, int end) {
    for (int idx = begin; idx < end; ++idx) {
      int batch_idx = idx / num_classes;
      int class_idx = idx % num_classes;
      DoNMSPerClass(batch_idx, class_idx,
                    boxes_data + boxes_per_batch * batch_idx,
                    scores_data + scores_per_batch * batch_idx, num_boxes, q,
                    num_classes, size_per_class, score_threshold, iou_threshold,
                    result_candidate_vec[batch_idx]);
    }
  };
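
  // The per-class NMS work above is sharded over the flattened
  // [num_batches, num_classes] index space, so each parallel task handles one
  // (batch, class) pair.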

  int length = num_batches * num_classes;
  // Input data boxes_data, scores_data
  int input_bytes = num_boxes * 10 * sizeof(float);
  int output_bytes = num_boxes * 10 * sizeof(float);
  int compute_cycles = Eigen::TensorOpCost::AddCost<int>() * num_boxes * 14 +
                       Eigen::TensorOpCost::MulCost<int>() * num_boxes * 9 +
                       Eigen::TensorOpCost::MulCost<float>() * num_boxes * 9 +
                       Eigen::TensorOpCost::AddCost<float>() * num_boxes * 8;
  // The cost here is not the actual number of cycles, but rather a set of
  // hand-tuned numbers that seem to work best.
  const Eigen::TensorOpCost cost(input_bytes, output_bytes, compute_cycles);
  const CPUDevice& d = context->eigen_device<CPUDevice>();
  d.parallelFor(length, cost, shard_nms);

  int per_batch_size = total_size_per_batch;
  if (pad_per_class) {
    per_batch_size =
        std::min(total_size_per_batch, max_size_per_class * num_classes);
  }

  Tensor* valid_detections_t = nullptr;
  TensorShape valid_detections_shape({num_batches});
  OP_REQUIRES_OK(context, context->allocate_output(3, valid_detections_shape,
                                                   &valid_detections_t));
  auto valid_detections_flat = valid_detections_t->template flat<int>();

  auto shard_result = [&](int begin, int end) {
    for (int batch_idx = begin; batch_idx < end; ++batch_idx) {
      SelectResultPerBatch(
          nmsed_boxes[batch_idx], nmsed_scores[batch_idx],
          nmsed_classes[batch_idx], result_candidate_vec[batch_idx],
          final_valid_detections, batch_idx, total_size_per_batch,
          pad_per_class, max_size_per_class * num_classes, clip_boxes,
          per_batch_size);
      valid_detections_flat(batch_idx) = final_valid_detections[batch_idx];
    }
  };
  length = num_batches;
  // Input data boxes_data, scores_data
  input_bytes =
      num_boxes * 10 * sizeof(float) + per_batch_size * 6 * sizeof(float);
  output_bytes =
      num_boxes * 5 * sizeof(float) + per_batch_size * 6 * sizeof(float);
  compute_cycles = Eigen::TensorOpCost::AddCost<int>() * num_boxes * 5 +
                   Eigen::TensorOpCost::AddCost<float>() * num_boxes * 5;
  // The cost here is not the actual number of cycles, but rather a set of
  // hand-tuned numbers that seem to work best.
  const Eigen::TensorOpCost cost_result(input_bytes, output_bytes,
                                        compute_cycles);
  d.parallelFor(length, cost_result, shard_result);

  Tensor* nmsed_boxes_t = nullptr;
  TensorShape boxes_shape({num_batches, per_batch_size, 4});
  OP_REQUIRES_OK(context,
                 context->allocate_output(0, boxes_shape, &nmsed_boxes_t));
  auto nmsed_boxes_flat = nmsed_boxes_t->template flat<float>();

  Tensor* nmsed_scores_t = nullptr;
  TensorShape scores_shape({num_batches, per_batch_size});
  OP_REQUIRES_OK(context,
                 context->allocate_output(1, scores_shape, &nmsed_scores_t));
  auto nmsed_scores_flat = nmsed_scores_t->template flat<float>();

  Tensor* nmsed_classes_t = nullptr;
  OP_REQUIRES_OK(context,
                 context->allocate_output(2, scores_shape, &nmsed_classes_t));
  auto nmsed_classes_flat = nmsed_classes_t->template flat<float>();

  auto shard_copy_result = [&](int begin, int end) {
    for (int idx = begin; idx < end; ++idx) {
      int batch_idx = idx / per_batch_size;
      int j = idx % per_batch_size;
      nmsed_scores_flat(idx) = nmsed_scores[batch_idx][j];
      nmsed_classes_flat(idx) = nmsed_classes[batch_idx][j];
      for (int k = 0; k < 4; ++k) {
        nmsed_boxes_flat(idx * 4 + k) = nmsed_boxes[batch_idx][j * 4 + k];
      }
    }
  };
  length = num_batches * per_batch_size;
  // Input data boxes_data, scores_data
  input_bytes = 6 * sizeof(float);
  output_bytes = 6 * sizeof(float);
  compute_cycles = Eigen::TensorOpCost::AddCost<int>() * 2 +
                   Eigen::TensorOpCost::MulCost<int>() * 2 +
                   Eigen::TensorOpCost::DivCost<float>() * 2;
  const Eigen::TensorOpCost cost_copy_result(input_bytes, output_bytes,
                                             compute_cycles);
  d.parallelFor(length, cost_copy_result, shard_copy_result);
}

}  // namespace

template <typename Device>
class NonMaxSuppressionOp : public OpKernel {
 public:
  explicit NonMaxSuppressionOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("iou_threshold", &iou_threshold_));
  }

  void Compute(OpKernelContext* context) override {
    // boxes: [num_boxes, 4]
    const Tensor& boxes = context->input(0);
    // scores: [num_boxes]
    const Tensor& scores = context->input(1);
    // max_output_size: scalar
    const Tensor& max_output_size = context->input(2);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(max_output_size.shape()),
        errors::InvalidArgument("max_output_size must be 0-D, got shape ",
                                max_output_size.shape().DebugString()));

    OP_REQUIRES(context, iou_threshold_ >= 0 && iou_threshold_ <= 1,
                errors::InvalidArgument("iou_threshold must be in [0, 1]"));
    int num_boxes = 0;
    ParseAndCheckBoxSizes(context, boxes, &num_boxes);
    CheckScoreSizes(context, num_boxes, scores);
    if (!context->status().ok()) {
      return;
    }
    auto similarity_fn = CreateIOUSimilarityFn<float>(boxes);

    const float score_threshold_val = std::numeric_limits<float>::lowest();
    const float dummy_soft_nms_sigma = static_cast<float>(0.0);
    DoNonMaxSuppressionOp<float>(context, scores, num_boxes, max_output_size,
                                 iou_threshold_, score_threshold_val,
                                 dummy_soft_nms_sigma, similarity_fn);
  }

 private:
  float iou_threshold_;
};

template <typename Device, typename T>
class NonMaxSuppressionV2Op : public OpKernel {
 public:
  explicit NonMaxSuppressionV2Op(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    // boxes: [num_boxes, 4]
    const Tensor& boxes = context->input(0);
    // scores: [num_boxes]
    const Tensor& scores = context->input(1);
    // max_output_size: scalar
    const Tensor& max_output_size = context->input(2);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(max_output_size.shape()),
        errors::InvalidArgument("max_output_size must be 0-D, got shape ",
                                max_output_size.shape().DebugString()));
    // iou_threshold: scalar
    const Tensor& iou_threshold = context->input(3);
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
                errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
                                        iou_threshold.shape().DebugString()));
    const T iou_threshold_val = iou_threshold.scalar<T>()();

    OP_REQUIRES(context,
                iou_threshold_val >= static_cast<T>(0.0) &&
                    iou_threshold_val <= static_cast<T>(1.0),
                errors::InvalidArgument("iou_threshold must be in [0, 1]"));
    int num_boxes = 0;
    ParseAndCheckBoxSizes(context, boxes, &num_boxes);
    CheckScoreSizes(context, num_boxes, scores);
    if (!context->status().ok()) {
      return;
    }
    auto similarity_fn = CreateIOUSimilarityFn<T>(boxes);

    const T score_threshold_val = std::numeric_limits<T>::lowest();
    const T dummy_soft_nms_sigma = static_cast<T>(0.0);
    DoNonMaxSuppressionOp<T>(context, scores, num_boxes, max_output_size,
                             iou_threshold_val, score_threshold_val,
                             dummy_soft_nms_sigma, similarity_fn);
  }
};

template <typename Device, typename T>
class NonMaxSuppressionV3Op : public OpKernel {
 public:
  explicit NonMaxSuppressionV3Op(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    // boxes: [num_boxes, 4]
    const Tensor& boxes = context->input(0);
    // scores: [num_boxes]
    const Tensor& scores = context->input(1);
    // max_output_size: scalar
    const Tensor& max_output_size = context->input(2);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(max_output_size.shape()),
        errors::InvalidArgument("max_output_size must be 0-D, got shape ",
                                max_output_size.shape().DebugString(),
                                " (Shape must be rank 0 but is ", "rank ",
                                max_output_size.dims(), ")"));
    // iou_threshold: scalar
    const Tensor& iou_threshold = context->input(3);
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
                errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
                                        iou_threshold.shape().DebugString(),
                                        " (Shape must be rank 0 but is rank ",
                                        iou_threshold.dims(), ")"));
    const T iou_threshold_val = iou_threshold.scalar<T>()();
    OP_REQUIRES(context,
                iou_threshold_val >= static_cast<T>(0.0) &&
                    iou_threshold_val <= static_cast<T>(1.0),
                errors::InvalidArgument("iou_threshold must be in [0, 1]"));
    // score_threshold: scalar
    const Tensor& score_threshold = context->input(4);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(score_threshold.shape()),
        errors::InvalidArgument("score_threshold must be 0-D, got shape ",
                                score_threshold.shape().DebugString()));
    const T score_threshold_val = score_threshold.scalar<T>()();

    int num_boxes = 0;
    ParseAndCheckBoxSizes(context, boxes, &num_boxes);
    CheckScoreSizes(context, num_boxes, scores);
    if (!context->status().ok()) {
      return;
    }

    auto similarity_fn = CreateIOUSimilarityFn<T>(boxes);

    const T dummy_soft_nms_sigma = static_cast<T>(0.0);
    DoNonMaxSuppressionOp<T>(context, scores, num_boxes, max_output_size,
                             iou_threshold_val, score_threshold_val,
                             dummy_soft_nms_sigma, similarity_fn);
  }
};
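
// For reference, this op family is reached from Python through
// tf.image.non_max_suppression; an illustrative call (values are examples
// only, not taken from any test):
//
//   selected_indices = tf.image.non_max_suppression(
//       boxes, scores, max_output_size=10, iou_threshold=0.5,
//       score_threshold=0.1)
//   selected_boxes = tf.gather(boxes, selected_indices)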

template <typename Device, typename T>
class NonMaxSuppressionV4Op : public OpKernel {
 public:
  explicit NonMaxSuppressionV4Op(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("pad_to_max_output_size",
                                             &pad_to_max_output_size_));
  }

  void Compute(OpKernelContext* context) override {
    // boxes: [num_boxes, 4]
    const Tensor& boxes = context->input(0);
    // scores: [num_boxes]
    const Tensor& scores = context->input(1);
    // max_output_size: scalar
    const Tensor& max_output_size = context->input(2);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(max_output_size.shape()),
        errors::InvalidArgument("max_output_size must be 0-D, got shape ",
                                max_output_size.shape().DebugString()));
    // iou_threshold: scalar
    const Tensor& iou_threshold = context->input(3);
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
                errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
                                        iou_threshold.shape().DebugString()));
    const T iou_threshold_val = iou_threshold.scalar<T>()();
    OP_REQUIRES(context,
                iou_threshold_val >= static_cast<T>(0.0) &&
                    iou_threshold_val <= static_cast<T>(1.0),
                errors::InvalidArgument("iou_threshold must be in [0, 1]"));
    // score_threshold: scalar
    const Tensor& score_threshold = context->input(4);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(score_threshold.shape()),
        errors::InvalidArgument("score_threshold must be 0-D, got shape ",
                                score_threshold.shape().DebugString()));
    const T score_threshold_val = score_threshold.scalar<T>()();

    int num_boxes = 0;
    ParseAndCheckBoxSizes(context, boxes, &num_boxes);
    CheckScoreSizes(context, num_boxes, scores);
    if (!context->status().ok()) {
      return;
    }

    auto similarity_fn = CreateIOUSimilarityFn<T>(boxes);
    int num_valid_outputs;

    bool return_scores_tensor_ = false;
    const T dummy_soft_nms_sigma = static_cast<T>(0.0);
    DoNonMaxSuppressionOp<T>(
        context, scores, num_boxes, max_output_size, iou_threshold_val,
        score_threshold_val, dummy_soft_nms_sigma, similarity_fn,
        return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs);
    if (!context->status().ok()) {
      return;
    }

    // Allocate scalar output tensor for number of indices computed.
    Tensor* num_outputs_t = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(
                                1, tensorflow::TensorShape{}, &num_outputs_t));
    num_outputs_t->scalar<int32>().setConstant(num_valid_outputs);
  }

 private:
  bool pad_to_max_output_size_;
};

template <typename Device, typename T>
class NonMaxSuppressionV5Op : public OpKernel {
 public:
  explicit NonMaxSuppressionV5Op(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("pad_to_max_output_size",
                                             &pad_to_max_output_size_));
  }

  void Compute(OpKernelContext* context) override {
    // boxes: [num_boxes, 4]
    const Tensor& boxes = context->input(0);
    // scores: [num_boxes]
    const Tensor& scores = context->input(1);
    // max_output_size: scalar
    const Tensor& max_output_size = context->input(2);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(max_output_size.shape()),
        errors::InvalidArgument("max_output_size must be 0-D, got shape ",
                                max_output_size.shape().DebugString()));
    // iou_threshold: scalar
    const Tensor& iou_threshold = context->input(3);
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
                errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
                                        iou_threshold.shape().DebugString()));
    const T iou_threshold_val = iou_threshold.scalar<T>()();
    OP_REQUIRES(context,
                iou_threshold_val >= static_cast<T>(0.0) &&
                    iou_threshold_val <= static_cast<T>(1.0),
                errors::InvalidArgument("iou_threshold must be in [0, 1]"));
    // score_threshold: scalar
    const Tensor& score_threshold = context->input(4);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(score_threshold.shape()),
        errors::InvalidArgument("score_threshold must be 0-D, got shape ",
                                score_threshold.shape().DebugString()));
    const T score_threshold_val = score_threshold.scalar<T>()();

    // soft_nms_sigma: scalar
    const Tensor& soft_nms_sigma = context->input(5);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(soft_nms_sigma.shape()),
        errors::InvalidArgument("soft_nms_sigma must be 0-D, got shape ",
                                soft_nms_sigma.shape().DebugString()));
    const T soft_nms_sigma_val = soft_nms_sigma.scalar<T>()();
    OP_REQUIRES(context, soft_nms_sigma_val >= static_cast<T>(0.0),
                errors::InvalidArgument("soft_nms_sigma_val must be >= 0"));

    int num_boxes = 0;
    ParseAndCheckBoxSizes(context, boxes, &num_boxes);
    CheckScoreSizes(context, num_boxes, scores);
    if (!context->status().ok()) {
      return;
    }

    auto similarity_fn = CreateIOUSimilarityFn<T>(boxes);
    int num_valid_outputs;

    // For NonMaxSuppressionV5Op, we always return a second output holding
    // corresponding scores, so `return_scores_tensor` should never be false.
    const bool return_scores_tensor_ = true;
    DoNonMaxSuppressionOp<T>(
        context, scores, num_boxes, max_output_size, iou_threshold_val,
        score_threshold_val, soft_nms_sigma_val, similarity_fn,
        return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs);
    if (!context->status().ok()) {
      return;
    }

    // Allocate scalar output tensor for number of indices computed.
    Tensor* num_outputs_t = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(
                                2, tensorflow::TensorShape{}, &num_outputs_t));
    num_outputs_t->scalar<int32>().setConstant(num_valid_outputs);
  }

 private:
  bool pad_to_max_output_size_;
};
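
// For reference, soft NMS is exposed in Python as
// tf.image.non_max_suppression_with_scores; an illustrative call (values are
// examples only):
//
//   selected_indices, selected_scores = (
//       tf.image.non_max_suppression_with_scores(
//           boxes, scores, max_output_size=10, iou_threshold=1.0,
//           score_threshold=0.1, soft_nms_sigma=0.5))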

template <typename Device>
class NonMaxSuppressionWithOverlapsOp : public OpKernel {
 public:
  explicit NonMaxSuppressionWithOverlapsOp(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    // overlaps: [num_boxes, num_boxes]
    const Tensor& overlaps = context->input(0);
    // scores: [num_boxes]
    const Tensor& scores = context->input(1);
    // max_output_size: scalar
    const Tensor& max_output_size = context->input(2);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(max_output_size.shape()),
        errors::InvalidArgument("max_output_size must be 0-D, got shape ",
                                max_output_size.shape().DebugString()));
    // overlap_threshold: scalar
    const Tensor& overlap_threshold = context->input(3);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(overlap_threshold.shape()),
        errors::InvalidArgument("overlap_threshold must be 0-D, got shape ",
                                overlap_threshold.shape().DebugString()));
    const float overlap_threshold_val = overlap_threshold.scalar<float>()();

    // score_threshold: scalar
    const Tensor& score_threshold = context->input(4);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(score_threshold.shape()),
        errors::InvalidArgument("score_threshold must be 0-D, got shape ",
                                score_threshold.shape().DebugString()));
    const float score_threshold_val = score_threshold.scalar<float>()();

    int num_boxes = 0;
    ParseAndCheckOverlapSizes(context, overlaps, &num_boxes);
    CheckScoreSizes(context, num_boxes, scores);
    if (!context->status().ok()) {
      return;
    }
    auto similarity_fn = CreateOverlapSimilarityFn<float>(overlaps);

    const float dummy_soft_nms_sigma = static_cast<float>(0.0);
    DoNonMaxSuppressionOp<float>(context, scores, num_boxes, max_output_size,
                                 overlap_threshold_val, score_threshold_val,
                                 dummy_soft_nms_sigma, similarity_fn);
  }
};

template <typename Device>
class CombinedNonMaxSuppressionOp : public OpKernel {
 public:
  explicit CombinedNonMaxSuppressionOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("pad_per_class", &pad_per_class_));
    OP_REQUIRES_OK(context, context->GetAttr("clip_boxes", &clip_boxes_));
  }

  void Compute(OpKernelContext* context) override {
    // boxes: [batch_size, num_anchors, q, 4]
    const Tensor& boxes = context->input(0);
    // scores: [batch_size, num_anchors, num_classes]
    const Tensor& scores = context->input(1);
    OP_REQUIRES(
        context, (boxes.dim_size(0) == scores.dim_size(0)),
        errors::InvalidArgument("boxes and scores must have same batch size"));

    // max_output_size: scalar
    const Tensor& max_output_size = context->input(2);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(max_output_size.shape()),
        errors::InvalidArgument("max_size_per_class must be 0-D, got shape ",
                                max_output_size.shape().DebugString()));
    const int max_size_per_class = max_output_size.scalar<int>()();
    OP_REQUIRES(context, max_size_per_class > 0,
                errors::InvalidArgument("max_size_per_class must be positive"));
    // max_total_size: scalar
    const Tensor& max_total_size = context->input(3);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(max_total_size.shape()),
        errors::InvalidArgument("max_total_size must be 0-D, got shape ",
                                max_total_size.shape().DebugString()));
    const int max_total_size_per_batch = max_total_size.scalar<int>()();
    OP_REQUIRES(context, max_total_size_per_batch > 0,
                errors::InvalidArgument("max_total_size must be > 0"));
    // Log a warning when `max_total_size` is very large, as it may cause an
    // OOM error.
    if (max_total_size_per_batch > pow(10, 6)) {
      LOG(WARNING) << "Detected a large value for `max_total_size`. This may "
                   << "cause OOM error. (max_total_size: "
                   << max_total_size.scalar<int>()() << ")";
    }
    // iou_threshold: scalar
    const Tensor& iou_threshold = context->input(4);
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(iou_threshold.shape()),
                errors::InvalidArgument("iou_threshold must be 0-D, got shape ",
                                        iou_threshold.shape().DebugString()));
    const float iou_threshold_val = iou_threshold.scalar<float>()();

    // score_threshold: scalar
    const Tensor& score_threshold = context->input(5);
    OP_REQUIRES(
        context, TensorShapeUtils::IsScalar(score_threshold.shape()),
        errors::InvalidArgument("score_threshold must be 0-D, got shape ",
                                score_threshold.shape().DebugString()));
    const float score_threshold_val = score_threshold.scalar<float>()();

    OP_REQUIRES(context, iou_threshold_val >= 0 && iou_threshold_val <= 1,
                errors::InvalidArgument("iou_threshold must be in [0, 1]"));
    int num_boxes = 0;
    const int num_classes = scores.dim_size(2);
    ParseAndCheckCombinedNMSBoxSizes(context, boxes, &num_boxes, num_classes);
    CheckCombinedNMSScoreSizes(context, num_boxes, scores);

    if (!context->status().ok()) {
      return;
    }
    BatchedNonMaxSuppressionOp(context, boxes, scores, num_boxes,
                               max_size_per_class, max_total_size_per_batch,
                               score_threshold_val, iou_threshold_val,
                               pad_per_class_, clip_boxes_);
  }

 private:
  bool pad_per_class_;
  bool clip_boxes_;
};
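
// For reference, this kernel is exposed in Python as
// tf.image.combined_non_max_suppression; an illustrative call (values are
// examples only):
//
//   nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
//       tf.image.combined_non_max_suppression(
//           boxes, scores, max_output_size_per_class=10, max_total_size=50,
//           iou_threshold=0.5, score_threshold=0.1))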

REGISTER_KERNEL_BUILDER(Name("NonMaxSuppression").Device(DEVICE_CPU),
                        NonMaxSuppressionOp<CPUDevice>);

REGISTER_KERNEL_BUILDER(
    Name("NonMaxSuppressionV2").TypeConstraint<float>("T").Device(DEVICE_CPU),
    NonMaxSuppressionV2Op<CPUDevice, float>);
REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV2")
                            .TypeConstraint<Eigen::half>("T")
                            .Device(DEVICE_CPU),
                        NonMaxSuppressionV2Op<CPUDevice, Eigen::half>);

REGISTER_KERNEL_BUILDER(
    Name("NonMaxSuppressionV3").TypeConstraint<float>("T").Device(DEVICE_CPU),
    NonMaxSuppressionV3Op<CPUDevice, float>);
REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV3")
                            .TypeConstraint<Eigen::half>("T")
                            .Device(DEVICE_CPU),
                        NonMaxSuppressionV3Op<CPUDevice, Eigen::half>);

REGISTER_KERNEL_BUILDER(
    Name("NonMaxSuppressionV4").TypeConstraint<float>("T").Device(DEVICE_CPU),
    NonMaxSuppressionV4Op<CPUDevice, float>);
REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV4")
                            .TypeConstraint<Eigen::half>("T")
                            .Device(DEVICE_CPU),
                        NonMaxSuppressionV4Op<CPUDevice, Eigen::half>);

REGISTER_KERNEL_BUILDER(
    Name("NonMaxSuppressionV5").TypeConstraint<float>("T").Device(DEVICE_CPU),
    NonMaxSuppressionV5Op<CPUDevice, float>);
REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV5")
                            .TypeConstraint<Eigen::half>("T")
                            .Device(DEVICE_CPU),
                        NonMaxSuppressionV5Op<CPUDevice, Eigen::half>);

REGISTER_KERNEL_BUILDER(
    Name("NonMaxSuppressionWithOverlaps").Device(DEVICE_CPU),
    NonMaxSuppressionWithOverlapsOp<CPUDevice>);

REGISTER_KERNEL_BUILDER(Name("CombinedNonMaxSuppression").Device(DEVICE_CPU),
                        CombinedNonMaxSuppressionOp<CPUDevice>);

}  // namespace tensorflow