/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }

#else /* !defined (HAVE_CUDA) */

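// Forward declarations of the GPU kernel launchers; their definitions live in the
// module's CUDA (.cu) sources.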
namespace cv { namespace cuda { namespace device
{
    namespace bf_match
    {
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                               const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                               cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                               const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                               cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                                    const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                                    cudaStream_t stream);

        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                               const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                               cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                               const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                               cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                                    const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                                    cudaStream_t stream);
    }

    namespace bf_knnmatch
    {
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                               const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                               cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                               const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                               cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                                    const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                                    cudaStream_t stream);

        template <typename T> void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                                const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                                cudaStream_t stream);
        template <typename T> void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                                const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                                cudaStream_t stream);
        template <typename T> void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                                     const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                                     cudaStream_t stream);
    }

    namespace bf_radius_match
    {
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                               const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                               cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                               const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                               cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                                    const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                                    cudaStream_t stream);

        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                               const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                               cudaStream_t stream);

        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                               const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                               cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                                    const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                                    cudaStream_t stream);
    }
}}}

namespace
{
    static void makeGpuCollection(const std::vector<GpuMat>& trainDescCollection,
                                  const std::vector<GpuMat>& masks,
                                  GpuMat& trainCollection,
                                  GpuMat& maskCollection)
    {
        if (trainDescCollection.empty())
            return;

        if (masks.empty())
        {
            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));

            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();

            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
                *trainCollectionCPU_ptr = trainDescCollection[i];

            trainCollection.upload(trainCollectionCPU);
            maskCollection.release();
        }
        else
        {
            CV_Assert( masks.size() == trainDescCollection.size() );

            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
            Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));

            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
            PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();

            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
            {
                const GpuMat& train = trainDescCollection[i];
                const GpuMat& mask = masks[i];

                CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) );

                *trainCollectionCPU_ptr = train;
                *maskCollectionCPU_ptr = mask;
            }

            trainCollection.upload(trainCollectionCPU);
            maskCollection.upload(maskCollectionCPU);
        }
    }

    class BFMatcher_Impl : public cv::cuda::DescriptorMatcher
    {
    public:
        explicit BFMatcher_Impl(int norm) : norm_(norm)
        {
            CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING );
        }

        virtual bool isMaskSupported() const { return true; }

        virtual void add(const std::vector<GpuMat>& descriptors)
        {
            trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end());
        }

        virtual const std::vector<GpuMat>& getTrainDescriptors() const
        {
            return trainDescCollection_;
        }

        virtual void clear()
        {
            trainDescCollection_.clear();
        }

        virtual bool empty() const
        {
            return trainDescCollection_.empty();
        }

        virtual void train()
        {
        }

        virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
                           std::vector<DMatch>& matches,
                           InputArray mask = noArray());

        virtual void match(InputArray queryDescriptors,
                           std::vector<DMatch>& matches,
                           const std::vector<GpuMat>& masks = std::vector<GpuMat>());

        virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                OutputArray matches,
                                InputArray mask = noArray(),
                                Stream& stream = Stream::Null());

        virtual void matchAsync(InputArray queryDescriptors,
                                OutputArray matches,
                                const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                Stream& stream = Stream::Null());

        virtual void matchConvert(InputArray gpu_matches,
                                  std::vector<DMatch>& matches);

        virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              InputArray mask = noArray(),
                              bool compactResult = false);

        virtual void knnMatch(InputArray queryDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                              bool compactResult = false);

        virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                   OutputArray matches,
                                   int k,
                                   InputArray mask = noArray(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchAsync(InputArray queryDescriptors,
                                   OutputArray matches,
                                   int k,
                                   const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchConvert(InputArray gpu_matches,
                                     std::vector< std::vector<DMatch> >& matches,
                                     bool compactResult = false);

        virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 InputArray mask = noArray(),
                                 bool compactResult = false);

        virtual void radiusMatch(InputArray queryDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                 bool compactResult = false);

        virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      InputArray mask = noArray(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchAsync(InputArray queryDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchConvert(InputArray gpu_matches,
                                        std::vector< std::vector<DMatch> >& matches,
                                        bool compactResult = false);

    private:
        int norm_;
        std::vector<GpuMat> trainDescCollection_;
    };

    //
    // 1 to 1 match
    //

    void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors,
                               std::vector<DMatch>& matches,
                               InputArray _mask)
    {
        GpuMat d_matches;
        matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask);
        matchConvert(d_matches, matches);
    }

    void BFMatcher_Impl::match(InputArray _queryDescriptors,
                               std::vector<DMatch>& matches,
                               const std::vector<GpuMat>& masks)
    {
        GpuMat d_matches;
        matchAsync(_queryDescriptors, d_matches, masks);
        matchConvert(d_matches, matches);
    }

    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                    OutputArray _matches,
                                    InputArray _mask,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(2, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));

        func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
                                    OutputArray _matches,
                                    const std::vector<GpuMat>& masks,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));

        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
                                      std::vector<DMatch>& matches)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );

        const int nQuery = gpu_matches.cols;

        matches.clear();
        matches.reserve(nQuery);

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;

        if (gpu_matches.rows == 2)
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(1);
        }
        else
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            imgIdxPtr = gpu_matches.ptr<int>(1);
            distancePtr = gpu_matches.ptr<float>(2);
        }

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            const int trainIdx = trainIdxPtr[queryIdx];
            if (trainIdx == -1)
                continue;

            const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0;
            const float distance = distancePtr[queryIdx];

            DMatch m(queryIdx, trainIdx, imgIdx, distance);

            matches.push_back(m);
        }
    }

    //
    // knn match
    //

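    // A common way to consume k = 2 results is Lowe's ratio test. The snippet below is
    // an illustrative sketch only (matcher, d_query, d_train and the 0.8 threshold are
    // assumed, typical values, not part of this implementation):
    //
    //   std::vector< std::vector<DMatch> > knn;
    //   matcher->knnMatch(d_query, d_train, knn, 2);
    //   std::vector<DMatch> good;
    //   for (size_t i = 0; i < knn.size(); ++i)
    //       if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
    //           good.push_back(knn[i][0]);
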
    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                  std::vector<std::vector<DMatch> >& matches,
                                  int k,
                                  InputArray _mask,
                                  bool compactResult)
    {
        GpuMat d_matches;
        knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask);
        knnMatchConvert(d_matches, matches, compactResult);
    }

    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
                                  std::vector<std::vector<DMatch> >& matches,
                                  int k,
                                  const std::vector<GpuMat>& masks,
                                  bool compactResult)
    {
        if (k == 2)
        {
            GpuMat d_matches;
            knnMatchAsync(_queryDescriptors, d_matches, k, masks);
            knnMatchConvert(d_matches, matches, compactResult);
        }
        else
        {
            const GpuMat query = _queryDescriptors.getGpuMat();

            if (query.empty() || trainDescCollection_.empty())
            {
                matches.clear();
                return;
            }

            CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

            std::vector< std::vector<DMatch> > curMatches;
            std::vector<DMatch> temp;
            temp.reserve(2 * k);

            matches.resize(query.rows);
            for (size_t i = 0; i < matches.size(); ++i)
                matches[i].reserve(k);

            for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
            {
                knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);

                for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
                {
                    std::vector<DMatch>& localMatch = curMatches[queryIdx];
                    std::vector<DMatch>& globalMatch = matches[queryIdx];

                    for (size_t i = 0; i < localMatch.size(); ++i)
                        localMatch[i].imgIdx = static_cast<int>(imgIdx);

                    temp.clear();
                    std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));

                    globalMatch.clear();
                    const size_t count = std::min(static_cast<size_t>(k), temp.size());
                    std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
                }
            }

            if (compactResult)
            {
                std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(),
                    [](const std::vector<DMatch>& e) { return e.empty(); });
                matches.erase(new_end, matches.end());
            }
        }
    }

    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                       OutputArray _matches,
                                       int k,
                                       InputArray _mask,
                                       Stream& stream)
    {
        using namespace cv::cuda::device::bf_knnmatch;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                 const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;
        const int nTrain = train.rows;

        GpuMat trainIdx, distance, allDist;
        if (k == 2)
        {
            _matches.create(2, nQuery, CV_32SC2);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
            distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
        }
        else
        {
            _matches.create(2 * nQuery, k, CV_32SC1);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
            distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);

            BufferPool pool(stream);
            allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
        }

        trainIdx.setTo(Scalar::all(-1), stream);

        func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
                                       OutputArray _matches,
                                       int k,
                                       const std::vector<GpuMat>& masks,
                                       Stream& stream)
    {
        using namespace cv::cuda::device::bf_knnmatch;

        if (k != 2)
        {
            CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
        }

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                 const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
            match2L1_gpu<unsigned short>, match2L1_gpu<short>,
            match2L1_gpu<int>, match2L1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
            0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
            0/*match2L2_gpu<int>*/, match2L2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
            match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
            match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3, nQuery, CV_32SC2);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
        GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1));
        GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));

        trainIdx.setTo(Scalar::all(-1), stream);

        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
                                         std::vector< std::vector<DMatch> >& matches,
                                         bool compactResult)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
                   (gpu_matches.type() == CV_32SC1) );

        int nQuery = -1, k = -1;

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;

        if (gpu_matches.type() == CV_32SC2)
        {
            nQuery = gpu_matches.cols;
            k = 2;

            if (gpu_matches.rows == 2)
            {
                trainIdxPtr = gpu_matches.ptr<int>(0);
                distancePtr = gpu_matches.ptr<float>(1);
            }
            else
            {
                trainIdxPtr = gpu_matches.ptr<int>(0);
                imgIdxPtr = gpu_matches.ptr<int>(1);
                distancePtr = gpu_matches.ptr<float>(2);
            }
        }
        else
        {
            nQuery = gpu_matches.rows / 2;
            k = gpu_matches.cols;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(nQuery);
        }

        matches.clear();
        matches.reserve(nQuery);

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            matches.push_back(std::vector<DMatch>());
            std::vector<DMatch>& curMatches = matches.back();
            curMatches.reserve(k);

            for (int i = 0; i < k; ++i)
            {
                const int trainIdx = *trainIdxPtr;
                const int imgIdx = imgIdxPtr ? *imgIdxPtr : 0;
                const float distance = *distancePtr;

                // Advance the packed-result pointers unconditionally so the per-query
                // stride of k entries stays aligned even when a slot holds no match.
                ++trainIdxPtr;
                ++distancePtr;
                if (imgIdxPtr)
                    ++imgIdxPtr;

                if (trainIdx == -1)
                    continue;

                DMatch m(queryIdx, trainIdx, imgIdx, distance);

                curMatches.push_back(m);
            }

            if (compactResult && curMatches.empty())
            {
                matches.pop_back();
            }
        }
    }

    //
    // radius match
    //

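    // For every query descriptor, radiusMatch collects all train descriptors whose
    // distance (in the norm selected at construction) does not exceed maxDistance.
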
    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                     std::vector<std::vector<DMatch> >& matches,
                                     float maxDistance,
                                     InputArray _mask,
                                     bool compactResult)
    {
        GpuMat d_matches;
        radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask);
        radiusMatchConvert(d_matches, matches, compactResult);
    }

    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
                                     std::vector<std::vector<DMatch> >& matches,
                                     float maxDistance,
                                     const std::vector<GpuMat>& masks,
                                     bool compactResult)
    {
        GpuMat d_matches;
        radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks);
        radiusMatchConvert(d_matches, matches, compactResult);
    }

    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                          OutputArray _matches,
                                          float maxDistance,
                                          InputArray _mask,
                                          Stream& stream)
    {
        using namespace cv::cuda::device::bf_radius_match;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;
        const int nTrain = train.rows;

        const int cols = std::max((nTrain / 100), nQuery);

        _matches.create(2 * nQuery + 1, cols, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step);
        GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step);
        GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery));

        nMatches.setTo(Scalar::all(0), stream);

        func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
                                          OutputArray _matches,
                                          float maxDistance,
                                          const std::vector<GpuMat>& masks,
                                          Stream& stream)
    {
        using namespace cv::cuda::device::bf_radius_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step);
        GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step);
        GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step);
        GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery));

        nMatches.setTo(Scalar::all(0), stream);

        std::vector<PtrStepSzb> trains_(trainDescCollection_.begin(), trainDescCollection_.end());
        std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());

        func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
             trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
                                            std::vector< std::vector<DMatch> >& matches,
                                            bool compactResult)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );

        int nQuery = -1;

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;
        const int* nMatchesPtr = NULL;

        if (gpu_matches.type() == CV_32SC1)
        {
            nQuery = (gpu_matches.rows - 1) / 2;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(nQuery);
            nMatchesPtr = gpu_matches.ptr<int>(2 * nQuery);
        }
        else
        {
            nQuery = (gpu_matches.rows - 1) / 3;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            imgIdxPtr = gpu_matches.ptr<int>(nQuery);
            distancePtr = gpu_matches.ptr<float>(2 * nQuery);
            nMatchesPtr = gpu_matches.ptr<int>(3 * nQuery);
        }

        matches.clear();
        matches.reserve(nQuery);

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);

            if (nMatched == 0)
            {
                if (!compactResult)
                {
                    matches.push_back(std::vector<DMatch>());
                }
            }
            else
            {
                matches.push_back(std::vector<DMatch>(nMatched));
                std::vector<DMatch>& curMatches = matches.back();

                for (int i = 0; i < nMatched; ++i)
                {
                    const int trainIdx = trainIdxPtr[i];

                    const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
                    const float distance = distancePtr[i];

                    DMatch m(queryIdx, trainIdx, imgIdx, distance);

                    curMatches[i] = m;
                }

                std::sort(curMatches.begin(), curMatches.end());
            }

            trainIdxPtr += gpu_matches.cols;
            distancePtr += gpu_matches.cols;
            if (imgIdxPtr)
                imgIdxPtr += gpu_matches.cols;
        }
    }
}

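//
// Example usage (illustrative sketch only; d_queryDescriptors / d_trainDescriptors are
// assumed to be descriptor GpuMats already on the device, e.g. produced by cv::cuda::ORB):
//
//   Ptr<cv::cuda::DescriptorMatcher> matcher =
//       cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);
//
//   std::vector<cv::DMatch> matches;
//   matcher->match(d_queryDescriptors, d_trainDescriptors, matches);
//
//   // Asynchronous variant on a user-provided stream:
//   cv::cuda::Stream stream;
//   cv::cuda::GpuMat d_matches;
//   matcher->matchAsync(d_queryDescriptors, d_trainDescriptors, d_matches, noArray(), stream);
//   stream.waitForCompletion();
//   matcher->matchConvert(d_matches, matches);
//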
Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
{
    return makePtr<BFMatcher_Impl>(norm);
}

#endif /* !defined (HAVE_CUDA) */