/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

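// Brute-force descriptor matcher on the GPU: every query descriptor is compared
// against every train descriptor under the L1, L2, or Hamming norm. The actual
// distance computations run in the CUDA kernels declared below; this file
// provides the host-side cv::cuda::DescriptorMatcher implementation around them.
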
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }

#else /* !defined (HAVE_CUDA) */

namespace cv { namespace cuda { namespace device
{
    namespace bf_match
    {
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
            cudaStream_t stream);

        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
            cudaStream_t stream);
    }

    namespace bf_knnmatch
    {
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
            const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
            const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
            const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
            cudaStream_t stream);

        template <typename T> void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
            cudaStream_t stream);
        template <typename T> void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
            cudaStream_t stream);
        template <typename T> void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
            const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
            cudaStream_t stream);
    }

    namespace bf_radius_match
    {
        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);
        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);
        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);

        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);

        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream);
    }
}}}

namespace
{
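    // Packs the per-image train descriptor matrices (and optional masks) into
    // single-row GpuMats of device pointer structs (PtrStepSzb / PtrStepb), so
    // the multi-image kernels can address every train image after one upload.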
    static void makeGpuCollection(const std::vector<GpuMat>& trainDescCollection,
                                  const std::vector<GpuMat>& masks,
                                  GpuMat& trainCollection,
                                  GpuMat& maskCollection)
    {
        if (trainDescCollection.empty())
            return;

        if (masks.empty())
        {
            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));

            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();

            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
                *trainCollectionCPU_ptr = trainDescCollection[i];

            trainCollection.upload(trainCollectionCPU);
            maskCollection.release();
        }
        else
        {
            CV_Assert( masks.size() == trainDescCollection.size() );

            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
            Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));

            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
            PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();

            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
            {
                const GpuMat& train = trainDescCollection[i];
                const GpuMat& mask = masks[i];

                CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) );

                *trainCollectionCPU_ptr = train;
                *maskCollectionCPU_ptr = mask;
            }

            trainCollection.upload(trainCollectionCPU);
            maskCollection.upload(maskCollectionCPU);
        }
    }

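    // DescriptorMatcher backed by the brute-force CUDA kernels. The synchronous
    // match/knnMatch/radiusMatch calls are thin wrappers: they run the *Async
    // variant and then decode the packed GPU result into std::vector<DMatch>.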
    class BFMatcher_Impl : public cv::cuda::DescriptorMatcher
    {
    public:
        explicit BFMatcher_Impl(int norm) : norm_(norm)
        {
            CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING );
        }

        virtual bool isMaskSupported() const { return true; }

        virtual void add(const std::vector<GpuMat>& descriptors)
        {
            trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end());
        }

        virtual const std::vector<GpuMat>& getTrainDescriptors() const
        {
            return trainDescCollection_;
        }

        virtual void clear()
        {
            trainDescCollection_.clear();
        }

        virtual bool empty() const
        {
            return trainDescCollection_.empty();
        }

        virtual void train()
        {
        }

        virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
                           std::vector<DMatch>& matches,
                           InputArray mask = noArray());

        virtual void match(InputArray queryDescriptors,
                           std::vector<DMatch>& matches,
                           const std::vector<GpuMat>& masks = std::vector<GpuMat>());

        virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                OutputArray matches,
                                InputArray mask = noArray(),
                                Stream& stream = Stream::Null());

        virtual void matchAsync(InputArray queryDescriptors,
                                OutputArray matches,
                                const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                Stream& stream = Stream::Null());

        virtual void matchConvert(InputArray gpu_matches,
                                  std::vector<DMatch>& matches);

        virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              InputArray mask = noArray(),
                              bool compactResult = false);

        virtual void knnMatch(InputArray queryDescriptors,
                              std::vector<std::vector<DMatch> >& matches,
                              int k,
                              const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                              bool compactResult = false);

        virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                   OutputArray matches,
                                   int k,
                                   InputArray mask = noArray(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchAsync(InputArray queryDescriptors,
                                   OutputArray matches,
                                   int k,
                                   const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                   Stream& stream = Stream::Null());

        virtual void knnMatchConvert(InputArray gpu_matches,
                                     std::vector< std::vector<DMatch> >& matches,
                                     bool compactResult = false);

        virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 InputArray mask = noArray(),
                                 bool compactResult = false);

        virtual void radiusMatch(InputArray queryDescriptors,
                                 std::vector<std::vector<DMatch> >& matches,
                                 float maxDistance,
                                 const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                 bool compactResult = false);

        virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      InputArray mask = noArray(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchAsync(InputArray queryDescriptors,
                                      OutputArray matches,
                                      float maxDistance,
                                      const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                      Stream& stream = Stream::Null());

        virtual void radiusMatchConvert(InputArray gpu_matches,
                                        std::vector< std::vector<DMatch> >& matches,
                                        bool compactResult = false);

    private:
        int norm_;
        std::vector<GpuMat> trainDescCollection_;
    };

    //
    // 1 to 1 match
    //

    void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors,
                               std::vector<DMatch>& matches,
                               InputArray _mask)
    {
        GpuMat d_matches;
        matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask);
        matchConvert(d_matches, matches);
    }

    void BFMatcher_Impl::match(InputArray _queryDescriptors,
                               std::vector<DMatch>& matches,
                               const std::vector<GpuMat>& masks)
    {
        GpuMat d_matches;
        matchAsync(_queryDescriptors, d_matches, masks);
        matchConvert(d_matches, matches);
    }

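    // matchAsync packs its result in one CV_32SC1 GpuMat so a single allocation
    // and download cover all fields: row 0 holds trainIdx, the last row holds
    // distance (float bits viewed through an int matrix), and the multi-image
    // overload inserts an imgIdx row in between. matchConvert decodes it.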
    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                    OutputArray _matches,
                                    InputArray _mask,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(2, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));

        func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
                                    OutputArray _matches,
                                    const std::vector<GpuMat>& masks,
                                    Stream& stream)
    {
        using namespace cv::cuda::device::bf_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3, nQuery, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
        GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1));
        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));

        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    }

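    // Decodes the packed result of matchAsync. Accepts either the GpuMat
    // produced on the device (downloaded here) or a Mat the caller downloaded
    // beforehand; entries flagged with trainIdx == -1 are skipped.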
    void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
                                      std::vector<DMatch>& matches)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );

        const int nQuery = gpu_matches.cols;

        matches.clear();
        matches.reserve(nQuery);

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;

        if (gpu_matches.rows == 2)
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(1);
        }
        else
        {
            trainIdxPtr = gpu_matches.ptr<int>(0);
            imgIdxPtr = gpu_matches.ptr<int>(1);
            distancePtr = gpu_matches.ptr<float>(2);
        }

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            const int trainIdx = trainIdxPtr[queryIdx];
            if (trainIdx == -1)
                continue;

            const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0;
            const float distance = distancePtr[queryIdx];

            DMatch m(queryIdx, trainIdx, imgIdx, distance);

            matches.push_back(m);
        }
    }

    //
    // knn match
    //

    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                  std::vector<std::vector<DMatch> >& matches,
                                  int k,
                                  InputArray _mask,
                                  bool compactResult)
    {
        GpuMat d_matches;
        knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask);
        knnMatchConvert(d_matches, matches, compactResult);
    }

    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
                                  std::vector<std::vector<DMatch> >& matches,
                                  int k,
                                  const std::vector<GpuMat>& masks,
                                  bool compactResult)
    {
        if (k == 2)
        {
            GpuMat d_matches;
            knnMatchAsync(_queryDescriptors, d_matches, k, masks);
            knnMatchConvert(d_matches, matches, compactResult);
        }
        else
        {
            const GpuMat query = _queryDescriptors.getGpuMat();

            if (query.empty() || trainDescCollection_.empty())
            {
                matches.clear();
                return;
            }

            CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

            std::vector< std::vector<DMatch> > curMatches;
            std::vector<DMatch> temp;
            temp.reserve(2 * k);

            matches.resize(query.rows);
            for (size_t i = 0; i < matches.size(); ++i)
                matches[i].reserve(k);

            for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
            {
                knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);

                for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
                {
                    std::vector<DMatch>& localMatch = curMatches[queryIdx];
                    std::vector<DMatch>& globalMatch = matches[queryIdx];

                    for (size_t i = 0; i < localMatch.size(); ++i)
                        localMatch[i].imgIdx = imgIdx;

                    temp.clear();
                    std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));

                    globalMatch.clear();
                    const size_t count = std::min(static_cast<size_t>(k), temp.size());
                    std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
                }
            }

            if (compactResult)
            {
                std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(), std::mem_fun_ref(&std::vector<DMatch>::empty));
                matches.erase(new_end, matches.end());
            }
        }
    }

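    // knnMatchAsync result layout: for k == 2 the two best candidates per query
    // are packed as int2/float2 pairs in a 2 x nQuery CV_32SC2 matrix (the
    // multi-image overload, which only supports k == 2, adds an imgIdx row);
    // for any other k the result is a (2*nQuery) x k CV_32SC1 matrix, trainIdx
    // rows first, distance rows after. allDist is kernel scratch space drawn
    // from the stream's BufferPool.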
    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                       OutputArray _matches,
                                       int k,
                                       InputArray _mask,
                                       Stream& stream)
    {
        using namespace cv::cuda::device::bf_knnmatch;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                                 const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;
        const int nTrain = train.rows;

        GpuMat trainIdx, distance, allDist;
        if (k == 2)
        {
            _matches.create(2, nQuery, CV_32SC2);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
            distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
        }
        else
        {
            _matches.create(2 * nQuery, k, CV_32SC1);
            GpuMat matches = _matches.getGpuMat();

            trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
            distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);

            BufferPool pool(stream);
            allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
        }

        trainIdx.setTo(Scalar::all(-1), stream);

        func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
                                       OutputArray _matches,
                                       int k,
                                       const std::vector<GpuMat>& masks,
                                       Stream& stream)
    {
        using namespace cv::cuda::device::bf_knnmatch;

        if (k != 2)
        {
            CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
        }

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                                 const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
            match2L1_gpu<unsigned short>, match2L1_gpu<short>,
            match2L1_gpu<int>, match2L1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
            0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
            0/*match2L2_gpu<int>*/, match2L2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
            match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
            match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3, nQuery, CV_32SC2);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
        GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1));
        GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));

        trainIdx.setTo(Scalar::all(-1), stream);

        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
    }

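    // Decodes either knn layout above, telling them apart by element type
    // (CV_32SC2 for the packed k == 2 form, CV_32SC1 otherwise). Empty slots
    // are marked with trainIdx == -1; with compactResult set, queries without
    // any match contribute no entry at all.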
    void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
                                         std::vector< std::vector<DMatch> >& matches,
                                         bool compactResult)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
                   (gpu_matches.type() == CV_32SC1) );

        int nQuery = -1, k = -1;

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;

        if (gpu_matches.type() == CV_32SC2)
        {
            nQuery = gpu_matches.cols;
            k = 2;

            if (gpu_matches.rows == 2)
            {
                trainIdxPtr = gpu_matches.ptr<int>(0);
                distancePtr = gpu_matches.ptr<float>(1);
            }
            else
            {
                trainIdxPtr = gpu_matches.ptr<int>(0);
                imgIdxPtr = gpu_matches.ptr<int>(1);
                distancePtr = gpu_matches.ptr<float>(2);
            }
        }
        else
        {
            nQuery = gpu_matches.rows / 2;
            k = gpu_matches.cols;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(nQuery);
        }

        matches.clear();
        matches.reserve(nQuery);

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            matches.push_back(std::vector<DMatch>());
            std::vector<DMatch>& curMatches = matches.back();
            curMatches.reserve(k);

            // Indexed access keeps the pointers aligned to the fixed k-entry
            // stride even when a query has fewer than k matches (empty slots
            // are flagged with trainIdx == -1).
            for (int i = 0; i < k; ++i)
            {
                const int trainIdx = trainIdxPtr[i];
                if (trainIdx == -1)
                    continue;

                const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
                const float distance = distancePtr[i];

                DMatch m(queryIdx, trainIdx, imgIdx, distance);

                curMatches.push_back(m);
            }

            trainIdxPtr += k;
            distancePtr += k;
            if (imgIdxPtr)
                imgIdxPtr += k;

            if (compactResult && curMatches.empty())
            {
                matches.pop_back();
            }
        }
    }

    //
    // radius match
    //

    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                     std::vector<std::vector<DMatch> >& matches,
                                     float maxDistance,
                                     InputArray _mask,
                                     bool compactResult)
    {
        GpuMat d_matches;
        radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask);
        radiusMatchConvert(d_matches, matches, compactResult);
    }

    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
                                     std::vector<std::vector<DMatch> >& matches,
                                     float maxDistance,
                                     const std::vector<GpuMat>& masks,
                                     bool compactResult)
    {
        GpuMat d_matches;
        radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks);
        radiusMatchConvert(d_matches, matches, compactResult);
    }

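    // radiusMatchAsync result layout: nQuery rows of trainIdx, nQuery rows of
    // distance (the multi-image overload adds nQuery rows of imgIdx), plus one
    // final row counting the matches found per query. The column count is only
    // a capacity guess, so rows are typically part-filled; nMatches reports how
    // many entries of each row are valid.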
    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
                                          OutputArray _matches,
                                          float maxDistance,
                                          InputArray _mask,
                                          Stream& stream)
    {
        using namespace cv::cuda::device::bf_radius_match;

        const GpuMat query = _queryDescriptors.getGpuMat();
        const GpuMat train = _trainDescriptors.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        if (query.empty() || train.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
        CV_Assert( train.cols == query.cols && train.type() == query.type() );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;
        const int nTrain = train.rows;

        const int cols = std::max((nTrain / 100), nQuery);

        _matches.create(2 * nQuery + 1, cols, CV_32SC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step);
        GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step);
        GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery));

        nMatches.setTo(Scalar::all(0), stream);

        func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
    }

    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
                                          OutputArray _matches,
                                          float maxDistance,
                                          const std::vector<GpuMat>& masks,
                                          Stream& stream)
    {
        using namespace cv::cuda::device::bf_radius_match;

        const GpuMat query = _queryDescriptors.getGpuMat();

        if (query.empty() || trainDescCollection_.empty())
        {
            _matches.release();
            return;
        }

        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );

        GpuMat trainCollection, maskCollection;
        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);

        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
                                 cudaStream_t stream);

        static const caller_t callersL1[] =
        {
            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            matchL1_gpu<int>, matchL1_gpu<float>
        };
        static const caller_t callersL2[] =
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
        };
        static const caller_t callersHamming[] =
        {
            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        };

        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;

        const caller_t func = callers[query.depth()];
        if (func == 0)
        {
            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
        }

        const int nQuery = query.rows;

        _matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
        GpuMat matches = _matches.getGpuMat();

        GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step);
        GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step);
        GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step);
        GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery));

        nMatches.setTo(Scalar::all(0), stream);

        std::vector<PtrStepSzb> trains_(trainDescCollection_.begin(), trainDescCollection_.end());
        std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());

        func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
            trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
    }

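    // Decodes the radius layouts above, telling them apart by element type
    // (CV_32SC1 for the single-train variant, CV_32FC1 for the multi-image
    // variant). Per-query match counts are clamped to the row capacity, and
    // each query's matches are sorted by increasing distance.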
    void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
                                            std::vector< std::vector<DMatch> >& matches,
                                            bool compactResult)
    {
        Mat gpu_matches;
        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_matches.getGpuMat().download(gpu_matches);
        }
        else
        {
            gpu_matches = _gpu_matches.getMat();
        }

        if (gpu_matches.empty())
        {
            matches.clear();
            return;
        }

        CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );

        int nQuery = -1;

        const int* trainIdxPtr = NULL;
        const int* imgIdxPtr = NULL;
        const float* distancePtr = NULL;
        const int* nMatchesPtr = NULL;

        if (gpu_matches.type() == CV_32SC1)
        {
            nQuery = (gpu_matches.rows - 1) / 2;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            distancePtr = gpu_matches.ptr<float>(nQuery);
            nMatchesPtr = gpu_matches.ptr<int>(2 * nQuery);
        }
        else
        {
            nQuery = (gpu_matches.rows - 1) / 3;

            trainIdxPtr = gpu_matches.ptr<int>(0);
            imgIdxPtr = gpu_matches.ptr<int>(nQuery);
            distancePtr = gpu_matches.ptr<float>(2 * nQuery);
            nMatchesPtr = gpu_matches.ptr<int>(3 * nQuery);
        }

        matches.clear();
        matches.reserve(nQuery);

        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
        {
            const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);

            if (nMatched == 0)
            {
                if (!compactResult)
                {
                    matches.push_back(std::vector<DMatch>());
                }
            }
            else
            {
                matches.push_back(std::vector<DMatch>(nMatched));
                std::vector<DMatch>& curMatches = matches.back();

                for (int i = 0; i < nMatched; ++i)
                {
                    const int trainIdx = trainIdxPtr[i];

                    const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
                    const float distance = distancePtr[i];

                    DMatch m(queryIdx, trainIdx, imgIdx, distance);

                    curMatches[i] = m;
                }

                std::sort(curMatches.begin(), curMatches.end());
            }

            trainIdxPtr += gpu_matches.cols;
            distancePtr += gpu_matches.cols;
            if (imgIdxPtr)
                imgIdxPtr += gpu_matches.cols;
        }
    }
}

Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
{
    return makePtr<BFMatcher_Impl>(norm);
}
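
// A minimal usage sketch (illustrative only; it assumes OpenCV was built with
// CUDA and the cudafeatures2d module, and that descQuery/descTrain are GpuMats
// holding binary descriptors such as ORB, so NORM_HAMMING applies):
//
//   cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
//       cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);
//
//   std::vector<std::vector<cv::DMatch> > knn;
//   matcher->knnMatch(descQuery, descTrain, knn, 2);  // two best matches per query
//
//   std::vector<cv::DMatch> good;                     // Lowe's ratio test
//   for (size_t i = 0; i < knn.size(); ++i)
//       if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
//           good.push_back(knn[i][0]);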

#endif /* !defined (HAVE_CUDA) */