1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/utility.hpp" 47 #include "opencv2/core/cuda/limits.hpp" 48 #include "opencv2/core/cuda/vec_distance.hpp" 49 #include "opencv2/core/cuda/datamov_utils.hpp" 50 51 namespace cv { namespace cuda { namespace device 52 { 53 namespace bf_radius_match 54 { 55 /////////////////////////////////////////////////////////////////////////////// 56 // Match Unrolled 57 58 template <int BLOCK_SIZE, int MAX_DESC_LEN, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask> matchUnrolled(const PtrStepSz<T> query,int imgIdx,const PtrStepSz<T> train,float maxDistance,const Mask mask,PtrStepi bestTrainIdx,PtrStepi bestImgIdx,PtrStepf bestDistance,unsigned int * nMatches,int maxCount)59 __global__ void matchUnrolled(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask, 60 PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount) 61 { 62 extern __shared__ int smem[]; 63 64 const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y; 65 const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x; 66 67 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); 68 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE); 69 70 Dist dist; 71 72 #pragma unroll 73 for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i) 74 { 75 const int loadX = threadIdx.x + i * BLOCK_SIZE; 76 77 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0; 78 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0; 79 80 if (loadX < query.cols) 81 { 82 T val; 83 84 ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val); 85 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val; 86 87 ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val); 88 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val; 89 } 90 91 __syncthreads(); 92 93 #pragma unroll 94 for (int j = 0; j < BLOCK_SIZE; ++j) 95 dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]); 96 97 __syncthreads(); 98 } 99 100 float distVal = (typename Dist::result_type)dist; 101 102 if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance) 103 { 104 unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1); 105 if (ind < maxCount) 106 { 107 bestTrainIdx.ptr(queryIdx)[ind] = trainIdx; 108 if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx; 109 bestDistance.ptr(queryIdx)[ind] = distVal; 110 } 111 } 112 } 113 114 template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask> matchUnrolled(const PtrStepSz<T> & query,const PtrStepSz<T> & train,float maxDistance,const Mask & mask,const PtrStepSzi & trainIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)115 void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask, 116 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream) 117 { 118 const dim3 block(BLOCK_SIZE, BLOCK_SIZE); 119 const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); 120 121 const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); 122 123 matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask, 124 trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols); 125 cudaSafeCall( cudaGetLastError() ); 126 127 if (stream == 0) 128 cudaSafeCall( cudaDeviceSynchronize() ); 129 } 130 131 template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T> matchUnrolled(const PtrStepSz<T> & query,const PtrStepSz<T> * trains,int n,float maxDistance,const PtrStepSzb * masks,const PtrStepSzi & trainIdx,const PtrStepSzi & imgIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)132 void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks, 133 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 134 cudaStream_t stream) 135 { 136 const dim3 block(BLOCK_SIZE, BLOCK_SIZE); 137 138 const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); 139 140 for (int i = 0; i < n; ++i) 141 { 142 const PtrStepSz<T> train = trains[i]; 143 144 const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); 145 146 if (masks != 0 && masks[i].data) 147 { 148 matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]), 149 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); 150 } 151 else 152 { 153 matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(), 154 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); 155 } 156 cudaSafeCall( cudaGetLastError() ); 157 } 158 159 if (stream == 0) 160 cudaSafeCall( cudaDeviceSynchronize() ); 161 } 162 163 /////////////////////////////////////////////////////////////////////////////// 164 // Match 165 166 template <int BLOCK_SIZE, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask> match(const PtrStepSz<T> query,int imgIdx,const PtrStepSz<T> train,float maxDistance,const Mask mask,PtrStepi bestTrainIdx,PtrStepi bestImgIdx,PtrStepf bestDistance,unsigned int * nMatches,int maxCount)167 __global__ void match(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask, 168 PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount) 169 { 170 extern __shared__ int smem[]; 171 172 const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y; 173 const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x; 174 175 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); 176 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE); 177 178 Dist dist; 179 180 for (int i = 0, endi = (query.cols + BLOCK_SIZE - 1) / BLOCK_SIZE; i < endi; ++i) 181 { 182 const int loadX = threadIdx.x + i * BLOCK_SIZE; 183 184 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0; 185 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0; 186 187 if (loadX < query.cols) 188 { 189 T val; 190 191 ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val); 192 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val; 193 194 ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val); 195 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val; 196 } 197 198 __syncthreads(); 199 200 #pragma unroll 201 for (int j = 0; j < BLOCK_SIZE; ++j) 202 dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]); 203 204 __syncthreads(); 205 } 206 207 float distVal = (typename Dist::result_type)dist; 208 209 if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance) 210 { 211 unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1); 212 if (ind < maxCount) 213 { 214 bestTrainIdx.ptr(queryIdx)[ind] = trainIdx; 215 if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx; 216 bestDistance.ptr(queryIdx)[ind] = distVal; 217 } 218 } 219 } 220 221 template <int BLOCK_SIZE, typename Dist, typename T, typename Mask> match(const PtrStepSz<T> & query,const PtrStepSz<T> & train,float maxDistance,const Mask & mask,const PtrStepSzi & trainIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)222 void match(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask, 223 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 224 cudaStream_t stream) 225 { 226 const dim3 block(BLOCK_SIZE, BLOCK_SIZE); 227 const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); 228 229 const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); 230 231 match<BLOCK_SIZE, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask, 232 trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols); 233 cudaSafeCall( cudaGetLastError() ); 234 235 if (stream == 0) 236 cudaSafeCall( cudaDeviceSynchronize() ); 237 } 238 239 template <int BLOCK_SIZE, typename Dist, typename T> match(const PtrStepSz<T> & query,const PtrStepSz<T> * trains,int n,float maxDistance,const PtrStepSzb * masks,const PtrStepSzi & trainIdx,const PtrStepSzi & imgIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)240 void match(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks, 241 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 242 cudaStream_t stream) 243 { 244 const dim3 block(BLOCK_SIZE, BLOCK_SIZE); 245 246 const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); 247 248 for (int i = 0; i < n; ++i) 249 { 250 const PtrStepSz<T> train = trains[i]; 251 252 const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); 253 254 if (masks != 0 && masks[i].data) 255 { 256 match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]), 257 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); 258 } 259 else 260 { 261 match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(), 262 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); 263 } 264 cudaSafeCall( cudaGetLastError() ); 265 } 266 267 if (stream == 0) 268 cudaSafeCall( cudaDeviceSynchronize() ); 269 } 270 271 /////////////////////////////////////////////////////////////////////////////// 272 // Match dispatcher 273 274 template <typename Dist, typename T, typename Mask> matchDispatcher(const PtrStepSz<T> & query,const PtrStepSz<T> & train,float maxDistance,const Mask & mask,const PtrStepSzi & trainIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)275 void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask, 276 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 277 cudaStream_t stream) 278 { 279 if (query.cols <= 64) 280 { 281 matchUnrolled<16, 64, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 282 } 283 else if (query.cols <= 128) 284 { 285 matchUnrolled<16, 128, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 286 } 287 /*else if (query.cols <= 256) 288 { 289 matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 290 } 291 else if (query.cols <= 512) 292 { 293 matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 294 } 295 else if (query.cols <= 1024) 296 { 297 matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 298 }*/ 299 else 300 { 301 match<16, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); 302 } 303 } 304 305 template <typename Dist, typename T> matchDispatcher(const PtrStepSz<T> & query,const PtrStepSz<T> * trains,int n,float maxDistance,const PtrStepSzb * masks,const PtrStepSzi & trainIdx,const PtrStepSzi & imgIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)306 void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks, 307 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 308 cudaStream_t stream) 309 { 310 if (query.cols <= 64) 311 { 312 matchUnrolled<16, 64, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 313 } 314 else if (query.cols <= 128) 315 { 316 matchUnrolled<16, 128, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 317 } 318 /*else if (query.cols <= 256) 319 { 320 matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 321 } 322 else if (query.cols <= 512) 323 { 324 matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 325 } 326 else if (query.cols <= 1024) 327 { 328 matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 329 }*/ 330 else 331 { 332 match<16, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); 333 } 334 } 335 336 /////////////////////////////////////////////////////////////////////////////// 337 // Radius Match caller 338 matchL1_gpu(const PtrStepSzb & query,const PtrStepSzb & train,float maxDistance,const PtrStepSzb & mask,const PtrStepSzi & trainIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)339 template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, 340 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 341 cudaStream_t stream) 342 { 343 if (mask.data) 344 { 345 matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask), 346 trainIdx, distance, nMatches, 347 stream); 348 } 349 else 350 { 351 matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(), 352 trainIdx, distance, nMatches, 353 stream); 354 } 355 } 356 357 template void matchL1_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 358 //template void matchL1_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 359 template void matchL1_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 360 template void matchL1_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 361 template void matchL1_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 362 template void matchL1_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 363 matchL2_gpu(const PtrStepSzb & query,const PtrStepSzb & train,float maxDistance,const PtrStepSzb & mask,const PtrStepSzi & trainIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)364 template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, 365 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 366 cudaStream_t stream) 367 { 368 if (mask.data) 369 { 370 matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask), 371 trainIdx, distance, nMatches, 372 stream); 373 } 374 else 375 { 376 matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(), 377 trainIdx, distance, nMatches, 378 stream); 379 } 380 } 381 382 //template void matchL2_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 383 //template void matchL2_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 384 //template void matchL2_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 385 //template void matchL2_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 386 //template void matchL2_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 387 template void matchL2_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 388 matchHamming_gpu(const PtrStepSzb & query,const PtrStepSzb & train,float maxDistance,const PtrStepSzb & mask,const PtrStepSzi & trainIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)389 template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, 390 const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 391 cudaStream_t stream) 392 { 393 if (mask.data) 394 { 395 matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask), 396 trainIdx, distance, nMatches, 397 stream); 398 } 399 else 400 { 401 matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(), 402 trainIdx, distance, nMatches, 403 stream); 404 } 405 } 406 407 template void matchHamming_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 408 //template void matchHamming_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 409 template void matchHamming_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 410 //template void matchHamming_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 411 template void matchHamming_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 412 matchL1_gpu(const PtrStepSzb & query,const PtrStepSzb * trains,int n,float maxDistance,const PtrStepSzb * masks,const PtrStepSzi & trainIdx,const PtrStepSzi & imgIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)413 template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, 414 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 415 cudaStream_t stream) 416 { 417 matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks, 418 trainIdx, imgIdx, distance, nMatches, 419 stream); 420 } 421 422 template void matchL1_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 423 //template void matchL1_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 424 template void matchL1_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 425 template void matchL1_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 426 template void matchL1_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 427 template void matchL1_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 428 matchL2_gpu(const PtrStepSzb & query,const PtrStepSzb * trains,int n,float maxDistance,const PtrStepSzb * masks,const PtrStepSzi & trainIdx,const PtrStepSzi & imgIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)429 template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, 430 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 431 cudaStream_t stream) 432 { 433 matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks, 434 trainIdx, imgIdx, distance, nMatches, 435 stream); 436 } 437 438 //template void matchL2_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 439 //template void matchL2_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 440 //template void matchL2_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 441 //template void matchL2_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 442 //template void matchL2_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 443 template void matchL2_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 444 matchHamming_gpu(const PtrStepSzb & query,const PtrStepSzb * trains,int n,float maxDistance,const PtrStepSzb * masks,const PtrStepSzi & trainIdx,const PtrStepSzi & imgIdx,const PtrStepSzf & distance,const PtrStepSz<unsigned int> & nMatches,cudaStream_t stream)445 template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, 446 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, 447 cudaStream_t stream) 448 { 449 matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks, 450 trainIdx, imgIdx, distance, nMatches, 451 stream); 452 } 453 454 template void matchHamming_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 455 //template void matchHamming_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 456 template void matchHamming_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 457 //template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 458 template void matchHamming_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream); 459 } // namespace bf_radius_match 460 }}} // namespace cv { namespace cuda { namespace cudev 461 462 463 #endif /* CUDA_DISABLER */ 464