1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/emulation.hpp" 47 48 namespace cv { namespace cuda { namespace device 49 { 50 namespace hough 51 { 52 __device__ int g_counter; 53 54 template <int PIXELS_PER_THREAD> buildPointList(const PtrStepSzb src,unsigned int * list)55 __global__ void buildPointList(const PtrStepSzb src, unsigned int* list) 56 { 57 __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD]; 58 __shared__ int s_qsize[4]; 59 __shared__ int s_globStart[4]; 60 61 const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x; 62 const int y = blockIdx.y * blockDim.y + threadIdx.y; 63 64 if (threadIdx.x == 0) 65 s_qsize[threadIdx.y] = 0; 66 __syncthreads(); 67 68 if (y < src.rows) 69 { 70 // fill the queue 71 const uchar* srcRow = src.ptr(y); 72 for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x) 73 { 74 if (srcRow[xx]) 75 { 76 const unsigned int val = (y << 16) | xx; 77 const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1); 78 s_queues[threadIdx.y][qidx] = val; 79 } 80 } 81 } 82 83 __syncthreads(); 84 85 // let one thread reserve the space required in the global list 86 if (threadIdx.x == 0 && threadIdx.y == 0) 87 { 88 // find how many items are stored in each list 89 int totalSize = 0; 90 for (int i = 0; i < blockDim.y; ++i) 91 { 92 s_globStart[i] = totalSize; 93 totalSize += s_qsize[i]; 94 } 95 96 // calculate the offset in the global list 97 const int globalOffset = atomicAdd(&g_counter, totalSize); 98 for (int i = 0; i < blockDim.y; ++i) 99 s_globStart[i] += globalOffset; 100 } 101 102 __syncthreads(); 103 104 // copy local queues to global queue 105 const int qsize = s_qsize[threadIdx.y]; 106 int gidx = s_globStart[threadIdx.y] + threadIdx.x; 107 for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x) 108 list[gidx] = s_queues[threadIdx.y][i]; 109 } 110 buildPointList_gpu(PtrStepSzb src,unsigned int * list)111 int buildPointList_gpu(PtrStepSzb src, unsigned int* list) 112 { 113 const int PIXELS_PER_THREAD = 16; 114 115 void* counterPtr; 116 cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); 117 118 cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); 119 120 const dim3 block(32, 4); 121 const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y)); 122 123 cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) ); 124 125 buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list); 126 cudaSafeCall( cudaGetLastError() ); 127 128 cudaSafeCall( cudaDeviceSynchronize() ); 129 130 int totalCount; 131 cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); 132 133 return totalCount; 134 } 135 } 136 }}} 137 138 #endif /* CUDA_DISABLER */ 139