/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"

namespace cv { namespace cuda { namespace device
{
    // Capacity (in hypotheses) of the constant-memory tables used by
    // solve_pnp_ransac below.
    #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200

    namespace transform_points
    {
        // Rows of the 3x3 rotation matrix and the translation vector.
        // They are file-scope constant symbols, overwritten by every call().
        __constant__ float3 crot0;
        __constant__ float3 crot1;
        __constant__ float3 crot2;
        __constant__ float3 ctransl;

        // Per-element functor: returns R * p + t using the constant symbols above.
        struct TransformOp : unary_function<float3, float3>
        {
            __device__ __forceinline__ float3 operator()(const float3& p) const
            {
                return make_float3(
                        crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
                        crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
                        crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
            }
            __host__ __device__ __forceinline__ TransformOp() {}
            __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {}
        };

        // Applies the rigid transform R * p + t to every element of src and
        // stores the result in dst.
        //
        //   src    - input 3D points
        //   rot    - 9 floats, read as three consecutive rows of R
        //   transl - 3 floats, the translation t
        //   dst    - output 3D points
        //   stream - stream handed to the element-wise transform
        //
        // NOTE(review): the four cudaMemcpyToSymbol calls are the blocking
        // variant and are not enqueued on `stream`; only the transform itself
        // receives it. Because the symbols are shared, overlapping calls from
        // different streams would presumably overwrite each other's
        // parameters - confirm against callers.
        void call(const PtrStepSz<float3> src, const float* rot,
                  const float* transl, PtrStepSz<float3> dst,
                  cudaStream_t stream)
        {
            cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
            cv::cuda::device::transform(src, dst, TransformOp(), WithOutMask(), stream);
        }
    } // namespace transform_points

    namespace project_points
    {
        // Rows of the 3x3 rotation matrix, the translation vector and the
        // first two rows of the projection matrix.
        __constant__ float3 crot0;
        __constant__ float3 crot1;
        __constant__ float3 crot2;
        __constant__ float3 ctransl;
        __constant__ float3 cproj0;
        __constant__ float3 cproj1;

        // Per-element functor: rigidly transforms a 3D point and projects it
        // to 2D. The declared result type is float2 so that
        // unary_function::result_type agrees with what operator() returns.
        struct ProjectOp : unary_function<float3, float2>
        {
            __device__ __forceinline__ float2 operator()(const float3& p) const
            {
                // Rotate and translate in 3D
                float3 t = make_float3(
                        crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
                        crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
                        crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
                // Project on 2D plane: divide by depth, then add the third
                // element of each projection row as an offset.
                return make_float2(
                        (cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
                        (cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
            }
            __host__ __device__ __forceinline__ ProjectOp() {}
            __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
        };

        // Transforms every 3D point of src by (rot, transl) and projects it
        // with proj, writing 2D points to dst.
        //
        //   src    - input 3D points
        //   rot    - 9 floats, read as three consecutive rows of R
        //   transl - 3 floats, the translation t
        //   proj   - at least 6 floats; only the first two rows are read
        //   dst    - output 2D points
        //   stream - stream handed to the element-wise transform
        //
        // NOTE(review): as in transform_points::call, the symbol uploads are
        // blocking copies that do not use `stream`.
        void call(const PtrStepSz<float3> src, const float* rot,
                  const float* transl, const float* proj, PtrStepSz<float2> dst,
                  cudaStream_t stream)
        {
            cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
            cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
            cv::cuda::device::transform(src, dst, ProjectOp(), WithOutMask(), stream);
        }
    } // namespace project_points

    namespace solve_pnp_ransac
    {
        // Per-hypothesis pose tables: three float3 rows of R per hypothesis,
        // and one float3 translation per hypothesis.
        __constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
        __constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_MAX_NUM_ITERS];

        // Returns the number of hypotheses the constant tables can hold.
        int maxNumIters()
        {
            return SOLVE_PNP_RANSAC_MAX_NUM_ITERS;
        }

        __device__ __forceinline__ float sqr(float x)
        {
            return x * x;
        }

        // Counts, for one pose hypothesis per block, how many object points
        // land close enough to their image points.
        //
        // Launch layout: blockIdx.x selects the hypothesis (its rows in
        // crot_matrices, its entry in ctransl_vectors and its slot in
        // g_num_inliers). The threads of a block walk the points with a
        // stride of blockDim.x. The block-wide reduction uses BLOCK_SIZE ints
        // of static shared memory, so the kernel is launched with
        // blockDim.x == BLOCK_SIZE by the host wrapper below.
        //
        //   num_points     - number of entries in object / image
        //   object         - 3D points
        //   image          - 2D points, compared directly against (x/z, y/z)
        //                    of the transformed object point
        //                    (presumably normalized image coordinates - TODO confirm)
        //   dist_threshold - compared against the SQUARED 2D distance
        //   g_num_inliers  - output, one count per block
        template <int BLOCK_SIZE>
        __global__ void computeHypothesisScoresKernel(
                const int num_points, const float3* object, const float2* image,
                const float dist_threshold, int* g_num_inliers)
        {
            const float3* rot_mat = crot_matrices + blockIdx.x * 3;
            const float3 &transl_vec = ctransl_vectors[blockIdx.x];
            int num_inliers = 0;

            for (int i = threadIdx.x; i < num_points; i += blockDim.x)
            {
                float3 p = object[i];
                p = make_float3(
                        rot_mat[0].x * p.x + rot_mat[0].y * p.y + rot_mat[0].z * p.z + transl_vec.x,
                        rot_mat[1].x * p.x + rot_mat[1].y * p.y + rot_mat[1].z * p.z + transl_vec.y,
                        rot_mat[2].x * p.x + rot_mat[2].y * p.y + rot_mat[2].z * p.z + transl_vec.z);
                p.x /= p.z;
                p.y /= p.z;
                float2 image_p = image[i];
                if (sqr(p.x - image_p.x) + sqr(p.y - image_p.y) < dist_threshold)
                    ++num_inliers;
            }

            // Sum the per-thread counts across the block; thread 0 then
            // publishes the value it holds.
            __shared__ int s_num_inliers[BLOCK_SIZE];
            reduce<BLOCK_SIZE>(s_num_inliers, num_inliers, threadIdx.x, plus<int>());

            if (threadIdx.x == 0)
                g_num_inliers[blockIdx.x] = num_inliers;
        }

        // Uploads the pose hypotheses to constant memory and scores each of
        // them with computeHypothesisScoresKernel (one block per hypothesis),
        // then waits for the device to finish.
        //
        //   num_hypotheses    - number of poses; the tables hold at most
        //                       maxNumIters() of them
        //   num_points        - number of object / image correspondences
        //   rot_matrices      - num_hypotheses * 9 floats
        //   transl_vectors    - num_hypotheses translations
        //   object, image     - point arrays dereferenced by the kernel
        //   dist_threshold    - forwarded to the kernel unchanged
        //   hypothesis_scores - written by the kernel, one int per hypothesis
        void computeHypothesisScores(
                const int num_hypotheses, const int num_points, const float* rot_matrices,
                const float3* transl_vectors, const float3* object, const float2* image,
                const float dist_threshold, int* hypothesis_scores)
        {
            // Nothing to score; a grid with zero blocks is not a valid launch
            // configuration, so return before touching the device.
            if (num_hypotheses <= 0)
                return;

            cudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3)));
            cudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3)));

            // Single source of truth for the block size: it is both the
            // kernel's template argument and the launch dimension.
            const int block_size = 256;

            dim3 threads(block_size);
            dim3 grid(num_hypotheses);

            computeHypothesisScoresKernel<block_size><<<grid, threads>>>(
                    num_points, object, image, dist_threshold, hypothesis_scores);
            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );
        }
    } // namespace solve_pnp_ransac
}}} // namespace cv { namespace cuda { namespace device


#endif /* CUDA_DISABLER */