/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CUDA_DEVICE_WARP_HPP__
#define __OPENCV_CUDA_DEVICE_WARP_HPP__

/** @file
 * @deprecated Use @ref cudev instead.
 */

//! @cond IGNORED

namespace cv { namespace cuda { namespace device
{
    /** \brief Warp-cooperative helpers: every calling lane handles the elements
      * at positions lane, lane + STRIDE, lane + 2 * STRIDE, ... of a range.
      *
      * All members are static device functions. The range helpers need iterators
      * that support `it + n`, `it += n` and `a < b` (raw pointers qualify).
      */
    struct Warp
    {
        enum
        {
            LOG_WARP_SIZE = 5,                  // log2 of the warp width
            WARP_SIZE     = 1 << LOG_WARP_SIZE, // 32 lanes
            STRIDE        = WARP_SIZE           // per-lane step used by the range helpers
        };

        /** \brief Returns the warp lane ID of the calling thread. */
        static __device__ __forceinline__ unsigned int laneId()
        {
            unsigned int ret;
            // Read the PTX special register %laneid.
            asm("mov.u32 %0, %laneid;" : "=r"(ret) );
            return ret;
        }

        /** \brief Assigns \a value to the elements of [beg, end) owned by the calling lane.
          * \param beg   start of the range (identical for all lanes)
          * \param end   one past the last element of the range
          * \param value value stored into each visited element
          */
        template<typename It, typename T>
        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
        {
            for(It t = beg + laneId(); t < end; t += STRIDE)
                *t = value;
        }

        /** \brief Copies the elements of [beg, end) owned by the calling lane to the
          * matching positions of the range starting at \a out.
          *
          * Element beg[i] is stored to out[i]. The destination is offset by the lane
          * id in the same way as the source, so lanes never write the same slot.
          *
          * \param beg start of the input range (identical for all lanes)
          * \param end one past the last input element
          * \param out start of the output range (identical for all lanes, NOT pre-offset)
          * \return \a out advanced by STRIDE for every element the calling lane copied
          */
        template<typename InIt, typename OutIt>
        static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
        {
            const unsigned int lane = laneId();

            // `out` tracks the base of the current 32-element stripe; the lane's
            // own slot inside that stripe is `out + lane`.
            for(InIt t = beg + lane; t < end; t += STRIDE, out += STRIDE)
                *(out + lane) = *t;
            return out;
        }

        /** \brief Applies unary \a op to the elements of [beg, end) owned by the calling
          * lane and stores each result to the matching position of \a out.
          *
          * out[i] receives op(beg[i]). The destination is offset by the lane id in the
          * same way as the source, so lanes never write the same slot.
          *
          * \param beg start of the input range (identical for all lanes)
          * \param end one past the last input element
          * \param out start of the output range (identical for all lanes, NOT pre-offset)
          * \param op  unary callable applied to each input element
          * \return \a out advanced by STRIDE for every element the calling lane processed
          */
        template<typename InIt, typename OutIt, class UnOp>
        static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
        {
            const unsigned int lane = laneId();

            for(InIt t = beg + lane; t < end; t += STRIDE, out += STRIDE)
                *(out + lane) = op(*t);
            return out;
        }

        /** \brief Applies binary \a op to pairs of elements taken from [beg1, end1) and the
          * range starting at \a beg2, storing each result to the matching position of \a out.
          *
          * out[i] receives op(beg1[i], beg2[i]). Both inputs and the destination are
          * offset by the lane id, so lanes never write the same slot.
          *
          * \param beg1 start of the first input range (identical for all lanes)
          * \param end1 one past the last element of the first input range
          * \param beg2 start of the second input range
          * \param out  start of the output range (identical for all lanes, NOT pre-offset)
          * \param op   binary callable applied to each input pair
          * \return \a out advanced by STRIDE for every pair the calling lane processed
          */
        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
        static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
        {
            const unsigned int lane = laneId();

            InIt1 t1 = beg1 + lane;
            InIt2 t2 = beg2 + lane;
            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
                *(out + lane) = op(*t1, *t2);
            return out;
        }

        /** \brief Folds the 32 values ptr[0..31] with \a op and returns the result.
          *
          * Lanes 0..15 combine ptr[lane] with ptr[lane + 16], ptr[lane + 8],
          * ptr[lane + 4], ptr[lane + 2] and ptr[lane + 1] in turn, writing each
          * partial back to ptr[lane]. The buffer is modified in place. Every calling
          * thread then returns *ptr.
          *
          * \param ptr buffer with at least 32 readable elements (index lane + 16 is
          *            read for lane up to 15); presumably per-warp shared memory,
          *            to be confirmed against callers
          * \param op  binary callable used to combine two values
          * \return the value read from ptr[0] after the folding steps
          *
          * NOTE(review): there is no explicit warp barrier between the steps or before
          * the final read; the code relies on `volatile` accesses and on the lanes of
          * the warp advancing together. On GPUs with independent thread scheduling
          * that assumption may not hold - verify whether __syncwarp() is required.
          */
        template <class T, class BinOp>
        static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
        {
            const unsigned int lane = laneId();

            if (lane < 16)
            {
                T partial = ptr[lane];

                ptr[lane] = partial = op(partial, ptr[lane + 16]);
                ptr[lane] = partial = op(partial, ptr[lane + 8]);
                ptr[lane] = partial = op(partial, ptr[lane + 4]);
                ptr[lane] = partial = op(partial, ptr[lane + 2]);
                ptr[lane] = partial = op(partial, ptr[lane + 1]);
            }

            return *ptr;
        }

        /** \brief Fills [beg, end) with consecutive values starting at \a value, so that
          * beg[i] receives value + i (an iota-style fill).
          * \param beg   start of the output range (identical for all lanes)
          * \param end   one past the last output element
          * \param value value stored at beg[0]; T must support `+=` with an integer
          */
        template<typename OutIt, typename T>
        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
        {
            unsigned int lane = laneId();
            value += lane;  // first value this lane is responsible for

            for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
                *t = value;
        }
    };
}}} // namespace cv { namespace cuda { namespace device

//! @endcond

#endif /* __OPENCV_CUDA_DEVICE_WARP_HPP__ */