Home
last modified time | relevance | path

Searched refs:smem (Results 1 – 25 of 57) sorted by relevance

123

/external/opencv3/modules/core/src/opencl/
Dfft.cl34 void butterfly2(CT a0, CT a1, __local CT* smem, __global const CT* twiddles,
41 smem[dst_ind] = a0 + a1;
42 smem[dst_ind+block_size] = a0 - a1;
46 void butterfly4(CT a0, CT a1, CT a2, CT a3, __local CT* smem, __global const CT* twiddles,
61 smem[dst_ind] = b0 + b1;
62 smem[dst_ind + block_size] = a2 + a3;
63 smem[dst_ind + 2*block_size] = b0 - b1;
64 smem[dst_ind + 3*block_size] = a2 - a3;
68 void butterfly3(CT a0, CT a1, CT a2, __local CT* smem, __global const CT* twiddles,
80 smem[dst_ind] = a0 + b1;
[all …]
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/
Dscan.hpp59 __device__ T warpScanInclusive(T data, volatile T* smem, uint tid) in warpScanInclusive() argument
62 (void) smem; in warpScanInclusive()
79 smem[pos] = 0; in warpScanInclusive()
82 smem[pos] = data; in warpScanInclusive()
84 smem[pos] += smem[pos - 1]; in warpScanInclusive()
85 smem[pos] += smem[pos - 2]; in warpScanInclusive()
86 smem[pos] += smem[pos - 4]; in warpScanInclusive()
87 smem[pos] += smem[pos - 8]; in warpScanInclusive()
88 smem[pos] += smem[pos - 16]; in warpScanInclusive()
90 return smem[pos]; in warpScanInclusive()
[all …]
/external/opencv3/modules/objdetect/src/opencl/
Dobjdetect_hog.cl72 __global float* block_hists, __local float* smem)
87 __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
172 __local float* smem = squares + boffset;
173 float sum = smem[hid];
175 smem[hid] = sum = sum + smem[hid + 18];
178 smem[hid] = sum = sum + smem[hid + 9];
181 smem[hid] = sum + smem[hid + 4];
183 sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
192 sum = smem[hid];
194 smem[hid] = sum = sum + smem[hid + 18];
[all …]
/external/opencv3/modules/core/include/opencv2/core/cuda/detail/
Dreduce.hpp74 … static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) in loadToSmem()
76 thrust::get<I>(smem)[tid] = thrust::get<I>(val); in loadToSmem() local
78 For<I + 1, N>::loadToSmem(smem, val, tid); in loadToSmem()
81 …static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int ti… in loadFromSmem()
83 thrust::get<I>(val) = thrust::get<I>(smem)[tid]; in loadFromSmem()
85 For<I + 1, N>::loadFromSmem(smem, val, tid); in loadFromSmem()
89 …static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsi… in merge()
91 …pename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta]; in merge()
92 … thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg); in merge() local
94 For<I + 1, N>::merge(smem, val, tid, delta, op); in merge()
[all …]
/external/opencv3/modules/cudev/include/opencv2/cudev/block/detail/
Dreduce.hpp83 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) in loadToSmem()
85 get<I>(smem)[tid] = get<I>(val); in loadToSmem() local
87 For<I + 1, N>::loadToSmem(smem, val, tid); in loadToSmem()
91 __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid) in loadFromSmem()
93 get<I>(val) = get<I>(smem)[tid]; in loadFromSmem()
95 For<I + 1, N>::loadFromSmem(smem, val, tid); in loadFromSmem()
99 …__device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, … in merge()
101 …typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + del… in merge()
102 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); in merge() local
104 For<I + 1, N>::merge(smem, val, tid, delta, op); in merge()
[all …]
Dreduce_key_val.hpp82 … __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid) in loadToSmem()
84 get<I>(smem)[tid] = get<I>(data); in loadToSmem() local
86 For<I + 1, N>::loadToSmem(smem, data, tid); in loadToSmem()
90 …__device__ static void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid) in loadFromSmem()
92 get<I>(data) = get<I>(smem)[tid]; in loadFromSmem()
94 For<I + 1, N>::loadFromSmem(smem, data, tid); in loadFromSmem()
152 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid) in loadToSmem() argument
154 smem[tid] = data; in loadToSmem()
158 __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, uint tid) in loadFromSmem() argument
160 data = smem[tid]; in loadFromSmem()
[all …]
/external/opencv3/modules/imgproc/src/opencl/
Dclahe.cl50 inline int calc_lut(__local int* smem, int val, int tid)
52 smem[tid] = val;
57 smem[i] += smem[i - 1];
60 return smem[tid];
64 inline void reduce(volatile __local int* smem, int val, int tid)
66 smem[tid] = val;
70 smem[tid] = val += smem[tid + 128];
74 smem[tid] = val += smem[tid + 64];
78 smem[tid] += smem[tid + 32];
82 smem[tid] += smem[tid + 16];
[all …]
Dpyr_down.cl104 smem[0][col_lcl] = sum0; \
107 smem[1][col_lcl] = sum1;
123 vstore4(sum40, col_lcl, (__local float*) &smem[0][2]); \
126 vstore4(sum41, col_lcl, (__local float*) &smem[1][2]);
137 __local FT smem[2][LOCAL_SIZE + 4];
229 …FT sum = dot(vload4(0, (__local float*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (float4)(c…
231 …FT sum = dot(vload4(0, (__local double*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (double4)…
234 FT sum = co3 * smem[yin - y][2 + tid2 - 2];
235 sum = MAD(co2, smem[yin - y][2 + tid2 - 1], sum);
236 sum = MAD(co1, smem[yin - y][2 + tid2 ], sum);
[all …]
Dcanny.cl80 inline float3 sobel(int idx, __local const floatN *smem)
85 floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
86 smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]);
88 floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
89 smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]);
123 __local floatN smem[(GRP_SIZEX + 4) * (GRP_SIZEY + 4)];
136 smem[j] = loadpix(src + mad24(y, src_step, mad24(x, cn * (int)sizeof(TYPE), src_offset)));
152 mag[i] = (sobel(i, smem)).z;
153 mag[i + grp_sizey * (GRP_SIZEX + 2)] = (sobel(i + grp_sizey * (GRP_SIZEX + 4), smem)).z;
158 mag[i * (GRP_SIZEX + 2)] = (sobel(i * (GRP_SIZEX + 4), smem)).z;
[all …]
/external/opencv3/modules/cudev/include/opencv2/cudev/block/
Dvec_distance.hpp75 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) in reduceWarp()
77 warpReduce(smem, mySum, tid, plus<result_type>()); in reduceWarp()
80 … template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) in reduceBlock()
82 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); in reduceBlock()
104 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) in reduceWarp()
106 warpReduce(smem, mySum, tid, plus<result_type>()); in reduceWarp()
109 … template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) in reduceBlock()
111 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); in reduceBlock()
137 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) in reduceWarp()
139 warpReduce(smem, mySum, tid, plus<result_type>()); in reduceWarp()
[all …]
Dscan.hpp58 __device__ T blockScanInclusive(T data, volatile T* smem, uint tid) in blockScanInclusive() argument
63 T warpResult = warpScanInclusive(data, smem, tid); in blockScanInclusive()
71 smem[tid >> LOG_WARP_SIZE] = warpResult; in blockScanInclusive()
79 T val = smem[tid]; in blockScanInclusive()
82 smem[tid] = warpScanExclusive(val, smem, tid); in blockScanInclusive()
88 return warpResult + smem[tid >> LOG_WARP_SIZE]; in blockScanInclusive()
92 return warpScanInclusive(data, smem, tid); in blockScanInclusive()
97 __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid) in blockScanExclusive() argument
99 return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data; in blockScanExclusive()
/external/opencv3/modules/core/include/opencv2/core/cuda/
Dvec_distance.hpp70 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) in reduceAll()
72 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); in reduceAll()
94 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) in reduceAll()
96 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); in reduceAll()
120 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) in reduceAll()
122 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); in reduceAll()
145 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) in reduceAll()
147 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); in reduceAll()
160 …bal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) in calcVecDiffGlobal() argument
173 dist.reduceAll<THREAD_DIM>(smem, tid); in calcVecDiffGlobal()
[all …]
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/detail/
Dreduce.hpp81 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) in loadToSmem()
83 get<I>(smem)[tid] = get<I>(val); in loadToSmem() local
85 For<I + 1, N>::loadToSmem(smem, val, tid); in loadToSmem()
89 …__device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, … in merge()
91 …typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + del… in merge()
92 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); in merge() local
94 For<I + 1, N>::merge(smem, val, tid, delta, op); in merge()
132 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid) in loadToSmem() argument
134 smem[tid] = val; in loadToSmem()
139 …device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, in loadToSmem() argument
[all …]
/external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/
Dreduce_to_column.hpp61 … __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal) in call()
64 blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op); in call()
70 … __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal) in call()
73 …blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tup… in call()
79 … __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal) in call()
82 …blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), thr… in call()
88 … __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal) in call()
91 …blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2], smem[3]), tie(myVal.x, myVal.y, myVa… in call()
102 __shared__ work_elem_type smem[cn][BLOCK_SIZE]; in reduceToColumn() local
118 Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, cn>::call(smem, myVal); in reduceToColumn()
Dpyr_down.hpp68 __shared__ work_type smem[256 + 4]; in pyrDown() local
86 smem[2 + threadIdx.x] = sum; in pyrDown()
101 smem[threadIdx.x] = sum; in pyrDown()
116 smem[4 + threadIdx.x] = sum; in pyrDown()
130 smem[2 + threadIdx.x] = sum; in pyrDown()
145 smem[threadIdx.x] = sum; in pyrDown()
160 smem[4 + threadIdx.x] = sum; in pyrDown()
172 sum = 0.0625f * smem[2 + tid2 - 2]; in pyrDown()
173 sum = sum + 0.25f * smem[2 + tid2 - 1]; in pyrDown()
174 sum = sum + 0.375f * smem[2 + tid2 ]; in pyrDown()
[all …]
Dintegral.hpp63 __shared__ D smem[NUM_SCAN_THREADS * 2]; in horizontal_pass() local
84 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); in horizontal_pass()
105 __shared__ D smem[NUM_SCAN_THREADS * 2]; in horizontal_pass() local
127 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); in horizontal_pass()
481 __shared__ T smem[32][32]; in vertical_pass()
484 volatile T* smem_row = &smem[0][0] + 64 * threadIdx.y; in vertical_pass()
505 smem[threadIdx.y + 0][threadIdx.x] = 0.0f; in vertical_pass()
506 smem[threadIdx.y + 8][threadIdx.x] = 0.0f; in vertical_pass()
507 smem[threadIdx.y + 16][threadIdx.x] = 0.0f; in vertical_pass()
508 smem[threadIdx.y + 24][threadIdx.x] = 0.0f; in vertical_pass()
[all …]
Dhistogram.hpp60 __shared__ ResType smem[BIN_COUNT]; in histogram() local
66 smem[i] = 0; in histogram()
77 atomicAdd(&smem[data % BIN_COUNT], 1); in histogram()
86 const ResType histVal = smem[i]; in histogram()
/external/opencv3/modules/cudaimgproc/src/cuda/
Dcanny.cu249 __shared__ volatile int smem[18][18]; in edgesHysteresisLocalKernel() local
254 smem[threadIdx.y + 1][threadIdx.x + 1] = checkIdx(y, x, map.rows, map.cols) ? map(y, x) : 0; in edgesHysteresisLocalKernel()
256 smem[0][threadIdx.x + 1] = checkIdx(y - 1, x, map.rows, map.cols) ? map(y - 1, x) : 0; in edgesHysteresisLocalKernel()
258 smem[blockDim.y + 1][threadIdx.x + 1] = checkIdx(y + 1, x, map.rows, map.cols) ? map(y + 1, x) : 0; in edgesHysteresisLocalKernel()
260 smem[threadIdx.y + 1][0] = checkIdx(y, x - 1, map.rows, map.cols) ? map(y, x - 1) : 0; in edgesHysteresisLocalKernel()
262 smem[threadIdx.y + 1][blockDim.x + 1] = checkIdx(y, x + 1, map.rows, map.cols) ? map(y, x + 1) : 0; in edgesHysteresisLocalKernel()
264 smem[0][0] = checkIdx(y - 1, x - 1, map.rows, map.cols) ? map(y - 1, x - 1) : 0; in edgesHysteresisLocalKernel()
266 smem[0][blockDim.x + 1] = checkIdx(y - 1, x + 1, map.rows, map.cols) ? map(y - 1, x + 1) : 0; in edgesHysteresisLocalKernel()
268 smem[blockDim.y + 1][0] = checkIdx(y + 1, x - 1, map.rows, map.cols) ? map(y + 1, x - 1) : 0; in edgesHysteresisLocalKernel()
270 smem[blockDim.y + 1][blockDim.x + 1] = checkIdx(y + 1, x + 1, map.rows, map.cols) ? map(y + 1, x +… in edgesHysteresisLocalKernel()
[all …]
Dclahe.cu61 __shared__ int smem[512]; in calcLutKernel() local
67 smem[tid] = 0; in calcLutKernel()
76 Emulation::smem::atomicAdd(&smem[data], 1); in calcLutKernel()
82 int tHistVal = smem[tid]; in calcLutKernel()
99 reduce<256>(smem, clipped, tid, plus<int>()); in calcLutKernel()
119 const int lutVal = blockScanInclusive<256>(tHistVal, smem, tid); in calcLutKernel()
Dhough_circles.cu184 int* smem = DynamicSharedMem<int>(); in circlesAccumRadius() local
187 smem[i] = 0; in circlesAccumRadius()
210 Emulation::smem::atomicAdd(&smem[r + 1], 1); in circlesAccumRadius()
218 const int curVotes = smem[i + 1]; in circlesAccumRadius()
220 if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2]) in circlesAccumRadius()
/external/opencv3/modules/cudawarping/src/cuda/
Dpyr_down.cu59 __shared__ work_t smem[256 + 4]; in pyrDown() local
77 smem[2 + threadIdx.x] = sum; in pyrDown()
92 smem[threadIdx.x] = sum; in pyrDown()
107 smem[4 + threadIdx.x] = sum; in pyrDown()
121 smem[2 + threadIdx.x] = sum; in pyrDown()
136 smem[threadIdx.x] = sum; in pyrDown()
151 smem[4 + threadIdx.x] = sum; in pyrDown()
163 sum = 0.0625f * smem[2 + tid2 - 2]; in pyrDown()
164 sum = sum + 0.25f * smem[2 + tid2 - 1]; in pyrDown()
165 sum = sum + 0.375f * smem[2 + tid2 ]; in pyrDown()
[all …]
/external/opencv3/modules/cudafeatures2d/src/cuda/
Dbf_match.cu141 extern __shared__ int smem[]; in matchUnrolledCached()
145 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); in matchUnrolledCached()
146 …typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN… in matchUnrolledCached()
157 float* s_distance = (float*)(smem); in matchUnrolledCached()
158 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE); in matchUnrolledCached()
190 extern __shared__ int smem[]; in matchUnrolledCached()
194 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); in matchUnrolledCached()
195 …typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN… in matchUnrolledCached()
214 float* s_distance = (float*)(smem); in matchUnrolledCached()
215 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE); in matchUnrolledCached()
[all …]
Dbf_knnmatch.cu379 extern __shared__ int smem[]; in matchUnrolledCached()
383 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); in matchUnrolledCached()
384 …typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN… in matchUnrolledCached()
397 float* s_distance = (float*)(smem); in matchUnrolledCached()
398 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE); in matchUnrolledCached()
429 extern __shared__ int smem[]; in matchUnrolledCached()
433 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); in matchUnrolledCached()
434 …typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN… in matchUnrolledCached()
456 float* s_distance = (float*)(smem); in matchUnrolledCached()
457 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE); in matchUnrolledCached()
[all …]
/external/opencv3/modules/photo/src/cuda/
Dnlm.cu189 … __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*> smem_tuple(float* smem) in smem_tuple()
191 return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE); in smem_tuple()
208 …ceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*> smem_tuple(float* smem) in smem_tuple()
210 return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE); in smem_tuple()
227 …::tuple<volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem) in smem_tuple()
229 …return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLO… in smem_tuple()
246 …float*, volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem) in smem_tuple()
248 …return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLO… in smem_tuple()
502 int smem = search_window * search_window * sizeof(int); in nlm_fast_gpu() local
505 fast_nlm_kernel<<<grid, block, smem>>>(fnlm, (PtrStepSz<T>)dst); in nlm_fast_gpu()
/external/opencv3/modules/cudaoptflow/src/cuda/
Dfarneback.cu75 extern __shared__ float smem[]; in polynomialExpansion()
76 volatile float *row = smem + tx; in polynomialExpansion()
140 int smem = 3 * block.x * sizeof(float); in polynomialExpansionGpu() local
143 polynomialExpansion<5><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst); in polynomialExpansionGpu()
145 polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst); in polynomialExpansionGpu()
364 extern __shared__ float smem[]; in boxFilter5()
367 volatile float *row = smem + 5 * ty * smw; in boxFilter5()
422 int smem = (block.x + 2*ksizeHalf) * 5 * block.y * sizeof(float); in boxFilter5Gpu() local
425 boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst); in boxFilter5Gpu()
441 int smem = (block.x + 2*ksizeHalf) * 5 * block.y * sizeof(float); in boxFilter5Gpu_CC11() local
[all …]

123