Home
last modified time | relevance | path

Searched refs:BlockReduce (Results 1 – 8 of 8) sorted by relevance

/external/pytorch/aten/src/ATen/native/cuda/
Dblock_reduce.cuh124 BlockReduce(T val, const ReduceOp& op, const T& identity_element, T* shared) { in BlockReduce() function
DDistanceKernel.cu116 agg = cuda_utils::BlockReduce(agg, DistReduceOp<scalar_t, F>{}, agg_init, agg_smem); in pdist_kernel_cuda_impl()
207 agg = cuda_utils::BlockReduce(agg, DistReduceOp<scalar_t, F>{}, agg_init, agg_smem); in cdist_kernel_cuda_impl()
DTensorModeKernel.cuh93 return cuda_utils::BlockReduce(local, reduceOp, init, smem); in reduceBlockWithNThreadLocalReductions()
Dgroup_norm_kernel.cu59 val = cuda_utils::BlockReduce( in RowwiseMomentsCUDAKernel()
DSoftMax.cu444 T result = cuda_utils::BlockReduce<T, Reduction<T>>(value, op, defaultVal, smem_cache); in blockReduceWarp()
Dlayer_norm_kernel.cu77 val = cuda_utils::BlockReduce( in RowwiseMomentsCUDAKernel()
DNormalization.cuh124 …sum = cuda_utils::BlockReduce<scalar_t, SumReduceOp<scalar_t>, cuda_utils::Block2D>(sum, reduce_op… in reduce()
/external/tensorflow/tensorflow/core/kernels/
Dreduction_gpu_kernels.cu.h183 typedef gpuprim::BlockReduce<value_type, num_threads> BlockReduce;
185 __shared__ typename BlockReduce::TempStorage temp_storage;
194 sum = BlockReduce(temp_storage).Reduce(sum, op, num_elements_to_reduce);