Searched refs:warpSize (Results 1 – 12 of 12) sorted by relevance
/external/clang/lib/Headers/ |
D | __clang_cuda_intrinsics.h | 39 int __width = warpSize) { \ 41 ((warpSize - __width) << 8) | (__Mask)); \ 44 int __width = warpSize) { \ 46 ((warpSize - __width) << 8) | (__Mask)); \ 49 int __width = warpSize) { \ 54 int __width = warpSize) { \ 69 unsigned long long __in, int __offset, int __width = warpSize) { \ 74 int __width = warpSize) { \
|
D | cuda_builtin_vars.h | 120 __attribute__((device)) const int warpSize = 32; variable
|
/external/clang/test/SemaCUDA/ |
D | cuda-builtin-vars.cu | 35 out[i++] = warpSize; in kernel() 36 …warpSize = 0; // expected-error {{cannot assign to variable 'warpSize' with const-qualified type '… in kernel() 56 const void *wsptr = &warpSize; in kernel()
|
/external/tensorflow/tensorflow/core/util/ |
D | cuda_kernel_helper.h | 72 int width = warpSize) { 78 unsigned mask, Eigen::half value, int delta, int width = warpSize) { 84 unsigned mask, Eigen::half value, int delta, int width = warpSize) { 90 unsigned mask, Eigen::half value, int lane_mask, int width = warpSize) {
|
D | cuda_kernel_helper_test.cu.cc | 94 for (int width = warpSize; width > 1; width /= 2) { in CudaShuffleGetSrcLaneTest() 103 for (int src_lane = -warpSize; src_lane <= warpSize; ++src_lane) { in CudaShuffleGetSrcLaneTest() 109 for (unsigned delta = 0; delta <= warpSize; ++delta) { in CudaShuffleGetSrcLaneTest() 115 for (unsigned delta = 0; delta <= warpSize; ++delta) { in CudaShuffleGetSrcLaneTest() 121 for (int lane_lane = warpSize; lane_lane > 0; lane_lane /= 2) { in CudaShuffleGetSrcLaneTest()
|
D | cuda_device_functions.h | 235 int width = warpSize) { 250 int src_lane, int width = warpSize) { 263 int width = warpSize) { 279 int width = warpSize) { 292 int width = warpSize) { 308 int width = warpSize) { 321 int width = warpSize) { 337 int width = warpSize) {
|
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/ |
D | TensorReductionCuda.h | 170 for (int offset = warpSize/2; offset > 0; offset /= 2) { in FullReductionKernel() 171 reducer.reduce(__shfl_down(accum, offset, warpSize), &accum); in FullReductionKernel() 174 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernel() 246 for (int offset = warpSize/2; offset > 0; offset /= 2) { in FullReductionKernelHalfFloat() 247 reducer.reducePacket(__shfl_down(accum, offset, warpSize), &accum); in FullReductionKernelHalfFloat() 250 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernelHalfFloat() 428 for (int offset = warpSize/2; offset > 0; offset /= 2) { 432 if ((threadIdx.x & (warpSize - 1)) == 0) { 518 for (int offset = warpSize/2; offset > 0; offset /= 2) { 519 reducer.reducePacket(__shfl_down(reduced_val1, offset, warpSize), &reduced_val1); [all …]
|
D | TensorConvolution.h | 858 const int warpSize = 32; 883 const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar)); 887 block_size.x = numext::mini(warpSize, maxX);
|
/external/clang/test/CodeGenCUDA/ |
D | cuda-builtin-vars.cu | 25 out[i++] = warpSize; // CHECK: store i32 32, in kernel()
|
/external/eigen/test/ |
D | cuda_common.h | 93 std::cout << " warpSize: " << deviceProp.warpSize << "\n"; in ei_test_init_cuda()
|
/external/tensorflow/tensorflow/core/kernels/ |
D | bias_op_gpu.cu.cc | 187 for (int32 delta = warpSize / 2; delta > 0; delta /= 2) { in BiasGradNCHW_SharedAtomics()
|
/external/eigen/Eigen/src/Core/arch/CUDA/ |
D | Half.h | 595 …e__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
|