Home
last modified time | relevance | path

Searched refs:warpSize (Results 1 – 12 of 12) sorted by relevance

/external/clang/lib/Headers/
D__clang_cuda_intrinsics.h39 int __width = warpSize) { \
41 ((warpSize - __width) << 8) | (__Mask)); \
44 int __width = warpSize) { \
46 ((warpSize - __width) << 8) | (__Mask)); \
49 int __width = warpSize) { \
54 int __width = warpSize) { \
69 unsigned long long __in, int __offset, int __width = warpSize) { \
74 int __width = warpSize) { \
Dcuda_builtin_vars.h120 __attribute__((device)) const int warpSize = 32; variable
/external/clang/test/SemaCUDA/
Dcuda-builtin-vars.cu35 out[i++] = warpSize; in kernel()
36warpSize = 0; // expected-error {{cannot assign to variable 'warpSize' with const-qualified type '… in kernel()
56 const void *wsptr = &warpSize; in kernel()
/external/tensorflow/tensorflow/core/util/
Dcuda_kernel_helper.h72 int width = warpSize) {
78 unsigned mask, Eigen::half value, int delta, int width = warpSize) {
84 unsigned mask, Eigen::half value, int delta, int width = warpSize) {
90 unsigned mask, Eigen::half value, int lane_mask, int width = warpSize) {
Dcuda_kernel_helper_test.cu.cc94 for (int width = warpSize; width > 1; width /= 2) { in CudaShuffleGetSrcLaneTest()
103 for (int src_lane = -warpSize; src_lane <= warpSize; ++src_lane) { in CudaShuffleGetSrcLaneTest()
109 for (unsigned delta = 0; delta <= warpSize; ++delta) { in CudaShuffleGetSrcLaneTest()
115 for (unsigned delta = 0; delta <= warpSize; ++delta) { in CudaShuffleGetSrcLaneTest()
121 for (int lane_lane = warpSize; lane_lane > 0; lane_lane /= 2) { in CudaShuffleGetSrcLaneTest()
Dcuda_device_functions.h235 int width = warpSize) {
250 int src_lane, int width = warpSize) {
263 int width = warpSize) {
279 int width = warpSize) {
292 int width = warpSize) {
308 int width = warpSize) {
321 int width = warpSize) {
337 int width = warpSize) {
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/
DTensorReductionCuda.h170 for (int offset = warpSize/2; offset > 0; offset /= 2) { in FullReductionKernel()
171 reducer.reduce(__shfl_down(accum, offset, warpSize), &accum); in FullReductionKernel()
174 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernel()
246 for (int offset = warpSize/2; offset > 0; offset /= 2) { in FullReductionKernelHalfFloat()
247 reducer.reducePacket(__shfl_down(accum, offset, warpSize), &accum); in FullReductionKernelHalfFloat()
250 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernelHalfFloat()
428 for (int offset = warpSize/2; offset > 0; offset /= 2) {
432 if ((threadIdx.x & (warpSize - 1)) == 0) {
518 for (int offset = warpSize/2; offset > 0; offset /= 2) {
519 reducer.reducePacket(__shfl_down(reduced_val1, offset, warpSize), &reduced_val1);
[all …]
DTensorConvolution.h858 const int warpSize = 32;
883 const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar));
887 block_size.x = numext::mini(warpSize, maxX);
/external/clang/test/CodeGenCUDA/
Dcuda-builtin-vars.cu25 out[i++] = warpSize; // CHECK: store i32 32, in kernel()
/external/eigen/test/
Dcuda_common.h93 std::cout << " warpSize: " << deviceProp.warpSize << "\n"; in ei_test_init_cuda()
/external/tensorflow/tensorflow/core/kernels/
Dbias_op_gpu.cu.cc187 for (int32 delta = warpSize / 2; delta > 0; delta /= 2) { in BiasGradNCHW_SharedAtomics()
/external/eigen/Eigen/src/Core/arch/CUDA/
DHalf.h595 …e__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {