/external/eigen/unsupported/Eigen/CXX11/src/Tensor/ |
D | TensorContractionCuda.h | 67 const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z; in EigenContractionKernelInternal() 68 const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x; in EigenContractionKernelInternal() 97 const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y; in EigenContractionKernelInternal() 121 const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8; \ in EigenContractionKernelInternal() 122 const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8; \ in EigenContractionKernelInternal() 123 const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8; \ in EigenContractionKernelInternal() 124 const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8; \ in EigenContractionKernelInternal() 125 const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8; \ in EigenContractionKernelInternal() 126 const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8; \ in EigenContractionKernelInternal() 127 const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8; \ in EigenContractionKernelInternal() [all …]
|
D | TensorReductionCuda.h | 115 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernel() 129 const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; in FullReductionKernel() 136 if (threadIdx.x == 0) { in FullReductionKernel() 174 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernel() 178 if (gridDim.x > 1 && threadIdx.x == 0) { in FullReductionKernel() 205 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernelHalfFloat() 223 const Index first_index = blockIdx.x * BlockSize * NumPerThread + 2*threadIdx.x; in FullReductionKernelHalfFloat() 250 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernelHalfFloat() 265 eigen_assert(threadIdx.x == 1); in ReductionCleanupKernelHalfFloat() 390 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; [all …]
|
D | TensorConvolution.h | 587 for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) { 590 const int plane_kernel_offset = threadIdx.y * num_x_input; 592 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { 603 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { 641 for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) { 644 const int plane_kernel_offset = threadIdx.z * num_y_input; 648 for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { 651 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { 663 for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { 665 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { [all …]
|
D | TensorRandom.h | 22 assert(threadIdx.z == 0); in get_random_seed() 24 blockIdx.x * blockDim.x + threadIdx.x + in get_random_seed() 25 gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); in get_random_seed()
|
/external/clang/test/SemaCUDA/ |
D | cuda-builtin-vars.cu | 7 out[i++] = threadIdx.x; in kernel() 8 threadIdx.x = 0; // expected-error {{no setter defined for property 'x'}} in kernel() 9 out[i++] = threadIdx.y; in kernel() 10 threadIdx.y = 0; // expected-error {{no setter defined for property 'y'}} in kernel() 11 out[i++] = threadIdx.z; in kernel() 12 threadIdx.z = 0; // expected-error {{no setter defined for property 'z'}} in kernel() 43 …__cuda_builtin_threadIdx_t y = threadIdx; // expected-error {{calling a private constructor of cla… in kernel() 46 …threadIdx = threadIdx; // expected-error {{'operator=' is a private member of '__cuda_builtin_thre… in kernel() 49 …void *ptr = &threadIdx; // expected-error {{'operator&' is a private member of '__cuda_builtin_thr… in kernel()
|
/external/tensorflow/tensorflow/core/kernels/ |
D | bias_op_gpu.cu.cc | 121 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics() 126 for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; in BiasGradNHWC_SharedAtomics() 133 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics() 147 for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) { in BiasGradNCHW_SharedAtomics() 158 for (int32 index = group_index * blockDim.x + threadIdx.x; in BiasGradNCHW_SharedAtomics() 169 int bias_offset = threadIdx.x % 32; in BiasGradNCHW_SharedAtomics() 175 int32 thread_index = threadIdx.x; in BiasGradNCHW_SharedAtomics()
|
D | concat_lib_gpu_impl.cu.cc | 40 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_fixed_kernel() 43 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_fixed_kernel() 68 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_variable_kernel() 76 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in concat_variable_kernel() 107 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_variable_kernel()
|
D | reduction_gpu_kernels.cu.h | 181 const int tid = threadIdx.x; 218 const int row = (blockIdx.x * blockDim.x + threadIdx.x) / 32; 219 const int lane = threadIdx.x % 32; 222 int gid = threadIdx.x + blockIdx.x * blockDim.x; 256 const int lane = threadIdx.x % 32; 260 rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y); 284 cub::ShuffleIndex(sum, threadIdx.x + i * num_cols, 32, 0xffffffff); 288 if (lane < num_cols) partial_sums[lane * 33 + threadIdx.y] = sum; 292 if (threadIdx.y == 0 && threadIdx.x < num_cols) { 293 value_type s = partial_sums[threadIdx.x * 33]; [all …]
|
D | split_lib_gpu.cu.cc | 118 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in split_v_kernel() 126 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in split_v_kernel() 157 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in split_v_kernel()
|
D | bucketize_op_gpu.cu.cc | 46 int32 lidx = threadIdx.y * blockDim.x + threadIdx.x; in BucketizeCustomKernel()
|
D | depthwise_conv_op_gpu.cu.cc | 212 const int thread_depth = threadIdx.x; in DepthwiseConv2dGPUKernelNHWCSmall() 213 const int thread_col = threadIdx.y; in DepthwiseConv2dGPUKernelNHWCSmall() 214 const int thread_row = threadIdx.z; in DepthwiseConv2dGPUKernelNHWCSmall() 491 const int thread_col = threadIdx.x; in DepthwiseConv2dGPUKernelNCHWSmall() 492 const int thread_row = threadIdx.y; in DepthwiseConv2dGPUKernelNCHWSmall() 493 const int thread_depth = threadIdx.z; in DepthwiseConv2dGPUKernelNCHWSmall() 1162 const int thread_depth = threadIdx.x; in DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall() 1163 const int thread_col = threadIdx.y; in DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall() 1164 const int thread_row = threadIdx.z; in DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall() 1431 const int thread_col = threadIdx.x; in DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall() [all …]
|
D | random_op_gpu.cu.cc | 134 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in Run() 175 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in Run()
|
D | check_numerics_op_gpu.cu.cc | 39 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CheckNumericsKernel()
|
D | adjust_hsv_gpu.cu.h | 103 const int64 idx = (blockDim.x * blockIdx.x + threadIdx.x) * 3; in adjust_hsv_nhwc()
|
D | softmax_op_gpu.cu.cc | 41 const int tid = blockIdx.x * blockDim.x + threadIdx.x; in GenerateNormalizedProb()
|
/external/clang/test/CodeGenCUDA/ |
D | cuda-builtin-vars.cu | 9 out[i++] = threadIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x() in kernel() 10 out[i++] = threadIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y() in kernel() 11 out[i++] = threadIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z() in kernel()
|
/external/llvm/test/Analysis/DivergenceAnalysis/NVPTX/ |
D | diverge.ll | 6 ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x) 26 ; if (threadIdx.x < 5) // divergent: data dependent 45 ; if (threadIdx.x >= 5) { // divergent 48 ; // c here is divergent because it is sync dependent on threadIdx.x >= 5
|
/external/tensorflow/tensorflow/contrib/rnn/kernels/ |
D | lstm_ops_gpu.cu.cc | 48 const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; in lstm_gates() 49 const int act_id = blockIdx.y * blockDim.y + threadIdx.y; in lstm_gates() 160 const int gid = blockDim.x * blockIdx.x + threadIdx.x; in concat_xh() 254 const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; in lstm_gates_bprop() 255 const int act_id = blockIdx.y * blockDim.y + threadIdx.y; in lstm_gates_bprop()
|
/external/eigen/test/ |
D | cuda_common.h | 11 dim3 threadIdx, blockDim, blockIdx; variable 26 int i = threadIdx.x + blockIdx.x*blockDim.x; in run_on_cuda_meta_kernel()
|
/external/tensorflow/tensorflow/examples/adding_an_op/ |
D | cuda_op_kernel.cu.cc | 21 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in AddOneKernel()
|
/external/tensorflow/tensorflow/tools/ci_build/builds/user_ops/ |
D | cuda_op_kernel.cu.cc | 21 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in AddOneKernel()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/tests_data/ |
D | saxpy.ll | 9 @threadIdx = external addrspace(1) global %struct.uint3 38 …truct.uint3, %struct.uint3* addrspacecast (%struct.uint3 addrspace(1)* @threadIdx to %struct.uint3… 90 …truct.uint3, %struct.uint3* addrspacecast (%struct.uint3 addrspace(1)* @threadIdx to %struct.uint3…
|
/external/clang/lib/Headers/ |
D | cuda_builtin_vars.h | 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; variable
|
/external/tensorflow/tensorflow/contrib/mpi_collectives/kernels/ |
D | ring.cu.cc | 90 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in elemwise_accum()
|
/external/tensorflow/tensorflow/core/util/ |
D | cuda_device_functions.h | 90 return detail::CudaGridRange<T>(blockIdx.x * blockDim.x + threadIdx.x, in CudaGridRangeX() 98 return detail::CudaGridRange<T>(blockIdx.y * blockDim.y + threadIdx.y, in CudaGridRangeY() 106 return detail::CudaGridRange<T>(blockIdx.z * blockDim.z + threadIdx.z, in CudaGridRangeZ()
|