/external/clang/test/SemaCUDA/ |
D | cuda-builtin-vars.cu |
      21  out[i++] = blockDim.x;  in kernel()
      22  blockDim.x = 0; // expected-error {{no setter defined for property 'x'}}  in kernel()
      23  out[i++] = blockDim.y;  in kernel()
      24  blockDim.y = 0; // expected-error {{no setter defined for property 'y'}}  in kernel()
      25  out[i++] = blockDim.z;  in kernel()
      26  blockDim.z = 0; // expected-error {{no setter defined for property 'z'}}  in kernel()
|
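The SemaCUDA hits above exercise the rule that the CUDA builtin variables are read-only: each component of blockDim can be read in a kernel but never assigned. A minimal sketch of that rule; the kernel name and the `out` buffer are illustrative, not taken from the test:

    // Minimal sketch, assuming an output buffer `out`: blockDim components
    // may be read but never written inside a kernel.
    __global__ void read_block_dims(unsigned *out) {
      int i = 0;
      out[i++] = blockDim.x;  // OK: number of threads per block in x
      out[i++] = blockDim.y;  // OK
      out[i++] = blockDim.z;  // OK
      // blockDim.x = 0;      // rejected: "no setter defined for property 'x'"
    }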
/external/tensorflow/tensorflow/core/kernels/ |
D | split_lib_gpu.cu.cc |
      84   eigen_assert(blockDim.y == 1);  in SplitOpKernel()
      85   eigen_assert(blockDim.z == 1);  in SplitOpKernel()
      124  IntType gidx = blockIdx.x * blockDim.x + threadIdx.x;  in split_v_kernel()
      132  IntType lidx = threadIdx.y * blockDim.x + threadIdx.x;  in split_v_kernel()
      133  IntType blockSize = blockDim.x * blockDim.y;  in split_v_kernel()
      152  for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) {  in split_v_kernel()
      163  IntType gidy = blockIdx.y * blockDim.y + threadIdx.y;  in split_v_kernel()
      164  for (; gidy < total_rows; gidy += blockDim.y * gridDim.y)  in split_v_kernel()
      180  eigen_assert(blockDim.y == 1);  in SplitVOpKernel_fixed()
      181  eigen_assert(blockDim.z == 1);  in SplitVOpKernel_fixed()
|
D | concat_lib_gpu_impl.cu.cc |
      41   IntType gidx = blockIdx.x * blockDim.x + threadIdx.x;  in concat_fixed_kernel()
      43   for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) {  in concat_fixed_kernel()
      44   IntType gidy = blockIdx.y * blockDim.y + threadIdx.y;  in concat_fixed_kernel()
      50   for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) {  in concat_fixed_kernel()
      69   IntType gidx = blockIdx.x * blockDim.x + threadIdx.x;  in concat_variable_kernel()
      77   IntType lidx = threadIdx.y * blockDim.x + threadIdx.x;  in concat_variable_kernel()
      78   IntType blockSize = blockDim.x * blockDim.y;  in concat_variable_kernel()
      97   for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) {  in concat_variable_kernel()
      108  IntType gidy = blockIdx.y * blockDim.y + threadIdx.y;  in concat_variable_kernel()
      109  for (; gidy < total_rows; gidy += blockDim.y * gridDim.y)  in concat_variable_kernel()
|
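The split and concat kernels above share a 2-D indexing scheme: a global column index `gidx` and a global row index `gidy`, each advanced by a grid-stride loop, plus a flattened intra-block index `lidx` used for shared-memory staging. A sketch of that pattern with hypothetical kernel and buffer names (it only copies data, unlike the real kernels):

    template <typename IntType>
    __global__ void copy_2d_grid_stride(const float* in, float* out,
                                        IntType total_rows, IntType total_cols) {
      // Flattened index of this thread inside its 2-D block, as used for the
      // shared-memory staging in split_v_kernel / concat_variable_kernel.
      IntType lidx = threadIdx.y * blockDim.x + threadIdx.x;
      IntType blockSize = blockDim.x * blockDim.y;
      (void)lidx;
      (void)blockSize;

      // Global column index, strided by all threads launched in x ...
      for (IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; gidx < total_cols;
           gidx += blockDim.x * gridDim.x) {
        // ... and global row index, strided by all threads launched in y.
        for (IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; gidy < total_rows;
             gidy += blockDim.y * gridDim.y) {
          out[gidy * total_cols + gidx] = in[gidy * total_cols + gidx];
        }
      }
    }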
D | debug_ops_gpu.cu.cc |
      41   const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;  in CurtHealthKernel()
      42   const int32 total_thread_count = gridDim.x * blockDim.x;  in CurtHealthKernel()
      59   const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;  in ConciseHealthKernel()
      60   const int32 total_thread_count = gridDim.x * blockDim.x;  in ConciseHealthKernel()
      90   const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;  in FullHealthKernel()
      91   const int32 total_thread_count = gridDim.x * blockDim.x;  in FullHealthKernel()
      133  const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;  in ReduceInfNanThreeSlotsKernel()
      134  const int32 total_thread_count = gridDim.x * blockDim.x;  in ReduceInfNanThreeSlotsKernel()
|
D | check_numerics_op_gpu.cu.cc |
      41  const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;  in CheckNumericsKernel()
      42  const int32 total_thread_count = gridDim.x * blockDim.x;  in CheckNumericsKernel()
      64  const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;  in CheckNumericsKernelV2()
      65  const int32 total_thread_count = gridDim.x * blockDim.x;  in CheckNumericsKernelV2()
|
D | bias_op_gpu.cu.cc |
      133  for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) {  in BiasGradNHWC_SharedAtomics()
      138  for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;  in BiasGradNHWC_SharedAtomics()
      139  index += blockDim.x * gridDim.x) {  in BiasGradNHWC_SharedAtomics()
      145  for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) {  in BiasGradNHWC_SharedAtomics()
      158  for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) {  in BiasGradNCHW_SharedAtomics()
      169  for (int32 index = group_index * blockDim.x + threadIdx.x;  in BiasGradNCHW_SharedAtomics()
      170  index < total_count; index += blockDim.x * group_size) {  in BiasGradNCHW_SharedAtomics()
|
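The BiasGradNHWC_SharedAtomics hits combine two loop shapes: block-stride loops (`index = threadIdx.x; index += blockDim.x`) that clear and later flush a shared-memory accumulator, and a grid-stride loop that walks the input. A simplified sketch of that shape, assuming NHWC layout and hypothetical names; it must be launched with `bias_size * sizeof(float)` bytes of dynamic shared memory:

    #include <cuda_runtime.h>

    __global__ void BiasGradSharedAtomicsSketch(const float* grad, float* bias_grad,
                                                int nthreads, int bias_size) {
      extern __shared__ float s_data[];  // one partial sum per bias channel

      // Block-stride loop: every thread clears a slice of the shared accumulator.
      for (int index = threadIdx.x; index < bias_size; index += blockDim.x)
        s_data[index] = 0.f;
      __syncthreads();

      // Grid-stride loop over all elements; the channel of an NHWC element is
      // its flat index modulo the channel count.
      for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
           index += blockDim.x * gridDim.x)
        atomicAdd(&s_data[index % bias_size], grad[index]);
      __syncthreads();

      // Block-stride loop again: flush this block's partial sums to global memory.
      for (int index = threadIdx.x; index < bias_size; index += blockDim.x)
        atomicAdd(&bias_grad[index], s_data[index]);
    }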
D | bucketize_op_gpu.cu.cc |
      45  int32 lidx = threadIdx.y * blockDim.x + threadIdx.x;  in BucketizeCustomKernel()
      46  int32 blockSize = blockDim.x * blockDim.y;  in BucketizeCustomKernel()
|
D | random_op_gpu.h |
      142  const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
      143  const int32 total_thread_count = gridDim.x * blockDim.x;
      181  const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
      182  const int32 total_thread_count = gridDim.x * blockDim.x;
|
D | reduction_gpu_kernels.cu.h |
      171  const int gid = bid * blockDim.x + tid;
      172  const int stride = blockDim.x * gridDim.x;
      193  max(min(num_elems - bid * blockDim.x, num_threads), 0);
      207  assert(blockDim.x % TF_RED_WARPSIZE == 0);
      208  int warps_per_block = blockDim.x / TF_RED_WARPSIZE;
      214  int gid = threadIdx.x + blockIdx.x * blockDim.x;
      280  rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y);
      304  row += rows_per_warp * gridDim.y * blockDim.y;
      305  for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) {
      327  if (blockDim.y > 1) {
      [all …]
|
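The reduction header asserts that `blockDim.x` is a multiple of the warp size and derives `warps_per_block = blockDim.x / TF_RED_WARPSIZE`. A generic sketch of a block sum reduction shaped by that constraint; this is an illustration of the sizing rule, not the TensorFlow kernel itself, and all names are hypothetical:

    #include <cassert>
    #include <cuda_runtime.h>

    __global__ void BlockSumSketch(const float* in, float* out, int num_elems) {
      constexpr int kWarpSize = 32;
      assert(blockDim.x % kWarpSize == 0);          // same constraint as line 207
      int warps_per_block = blockDim.x / kWarpSize;

      // Grid-stride accumulation into a per-thread partial sum.
      float sum = 0.f;
      for (int gid = blockIdx.x * blockDim.x + threadIdx.x; gid < num_elems;
           gid += blockDim.x * gridDim.x)
        sum += in[gid];

      // Reduce within each warp using shuffles.
      for (int offset = kWarpSize / 2; offset > 0; offset /= 2)
        sum += __shfl_down_sync(0xffffffff, sum, offset);

      // One partial per warp, combined by the first warp, then one atomic per block.
      __shared__ float warp_sums[32];
      int lane = threadIdx.x % kWarpSize;
      int warp = threadIdx.x / kWarpSize;
      if (lane == 0) warp_sums[warp] = sum;
      __syncthreads();
      if (warp == 0) {
        sum = (lane < warps_per_block) ? warp_sums[lane] : 0.f;
        for (int offset = kWarpSize / 2; offset > 0; offset /= 2)
          sum += __shfl_down_sync(0xffffffff, sum, offset);
        if (lane == 0) atomicAdd(out, sum);
      }
    }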
D | depthwise_conv_op_gpu.h |
      212   assert(blockDim.x == kBlockDepth);
      213   assert(blockDim.y == args.in_cols);
      214   const int block_height = blockDim.z;
      504   assert(blockDim.x == args.in_cols);
      505   assert(blockDim.z == kBlockDepth);
      506   const int block_height = blockDim.y;
      1164  assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.z));
      1172  const int in_width = blockDim.y;  // slower (see b/62280718): args.in_cols;
      1181  assert(blockDim.x == kBlockDepth);
      1182  assert(blockDim.y == args.in_cols);
      [all …]
|
D | adjust_hsv_gpu.cu.h |
      102  for (int64 idx = (blockDim.x * blockIdx.x + threadIdx.x) * 3;  in adjust_hsv_nhwc()
      103  idx < number_elements; idx += blockDim.x * gridDim.x * 3) {  in adjust_hsv_nhwc()
|
D | relu_op_gpu.cu.cc |
      47  int32 index = blockIdx.x * blockDim.x + threadIdx.x;  in ReluGradHalfKernel()
      48  const int32 total_device_threads = gridDim.x * blockDim.x;  in ReluGradHalfKernel()
|
D | fused_batch_norm_op.cu.cc |
      184  int32 index = blockIdx.x * blockDim.x + threadIdx.x;  in run()
      185  const int32 total_device_threads = gridDim.x * blockDim.x;  in run()
      243  int32 index = blockIdx.x * blockDim.x + threadIdx.x;  in run()
      244  const int32 total_device_threads = gridDim.x * blockDim.x;  in run()
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cudart_stub.cc |
      109  dim3 *blockDim,  in __cudaPopCallConfiguration() argument
      112  using FuncPtr = cudaError_t(CUDARTAPI *)(dim3 * gridDim, dim3 * blockDim,  in __cudaPopCallConfiguration()
      116  return func_ptr(gridDim, blockDim, sharedMem, stream);  in __cudaPopCallConfiguration()
      120  dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0) {  in __cudaPushCallConfiguration() argument
      121  using FuncPtr = unsigned(CUDARTAPI *)(dim3 gridDim, dim3 blockDim,  in __cudaPushCallConfiguration()
      125  return func_ptr(gridDim, blockDim, sharedMem, stream);  in __cudaPushCallConfiguration()
|
D | cusparse_10_0.inc |
      966   const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
      975   bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
      983   const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
      992   bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
      1001  const int *bsrSortedColIndA, int blockDim, const cuComplex *x,
      1011  bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
      1020  const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x,
      1030  bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
      1040  const int *bsrSortedColIndA, int blockDim, const float *x,
      1051  bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y);
      [all …]
|
D | cusparse_9_0.inc |
      966   const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
      975   bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
      983   const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
      992   bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
      1001  const int *bsrSortedColIndA, int blockDim, const cuComplex *x,
      1011  bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
      1020  const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x,
      1030  bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
      1040  const int *bsrSortedColIndA, int blockDim, const float *x,
      1051  bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y);
      [all …]
|
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/ |
D | TensorReductionCuda.h |
      115  const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;  in ReductionInitKernel()
      116  const Index num_threads = blockDim.x * gridDim.x;  in ReductionInitKernel()
      192  eigen_assert(blockDim.x == 1);  in ReductionInitFullReduxKernelHalfFloat()
      205  const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;  in ReductionInitKernelHalfFloat()
      206  const Index num_threads = blockDim.x * gridDim.x;  in ReductionInitKernelHalfFloat()
      378  eigen_assert(blockDim.y == 1);
      379  eigen_assert(blockDim.z == 1);
      386  const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
      389  const Index num_threads = blockDim.x * gridDim.x;
      390  const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
      [all …]
|
D | TensorConvolution.h |
      584  const int first_plane = blockIdx.y * blockDim.y;
      585  const int plane_stride = blockDim.y * gridDim.y;
      592  for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
      603  for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
      638  const int first_plane = blockIdx.z * blockDim.z;
      639  const int plane_stride = blockDim.z * gridDim.z;
      648  for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
      651  for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
      663  for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
      665  for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
      [all …]
|
D | TensorRandom.h |
      24  blockIdx.x * blockDim.x + threadIdx.x +  in get_random_seed()
      25  gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y);  in get_random_seed()
|
/external/clang/test/CodeGenCUDA/ |
D | cuda-builtin-vars.cu |
      17  out[i++] = blockDim.x;  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()  in kernel()
      18  out[i++] = blockDim.y;  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()  in kernel()
      19  out[i++] = blockDim.z;  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()  in kernel()
|
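The CHECK lines above pin down the lowering: each read of a `blockDim` component compiles to a call to the corresponding NVVM special-register intrinsic (`ntid` is the PTX register holding the block dimensions). A kernel mirroring the test, with illustrative names:

    // Each read of blockDim lowers to an NVVM special-register read, per the
    // CodeGenCUDA test above. Kernel and buffer names are illustrative.
    __global__ void kernel(int *out) {
      int i = 0;
      out[i++] = blockDim.x;  // lowers to @llvm.nvvm.read.ptx.sreg.ntid.x()
      out[i++] = blockDim.y;  // lowers to @llvm.nvvm.read.ptx.sreg.ntid.y()
      out[i++] = blockDim.z;  // lowers to @llvm.nvvm.read.ptx.sreg.ntid.z()
    }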
/external/tensorflow/tensorflow/core/util/ |
D | gpu_device_functions.h |
      110  return detail::GpuGridRange<T>(blockIdx.x * blockDim.x + threadIdx.x,  in GpuGridRangeX()
      111  gridDim.x * blockDim.x, count);  in GpuGridRangeX()
      119  return detail::GpuGridRange<T>(blockIdx.y * blockDim.y + threadIdx.y,  in GpuGridRangeY()
      120  gridDim.y * blockDim.y, count);  in GpuGridRangeY()
      128  return detail::GpuGridRange<T>(blockIdx.z * blockDim.z + threadIdx.z,  in GpuGridRangeZ()
      129  gridDim.z * blockDim.z, count);  in GpuGridRangeZ()
      503  assert(blockDim.y == 1);  in SetZero()
      504  assert(blockDim.z == 1);  in SetZero()
      505  assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x);  in SetZero()
      515  assert(blockDim.y == 1);  in SetToValue()
      [all …]
|
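GpuGridRangeX/Y/Z wrap the start index and stride of the grid-stride pattern into a range object so kernels can iterate with a range-based for loop. A minimal sketch of what such a helper can look like; this is an assumption-laden reconstruction, not TensorFlow's actual detail::GpuGridRange:

    // Minimal grid-stride range usable with range-for (illustrative only).
    template <typename T>
    class GridStrideRange {
     public:
      __device__ GridStrideRange(T begin, T step, T end)
          : begin_(begin), step_(step), end_(end) {}

      class Iterator {
       public:
        __device__ Iterator(T pos, T step) : pos_(pos), step_(step) {}
        __device__ T operator*() const { return pos_; }
        __device__ Iterator& operator++() { pos_ += step_; return *this; }
        __device__ bool operator!=(const Iterator& other) const {
          return pos_ < other.pos_;  // half-open range: stop once pos reaches end
        }
       private:
        T pos_, step_;
      };

      __device__ Iterator begin() const { return Iterator(begin_, step_); }
      __device__ Iterator end() const { return Iterator(end_, step_); }

     private:
      T begin_, step_, end_;
    };

    template <typename T>
    __device__ GridStrideRange<T> GpuGridRangeX(T count) {
      return GridStrideRange<T>(blockIdx.x * blockDim.x + threadIdx.x,
                                gridDim.x * blockDim.x, count);
    }

    // Usage inside a kernel:
    //   for (int i : GpuGridRangeX<int>(n)) out[i] = in[i] + 1;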
/external/tensorflow/tensorflow/tools/ci_build/builds/user_ops/ |
D | cuda_op_kernel.cu.cc |
      22  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;  in AddOneKernel()
      23  i += blockDim.x * gridDim.x) {  in AddOneKernel()
|
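The two example ops in this tree use the canonical 1-D grid-stride loop, the same shape behind the `thread_id` / `total_thread_count` pairs seen throughout this listing. A self-contained sketch with a host-side launch; the block and grid sizes are illustrative:

    #include <cuda_runtime.h>

    __global__ void AddOneKernel(const int* in, const int N, int* out) {
      // Each thread starts at its global index and strides by the total number
      // of threads in the grid, so any N is covered by any launch shape.
      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
           i += blockDim.x * gridDim.x) {
        out[i] = in[i] + 1;
      }
    }

    void AddOneKernelLauncher(const int* d_in, int N, int* d_out) {
      AddOneKernel<<<32, 256>>>(d_in, N, d_out);  // 32 blocks of 256 threads
    }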
/external/tensorflow/tensorflow/examples/adding_an_op/ |
D | cuda_op_kernel.cu.cc |
      23  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;  in AddOneKernel()
      24  i += blockDim.x * gridDim.x) {  in AddOneKernel()
|
/external/eigen/test/ |
D | cuda_common.h |
      11  dim3 threadIdx, blockDim, blockIdx;  variable
      26  int i = threadIdx.x + blockIdx.x*blockDim.x;  in run_on_cuda_meta_kernel()
|
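Line 11 of cuda_common.h declares the CUDA builtins as ordinary `dim3` variables so that code mentioning them still compiles when nvcc is not the compiler. An illustrative reconstruction of that trick, not Eigen's exact header, driving the same index expression from a host loop:

    #include <cstdio>
    #include <cuda_runtime.h>   // dim3

    #ifndef __CUDACC__
    dim3 threadIdx, blockDim, blockIdx;   // host-side stand-ins for the builtins
    #endif

    int main() {
    #ifndef __CUDACC__
      // Emulate a 2-block x 4-thread launch on the host.
      blockDim = dim3(4, 1, 1);
      for (unsigned b = 0; b < 2; ++b) {
        blockIdx = dim3(b, 0, 0);
        for (unsigned t = 0; t < 4; ++t) {
          threadIdx = dim3(t, 0, 0);
          int i = threadIdx.x + blockIdx.x * blockDim.x;  // same expression as line 26
          std::printf("global index %d\n", i);
        }
      }
    #endif
      return 0;
    }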
/external/tensorflow/tensorflow/core/kernels/rnn/ |
D | lstm_ops_gpu.cu.cc |
      90   const int batch_id = blockIdx.x * blockDim.x + threadIdx.x;  in lstm_gates()
      91   const int act_id = blockIdx.y * blockDim.y + threadIdx.y;  in lstm_gates()
      211  const int gid = blockDim.x * blockIdx.x + threadIdx.x;  in concat_xh()
      311  const int batch_id = blockIdx.x * blockDim.x + threadIdx.x;  in lstm_gates_bprop()
      312  const int act_id = blockIdx.y * blockDim.y + threadIdx.y;  in lstm_gates_bprop()
|