Searched refs:blockDim (Results 1 – 25 of 37) sorted by relevance

/external/clang/test/SemaCUDA/
cuda-builtin-vars.cu 21 out[i++] = blockDim.x; in kernel()
22 blockDim.x = 0; // expected-error {{no setter defined for property 'x'}} in kernel()
23 out[i++] = blockDim.y; in kernel()
24 blockDim.y = 0; // expected-error {{no setter defined for property 'y'}} in kernel()
25 out[i++] = blockDim.z; in kernel()
26 blockDim.z = 0; // expected-error {{no setter defined for property 'z'}} in kernel()
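The test above relies on blockDim (like the other CUDA builtin variables) being readable but not assignable in device code. A minimal sketch of that behavior, with a hypothetical kernel name, assuming compilation as CUDA with clang:

  __global__ void read_block_dims(unsigned int *out) {
    // Reading the launch-time block dimensions is allowed.
    out[0] = blockDim.x;
    out[1] = blockDim.y;
    out[2] = blockDim.z;
    // Writing is rejected at compile time, e.g.:
    //   blockDim.x = 0;  // error: no setter defined for property 'x'
  }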
/external/tensorflow/tensorflow/core/kernels/
concat_lib_gpu_impl.cu.cc 41 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_fixed_kernel()
43 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in concat_fixed_kernel()
44 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_fixed_kernel()
50 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) { in concat_fixed_kernel()
69 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_variable_kernel()
77 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in concat_variable_kernel()
78 IntType blockSize = blockDim.x * blockDim.y; in concat_variable_kernel()
97 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in concat_variable_kernel()
108 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_variable_kernel()
109 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) in concat_variable_kernel()
split_lib_gpu.cu.cc 84 eigen_assert(blockDim.y == 1); in SplitOpKernel()
85 eigen_assert(blockDim.z == 1); in SplitOpKernel()
124 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in split_v_kernel()
132 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in split_v_kernel()
133 IntType blockSize = blockDim.x * blockDim.y; in split_v_kernel()
152 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in split_v_kernel()
163 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in split_v_kernel()
164 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) in split_v_kernel()
179 eigen_assert(blockDim.y == 1); in SplitVOpKernel_fixed()
180 eigen_assert(blockDim.z == 1); in SplitVOpKernel_fixed()
bias_op_gpu.cu.cc 130 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics()
135 for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; in BiasGradNHWC_SharedAtomics()
136 index += blockDim.x * gridDim.x) { in BiasGradNHWC_SharedAtomics()
142 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics()
156 for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) { in BiasGradNCHW_SharedAtomics()
167 for (int32 index = group_index * blockDim.x + threadIdx.x; in BiasGradNCHW_SharedAtomics()
168 index < total_count; index += blockDim.x * group_size) { in BiasGradNCHW_SharedAtomics()
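BiasGradNHWC_SharedAtomics above accumulates per-channel partial sums in shared memory, with each thread striding over the shared buffer by blockDim.x. A minimal sketch of that shared-atomics pattern, assuming NHWC layout so the channel is the innermost index (hypothetical names):

  __global__ void bias_grad_shared_atomics(const float* grad, float* bias_grad,
                                           int nthreads, int bias_size) {
    extern __shared__ float s_data[];  // bias_size floats of dynamic shared memory
    // Zero the shared accumulator; threads stride by blockDim.x.
    for (int i = threadIdx.x; i < bias_size; i += blockDim.x) s_data[i] = 0.f;
    __syncthreads();
    // Grid-stride over the gradient; in NHWC the channel is index % bias_size.
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < nthreads;
         i += blockDim.x * gridDim.x) {
      atomicAdd(&s_data[i % bias_size], grad[i]);
    }
    __syncthreads();
    // Flush this block's partial sums into the global result.
    for (int i = threadIdx.x; i < bias_size; i += blockDim.x) {
      atomicAdd(&bias_grad[i], s_data[i]);
    }
  }

  Launch with bias_size * sizeof(float) bytes of dynamic shared memory.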
random_op_gpu.h 135 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
136 const int32 total_thread_count = gridDim.x * blockDim.x;
174 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
175 const int32 total_thread_count = gridDim.x * blockDim.x;
bucketize_op_gpu.cu.cc 46 int32 lidx = threadIdx.y * blockDim.x + threadIdx.x; in BucketizeCustomKernel()
47 int32 blockSize = blockDim.x * blockDim.y; in BucketizeCustomKernel()
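split_v_kernel and BucketizeCustomKernel above flatten the 2D thread position into a single intra-block index so every thread, whatever the block shape, can help stage data into shared memory. A minimal sketch (hypothetical names):

  __global__ void cooperative_load(const float* src, int n) {
    extern __shared__ float smem[];  // n floats of dynamic shared memory
    // Linear thread index within the block and total threads per block.
    int lidx = threadIdx.y * blockDim.x + threadIdx.x;
    int block_size = blockDim.x * blockDim.y;
    // All threads cooperate to load n elements, regardless of block shape.
    for (int i = lidx; i < n; i += block_size) {
      smem[i] = src[i];
    }
    __syncthreads();
    // ... use smem ...
  }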
check_numerics_op_gpu.cu.cc 40 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CheckNumericsKernel()
41 const int32 total_thread_count = gridDim.x * blockDim.x; in CheckNumericsKernel()
reduction_gpu_kernels.cu.h 200 const int gid = bid * blockDim.x + tid;
201 const int stride = blockDim.x * gridDim.x;
222 max(min(num_elems - bid * blockDim.x, num_threads), 0);
236 assert(blockDim.x % 32 == 0);
237 int warps_per_block = blockDim.x / 32;
243 int gid = threadIdx.x + blockIdx.x * blockDim.x;
308 rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y);
325 row += rows_per_warp * gridDim.y * blockDim.y;
326 for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) {
347 if (blockDim.y > 1) {
[all …]
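The reduction kernels above assume blockDim.x is a multiple of the warp size (assert(blockDim.x % 32 == 0)) so the block splits into whole warps. A minimal sketch of a warp-shuffle sum under that assumption; the shuffle-plus-atomic tail is illustrative and not necessarily how the TensorFlow kernel finishes its reduction:

  #include <cassert>

  __global__ void block_sum(const float* in, float* out, int n) {
    assert(blockDim.x % 32 == 0);  // whole warps only
    int gid = blockIdx.x * blockDim.x + threadIdx.x;
    float v = gid < n ? in[gid] : 0.f;
    // Reduce within each warp using shuffles (CUDA 9+).
    for (int offset = 16; offset > 0; offset >>= 1)
      v += __shfl_down_sync(0xffffffff, v, offset);
    // Lane 0 of each warp holds that warp's partial sum.
    if ((threadIdx.x & 31) == 0) atomicAdd(out, v);
  }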
depthwise_conv_op_gpu.h 195 assert(blockDim.x == kBlockDepth); in DepthwiseConv2dGPUKernelNHWCSmall()
196 assert(blockDim.y == args.in_cols); in DepthwiseConv2dGPUKernelNHWCSmall()
197 const int block_height = blockDim.z; in DepthwiseConv2dGPUKernelNHWCSmall()
480 assert(blockDim.x == args.in_cols); in DepthwiseConv2dGPUKernelNCHWSmall()
481 assert(blockDim.z == kBlockDepth); in DepthwiseConv2dGPUKernelNCHWSmall()
482 const int block_height = blockDim.y; in DepthwiseConv2dGPUKernelNCHWSmall()
1170 assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.z));
1178 const int in_width = blockDim.y; // slower (see b/62280718): args.in_cols;
1187 assert(blockDim.x == kBlockDepth);
1188 assert(blockDim.y == args.in_cols);
[all …]
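The DepthwiseConv2dGPUKernel*Small kernels above bake the expected block shape into the kernel and assert that the actual blockDim matches what the host launch chose. A minimal sketch of that contract (hypothetical kernel name and layout):

  #include <cassert>

  // x indexes depth, y indexes input columns, z indexes tile rows, mirroring
  // the block shape the host launch promised.
  template <int kBlockDepth>
  __global__ void tiled_kernel(const float* in, float* out, int in_cols) {
    assert(blockDim.x == kBlockDepth);   // depth fixed at compile time
    assert(blockDim.y == in_cols);       // one thread per input column
    const int block_height = blockDim.z; // rows handled by this block
    // ... shared-memory tile sized from kBlockDepth, in_cols, block_height ...
  }

  Host side, for reference: tiled_kernel<8><<<grid, dim3(8, in_cols, rows_per_block)>>>(in, out, in_cols);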
relu_op_gpu.cu.cc 41 int32 index = blockIdx.x * blockDim.x + threadIdx.x; in ReluGradHalfKernel()
42 const int32 total_device_threads = gridDim.x * blockDim.x; in ReluGradHalfKernel()
parameterized_truncated_normal_op_gpu.cu.cc 62 const int32 initial_offset = blockIdx.x * blockDim.x + threadIdx.x; in TruncatedNormalKernel()
85 max_samples_per_item * (gridDim.x * blockDim.x); in TruncatedNormalKernel()
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/
TensorReductionCuda.h 115 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernel()
116 const Index num_threads = blockDim.x * gridDim.x; in ReductionInitKernel()
192 eigen_assert(blockDim.x == 1); in ReductionInitFullReduxKernelHalfFloat()
205 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernelHalfFloat()
206 const Index num_threads = blockDim.x * gridDim.x; in ReductionInitKernelHalfFloat()
378 eigen_assert(blockDim.y == 1);
379 eigen_assert(blockDim.z == 1);
386 const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
389 const Index num_threads = blockDim.x * gridDim.x;
390 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
[all …]
TensorConvolution.h 584 const int first_plane = blockIdx.y * blockDim.y;
585 const int plane_stride = blockDim.y * gridDim.y;
592 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
603 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
638 const int first_plane = blockIdx.z * blockDim.z;
639 const int plane_stride = blockDim.z * gridDim.z;
648 for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
651 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
663 for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
665 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
[all …]
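The convolution kernels above stage the input into shared memory with nested loops whose strides are blockDim.y and blockDim.x, so any block shape covers the tile. A minimal sketch of that staging step (hypothetical names):

  __global__ void load_tile_2d(const float* input, int num_y_input,
                               int num_x_input, int input_width) {
    extern __shared__ float tile[];  // num_y_input * num_x_input floats
    // Rows stride by blockDim.y, columns by blockDim.x.
    for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
      for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
        tile[j * num_x_input + i] = input[j * input_width + i];
      }
    }
    __syncthreads();
    // ... convolve against the staged tile ...
  }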
TensorRandom.h 24 blockIdx.x * blockDim.x + threadIdx.x + in get_random_seed()
25 gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); in get_random_seed()
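get_random_seed above linearizes the thread's position across a 2D launch so every thread gets a distinct value. A minimal sketch of that linearization (hypothetical function name):

  __device__ unsigned int global_thread_id_2d() {
    // x positions run 0 .. gridDim.x*blockDim.x - 1; each y "row" of the
    // launch adds one full x extent on top of that.
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    return x + gridDim.x * blockDim.x * y;
  }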
/external/tensorflow/tensorflow/core/util/
cuda_device_functions.h 90 return detail::CudaGridRange<T>(blockIdx.x * blockDim.x + threadIdx.x, in CudaGridRangeX()
91 gridDim.x * blockDim.x, count); in CudaGridRangeX()
98 return detail::CudaGridRange<T>(blockIdx.y * blockDim.y + threadIdx.y, in CudaGridRangeY()
99 gridDim.y * blockDim.y, count); in CudaGridRangeY()
106 return detail::CudaGridRange<T>(blockIdx.z * blockDim.z + threadIdx.z, in CudaGridRangeZ()
107 gridDim.z * blockDim.z, count); in CudaGridRangeZ()
386 assert(blockDim.y == 1 && blockDim.z == 1); in SetZero()
387 assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x); in SetZero()
397 assert(blockDim.y == 1 && blockDim.z == 1); in SetToValue()
398 assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x); in SetToValue()
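CudaGridRangeX/Y/Z above wrap the begin/stride pair of a grid-stride loop in an iterable range. A minimal sketch of the same idea with a hypothetical GridRange type; the real detail::CudaGridRange has more machinery:

  struct GridRange {
    struct Iter {
      int i, stride;
      __device__ int operator*() const { return i; }
      __device__ Iter& operator++() { i += stride; return *this; }
      __device__ bool operator!=(const Iter& other) const { return i < other.i; }
    };
    int begin_, stride_, end_;
    __device__ Iter begin() const { return {begin_, stride_}; }
    __device__ Iter end() const { return {end_, 0}; }
  };

  __device__ GridRange grid_range_x(int count) {
    return {static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x),
            static_cast<int>(gridDim.x * blockDim.x), count};
  }

  __global__ void scale(float* data, int n, float a) {
    for (int i : grid_range_x(n)) data[i] *= a;  // grid-stride loop over [0, n)
  }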
/external/clang/test/CodeGenCUDA/
cuda-builtin-vars.cu 17 out[i++] = blockDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() in kernel()
18 out[i++] = blockDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() in kernel()
19 out[i++] = blockDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() in kernel()
/external/tensorflow/tensorflow/examples/adding_an_op/
cuda_op_kernel.cu.cc 22 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in AddOneKernel()
23 i += blockDim.x * gridDim.x) { in AddOneKernel()
/external/tensorflow/tensorflow/tools/ci_build/builds/user_ops/
cuda_op_kernel.cu.cc 22 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in AddOneKernel()
23 i += blockDim.x * gridDim.x) { in AddOneKernel()
/external/tensorflow/tensorflow/contrib/mpi_collectives/
ring.cu.cc 90 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in elemwise_accum()
91 i += blockDim.x * gridDim.x) { in elemwise_accum()
/external/tensorflow/tensorflow/contrib/mpi_collectives/kernels/
ring.cu.cc 90 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in elemwise_accum()
91 i += blockDim.x * gridDim.x) { in elemwise_accum()
/external/eigen/test/
cuda_common.h 11 dim3 threadIdx, blockDim, blockIdx; variable
26 int i = threadIdx.x + blockIdx.x*blockDim.x; in run_on_cuda_meta_kernel()
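Eigen's test header above declares threadIdx, blockDim and blockIdx as ordinary dim3 variables so the same kernel body can also run on the CPU. A host-only sketch of that trick (hypothetical names; the real header guards these declarations so they only exist when not compiling for the device):

  // Stand-ins for the CUDA builtins when building as plain C++.
  struct dim3 { unsigned int x, y, z; };
  static dim3 threadIdx, blockDim, blockIdx;

  void add_one(float* v, int n) {  // would be a __global__ kernel on the GPU
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    if (i < n) v[i] += 1.f;
  }

  // Emulate a 1-D launch by sweeping the fake indices.
  void launch_add_one(float* v, int n, unsigned int threads_per_block) {
    blockDim = {threads_per_block, 1, 1};
    for (blockIdx.x = 0; blockIdx.x * blockDim.x < (unsigned int)n; ++blockIdx.x)
      for (threadIdx.x = 0; threadIdx.x < blockDim.x; ++threadIdx.x)
        add_one(v, n);
  }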
/external/tensorflow/tensorflow/contrib/rnn/kernels/
lstm_ops_gpu.cu.cc 91 const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; in lstm_gates()
92 const int act_id = blockIdx.y * blockDim.y + threadIdx.y; in lstm_gates()
206 const int gid = blockDim.x * blockIdx.x + threadIdx.x; in concat_xh()
304 const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; in lstm_gates_bprop()
305 const int act_id = blockIdx.y * blockDim.y + threadIdx.y; in lstm_gates_bprop()
/external/tensorflow/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/tests_data/
saxpy.ll 8 @blockDim = external addrspace(1) global %struct.dim3
36 …(%struct.dim3, %struct.dim3* addrspacecast (%struct.dim3 addrspace(1)* @blockDim to %struct.dim3*)…
88 …(%struct.dim3, %struct.dim3* addrspacecast (%struct.dim3 addrspace(1)* @blockDim to %struct.dim3*)…
/external/tensorflow/tensorflow/contrib/tensorrt/custom_plugin_examples/
inc_op_kernel.cu.cc 33 int i = blockDim.x * blockIdx.x + threadIdx.x; in VecInc()
/external/clang/lib/Headers/
cuda_builtin_vars.h 114 __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; variable
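cuda_builtin_vars.h above models blockDim as a special type whose x/y/z members are read-only properties backed by NVVM intrinsics, which is where the "no setter defined for property" diagnostics earlier in these results come from. A rough sketch of the shape of that type; the real header generates it with macros and the exact attributes differ:

  struct __blockDim_sketch_t {
    // An MS-style property whose getter reads the ntid.x special register;
    // with no setter declared, "blockDim.x = 0" is a compile error.
    __declspec(property(get = __fetch_x)) unsigned int x;
    static __device__ inline __attribute__((always_inline))
    unsigned int __fetch_x() { return __nvvm_read_ptx_sreg_ntid_x(); }
    // y and z follow the same pattern with the ntid.y / ntid.z intrinsics.
  };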
