Searched refs:blockDim (Results 1 – 25 of 46) sorted by relevance

/external/clang/test/SemaCUDA/
cuda-builtin-vars.cu
21 out[i++] = blockDim.x; in kernel()
22 blockDim.x = 0; // expected-error {{no setter defined for property 'x'}} in kernel()
23 out[i++] = blockDim.y; in kernel()
24 blockDim.y = 0; // expected-error {{no setter defined for property 'y'}} in kernel()
25 out[i++] = blockDim.z; in kernel()
26 blockDim.z = 0; // expected-error {{no setter defined for property 'z'}} in kernel()
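
These Sema tests show that the CUDA built-in blockDim is read-only: each component can be read, but any assignment is rejected at compile time with a "no setter defined" diagnostic. A minimal sketch of the legal usage (kernel and buffer names are illustrative, not taken from the test):

__global__ void ReadBlockDims(unsigned* out) {
  // Reading the launch-time block dimensions is allowed.
  out[0] = blockDim.x;
  out[1] = blockDim.y;
  out[2] = blockDim.z;
  // blockDim.x = 0;  // would not compile: no setter defined for property 'x'
}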
/external/tensorflow/tensorflow/core/kernels/
split_lib_gpu.cu.cc
84 eigen_assert(blockDim.y == 1); in SplitOpKernel()
85 eigen_assert(blockDim.z == 1); in SplitOpKernel()
124 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in split_v_kernel()
132 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in split_v_kernel()
133 IntType blockSize = blockDim.x * blockDim.y; in split_v_kernel()
152 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in split_v_kernel()
163 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in split_v_kernel()
164 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) in split_v_kernel()
180 eigen_assert(blockDim.y == 1); in SplitVOpKernel_fixed()
181 eigen_assert(blockDim.z == 1); in SplitVOpKernel_fixed()
concat_lib_gpu_impl.cu.cc
41 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_fixed_kernel()
43 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in concat_fixed_kernel()
44 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_fixed_kernel()
50 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) { in concat_fixed_kernel()
69 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_variable_kernel()
77 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in concat_variable_kernel()
78 IntType blockSize = blockDim.x * blockDim.y; in concat_variable_kernel()
97 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in concat_variable_kernel()
108 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_variable_kernel()
109 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) in concat_variable_kernel()
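
The split and concat kernels above walk a 2D index space with grid-stride loops: the global column index gidx advances by blockDim.x * gridDim.x and the global row index gidy by blockDim.y * gridDim.y, so a fixed launch configuration covers matrices of any size. A minimal sketch of the same pattern (kernel name and element type are illustrative):

__global__ void GridStride2D(const float* in, float* out,
                             int total_rows, int total_cols) {
  for (int gidx = blockIdx.x * blockDim.x + threadIdx.x; gidx < total_cols;
       gidx += blockDim.x * gridDim.x) {
    for (int gidy = blockIdx.y * blockDim.y + threadIdx.y; gidy < total_rows;
         gidy += blockDim.y * gridDim.y) {
      out[gidy * total_cols + gidx] = in[gidy * total_cols + gidx];
    }
  }
}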
debug_ops_gpu.cu.cc
41 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CurtHealthKernel()
42 const int32 total_thread_count = gridDim.x * blockDim.x; in CurtHealthKernel()
59 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ConciseHealthKernel()
60 const int32 total_thread_count = gridDim.x * blockDim.x; in ConciseHealthKernel()
90 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in FullHealthKernel()
91 const int32 total_thread_count = gridDim.x * blockDim.x; in FullHealthKernel()
133 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReduceInfNanThreeSlotsKernel()
134 const int32 total_thread_count = gridDim.x * blockDim.x; in ReduceInfNanThreeSlotsKernel()
check_numerics_op_gpu.cu.cc
41 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CheckNumericsKernel()
42 const int32 total_thread_count = gridDim.x * blockDim.x; in CheckNumericsKernel()
64 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CheckNumericsKernelV2()
65 const int32 total_thread_count = gridDim.x * blockDim.x; in CheckNumericsKernelV2()
bias_op_gpu.cu.cc
133 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics()
138 for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; in BiasGradNHWC_SharedAtomics()
139 index += blockDim.x * gridDim.x) { in BiasGradNHWC_SharedAtomics()
145 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics()
158 for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) { in BiasGradNCHW_SharedAtomics()
169 for (int32 index = group_index * blockDim.x + threadIdx.x; in BiasGradNCHW_SharedAtomics()
170 index < total_count; index += blockDim.x * group_size) { in BiasGradNCHW_SharedAtomics()
bucketize_op_gpu.cu.cc
45 int32 lidx = threadIdx.y * blockDim.x + threadIdx.x; in BucketizeCustomKernel()
46 int32 blockSize = blockDim.x * blockDim.y; in BucketizeCustomKernel()
random_op_gpu.h
142 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
143 const int32 total_thread_count = gridDim.x * blockDim.x;
181 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
182 const int32 total_thread_count = gridDim.x * blockDim.x;
reduction_gpu_kernels.cu.h
171 const int gid = bid * blockDim.x + tid;
172 const int stride = blockDim.x * gridDim.x;
193 max(min(num_elems - bid * blockDim.x, num_threads), 0);
207 assert(blockDim.x % TF_RED_WARPSIZE == 0);
208 int warps_per_block = blockDim.x / TF_RED_WARPSIZE;
214 int gid = threadIdx.x + blockIdx.x * blockDim.x;
280 rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y);
304 row += rows_per_warp * gridDim.y * blockDim.y;
305 for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) {
327 if (blockDim.y > 1) {
[all …]
depthwise_conv_op_gpu.h
212 assert(blockDim.x == kBlockDepth);
213 assert(blockDim.y == args.in_cols);
214 const int block_height = blockDim.z;
504 assert(blockDim.x == args.in_cols);
505 assert(blockDim.z == kBlockDepth);
506 const int block_height = blockDim.y;
1164 assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.z));
1172 const int in_width = blockDim.y; // slower (see b/62280718): args.in_cols;
1181 assert(blockDim.x == kBlockDepth);
1182 assert(blockDim.y == args.in_cols);
[all …]
adjust_hsv_gpu.cu.h
102 for (int64 idx = (blockDim.x * blockIdx.x + threadIdx.x) * 3; in adjust_hsv_nhwc()
103 idx < number_elements; idx += blockDim.x * gridDim.x * 3) { in adjust_hsv_nhwc()
relu_op_gpu.cu.cc
47 int32 index = blockIdx.x * blockDim.x + threadIdx.x; in ReluGradHalfKernel()
48 const int32 total_device_threads = gridDim.x * blockDim.x; in ReluGradHalfKernel()
fused_batch_norm_op.cu.cc
184 int32 index = blockIdx.x * blockDim.x + threadIdx.x; in run()
185 const int32 total_device_threads = gridDim.x * blockDim.x; in run()
243 int32 index = blockIdx.x * blockDim.x + threadIdx.x; in run()
244 const int32 total_device_threads = gridDim.x * blockDim.x; in run()
/external/tensorflow/tensorflow/stream_executor/cuda/
cudart_stub.cc
109 dim3 *blockDim, in __cudaPopCallConfiguration() argument
112 using FuncPtr = cudaError_t(CUDARTAPI *)(dim3 * gridDim, dim3 * blockDim, in __cudaPopCallConfiguration()
116 return func_ptr(gridDim, blockDim, sharedMem, stream); in __cudaPopCallConfiguration()
120 dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0) { in __cudaPushCallConfiguration() argument
121 using FuncPtr = unsigned(CUDARTAPI *)(dim3 gridDim, dim3 blockDim, in __cudaPushCallConfiguration()
125 return func_ptr(gridDim, blockDim, sharedMem, stream); in __cudaPushCallConfiguration()
cusparse_10_0.inc
966 const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
975 bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
983 const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
992 bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
1001 const int *bsrSortedColIndA, int blockDim, const cuComplex *x,
1011 bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
1020 const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x,
1030 bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
1040 const int *bsrSortedColIndA, int blockDim, const float *x,
1051 bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y);
[all …]
cusparse_9_0.inc
966 const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
975 bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
983 const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
992 bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
1001 const int *bsrSortedColIndA, int blockDim, const cuComplex *x,
1011 bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
1020 const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x,
1030 bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim,
1040 const int *bsrSortedColIndA, int blockDim, const float *x,
1051 bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y);
[all …]
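
In these cuSPARSE wrapper stubs, blockDim is not the CUDA built-in: it is an ordinary int parameter of the bsrmv-family routines giving the block dimension of the BSR-format sparse matrix. A hedged sketch of such a call, with the parameter order taken from the fragments above and all handles, descriptors, and buffers assumed to be set up elsewhere:

// blockDim here is the BSR block size of matrix A (e.g. 2 for 2x2 blocks),
// unrelated to the blockDim of the kernels cuSPARSE launches internally.
cusparseStatus_t status = cusparseSbsrmv(
    handle, CUSPARSE_DIRECTION_ROW, CUSPARSE_OPERATION_NON_TRANSPOSE,
    mb, nb, nnzb, &alpha, descrA,
    bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA,
    /*blockDim=*/2, x, &beta, y);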
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/
TensorReductionCuda.h
115 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernel()
116 const Index num_threads = blockDim.x * gridDim.x; in ReductionInitKernel()
192 eigen_assert(blockDim.x == 1); in ReductionInitFullReduxKernelHalfFloat()
205 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernelHalfFloat()
206 const Index num_threads = blockDim.x * gridDim.x; in ReductionInitKernelHalfFloat()
378 eigen_assert(blockDim.y == 1);
379 eigen_assert(blockDim.z == 1);
386 const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
389 const Index num_threads = blockDim.x * gridDim.x;
390 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
[all …]
TensorConvolution.h
584 const int first_plane = blockIdx.y * blockDim.y;
585 const int plane_stride = blockDim.y * gridDim.y;
592 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
603 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
638 const int first_plane = blockIdx.z * blockDim.z;
639 const int plane_stride = blockDim.z * gridDim.z;
648 for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
651 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
663 for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
665 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
[all …]
TensorRandom.h
24 blockIdx.x * blockDim.x + threadIdx.x + in get_random_seed()
25 gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); in get_random_seed()
/external/clang/test/CodeGenCUDA/
cuda-builtin-vars.cu
17 out[i++] = blockDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() in kernel()
18 out[i++] = blockDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() in kernel()
19 out[i++] = blockDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() in kernel()
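
The CodeGen test shows how blockDim is implemented on NVPTX: each component read is lowered to an NVVM intrinsic that reads the ntid special register. A minimal sketch of the source side (kernel name is illustrative), with the expected lowering noted in comments per the CHECK lines above:

__global__ void BlockDims(unsigned* out) {
  out[0] = blockDim.x;  // lowered to @llvm.nvvm.read.ptx.sreg.ntid.x()
  out[1] = blockDim.y;  // lowered to @llvm.nvvm.read.ptx.sreg.ntid.y()
  out[2] = blockDim.z;  // lowered to @llvm.nvvm.read.ptx.sreg.ntid.z()
}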
/external/tensorflow/tensorflow/core/util/
gpu_device_functions.h
110 return detail::GpuGridRange<T>(blockIdx.x * blockDim.x + threadIdx.x, in GpuGridRangeX()
111 gridDim.x * blockDim.x, count); in GpuGridRangeX()
119 return detail::GpuGridRange<T>(blockIdx.y * blockDim.y + threadIdx.y, in GpuGridRangeY()
120 gridDim.y * blockDim.y, count); in GpuGridRangeY()
128 return detail::GpuGridRange<T>(blockIdx.z * blockDim.z + threadIdx.z, in GpuGridRangeZ()
129 gridDim.z * blockDim.z, count); in GpuGridRangeZ()
503 assert(blockDim.y == 1); in SetZero()
504 assert(blockDim.z == 1); in SetZero()
505 assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x); in SetZero()
515 assert(blockDim.y == 1); in SetToValue()
[all …]
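
GpuGridRangeX/Y/Z wrap the usual grid-stride computation (initial index blockIdx * blockDim + threadIdx, stride gridDim * blockDim) in an iterable range, as the GpuGridRangeX hit above shows. A sketch of how such a helper is typically used inside a TensorFlow GPU kernel, assuming the helper is visible at the point of use (namespace qualification omitted, kernel name illustrative):

// Grid-stride kernel written against the range helper above.
template <typename T>
__global__ void ScaleKernel(int n, T alpha, const T* in, T* out) {
  for (int i : GpuGridRangeX<int>(n)) {  // covers [0, n) regardless of grid size
    out[i] = alpha * in[i];
  }
}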
/external/tensorflow/tensorflow/tools/ci_build/builds/user_ops/
cuda_op_kernel.cu.cc
22 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in AddOneKernel()
23 i += blockDim.x * gridDim.x) { in AddOneKernel()
/external/tensorflow/tensorflow/examples/adding_an_op/
cuda_op_kernel.cu.cc
23 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in AddOneKernel()
24 i += blockDim.x * gridDim.x) { in AddOneKernel()
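
Both AddOneKernel hits are the canonical 1D grid-stride loop from the TensorFlow custom-op examples. A self-contained sketch of the same pattern together with an illustrative launch (block and grid sizes are arbitrary choices, not from the example):

__global__ void AddOneKernel(const int* in, int n, int* out) {
  // Each thread starts at its global index and strides by the total thread count.
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += blockDim.x * gridDim.x) {
    out[i] = in[i] + 1;
  }
}

// Example launch: 32 blocks of 256 threads cover any n.
// AddOneKernel<<<32, 256>>>(d_in, n, d_out);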
/external/eigen/test/
cuda_common.h
11 dim3 threadIdx, blockDim, blockIdx; variable
26 int i = threadIdx.x + blockIdx.x*blockDim.x; in run_on_cuda_meta_kernel()
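
The Eigen test header declares host-side dim3 variables named threadIdx, blockDim, and blockIdx (the "variable" hit at line 11) so that kernel-style index code like line 26 still compiles when the test is built without a device compiler. A minimal, self-contained sketch of that trick (not the Eigen header itself):

#ifndef __CUDACC__
// Host-only build: stand-ins for CUDA's launch-configuration variables so that
// expressions like threadIdx.x + blockIdx.x * blockDim.x remain valid C++.
struct dim3 { unsigned x = 1, y = 1, z = 1; };
static dim3 threadIdx, blockDim, blockIdx;
#endif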
/external/tensorflow/tensorflow/core/kernels/rnn/
lstm_ops_gpu.cu.cc
90 const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; in lstm_gates()
91 const int act_id = blockIdx.y * blockDim.y + threadIdx.y; in lstm_gates()
211 const int gid = blockDim.x * blockIdx.x + threadIdx.x; in concat_xh()
311 const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; in lstm_gates_bprop()
312 const int act_id = blockIdx.y * blockDim.y + threadIdx.y; in lstm_gates_bprop()
