Home
last modified time | relevance | path

Searched refs:thread_per_block (Results 1 – 25 of 51) sorted by relevance

123

/external/tensorflow/tensorflow/core/util/
Dcuda_kernel_helper_test.cu.cc158 cfg.thread_per_block, 0, d.stream(), cfg, \ in TEST_F()
162 TF_CHECK_OK(CudaLaunchKernel(Count1D, cfg.block_count, cfg.thread_per_block, \ in TEST_F()
169 cfg.thread_per_block, 0, d.stream(), cfg, \ in TEST_F()
173 TF_CHECK_OK(CudaLaunchKernel(Count1D, cfg.block_count, cfg.thread_per_block, \ in TEST_F()
192 return a.thread_per_block.x == b.thread_per_block.x && in operator ==()
193 a.thread_per_block.y == b.thread_per_block.y && in operator ==()
194 a.thread_per_block.z == b.thread_per_block.z && in operator ==()
198 a.thread_per_block.x == b.thread_per_block.x && in operator ==()
199 a.thread_per_block.y == b.thread_per_block.y && in operator ==()
200 a.thread_per_block.z == b.thread_per_block.z; in operator ==()
[all …]
Dcuda_launch_config.h116 int thread_per_block = -1; member
133 const int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock()); in GetCudaLaunchConfig() local
135 std::min(DivUp(physical_thread_count, thread_per_block), in GetCudaLaunchConfig()
139 config.thread_per_block = thread_per_block; in GetCudaLaunchConfig()
156 int thread_per_block = 0; in GetCudaLaunchConfig() local
159 &block_count, &thread_per_block, func, dynamic_shared_memory_size, in GetCudaLaunchConfig()
164 std::min(block_count, DivUp(work_element_count, thread_per_block)); in GetCudaLaunchConfig()
167 config.thread_per_block = thread_per_block; in GetCudaLaunchConfig()
191 config.thread_per_block = fixed_block_size; in GetCudaLaunchConfigFixedBlockSize()
198 dim3 thread_per_block = dim3(0, 0, 0); member
[all …]
/external/tensorflow/tensorflow/core/kernels/
Dbias_op_gpu.cu.cc89 config.thread_per_block, 0, d.stream(), in compute()
94 config.thread_per_block, 0, d.stream(), in compute()
219 <<<config.block_count, config.thread_per_block, shared_memory_size, in compute()
226 if (config.thread_per_block < kWarpSize) { in compute()
227 config.thread_per_block = kWarpSize; in compute()
231 config.thread_per_block, 0, d.stream(), output_backprop, in compute()
240 BiasGradNHWC_Naive<T>, config.block_count, config.thread_per_block, 0, in compute()
244 config.thread_per_block, 0, d.stream(), in compute()
Ddepthtospace_op_gpu.cu.cc166 D2S_NHWC<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
202 D2S_NCHW_LOOP<T, 2>, config.block_count, config.thread_per_block, in operator ()()
209 D2S_NCHW_LOOP<T, 3>, config.block_count, config.thread_per_block, in operator ()()
216 D2S_NCHW_LOOP<T, 4>, config.block_count, config.thread_per_block, in operator ()()
231 D2S_NCHW<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
Dspacetodepth_op_gpu.cu.cc162 S2D_NHWC<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
198 S2D_NCHW_LOOP<T, 2>, config.block_count, config.thread_per_block, in operator ()()
205 S2D_NCHW_LOOP<T, 3>, config.block_count, config.thread_per_block, in operator ()()
212 S2D_NCHW_LOOP<T, 4>, config.block_count, config.thread_per_block, in operator ()()
227 S2D_NCHW<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
Dinplace_ops_functor_gpu.cu.cc53 DoParallelConcatOpKernel<T>, cfg.block_count, cfg.thread_per_block, 0, in DoParallelConcatUpdate()
121 cfg.block_count, cfg.thread_per_block, 0, in DoInplaceOp()
127 cfg.thread_per_block, 0, d.stream(), in DoInplaceOp()
133 cfg.thread_per_block, 0, d.stream(), in DoInplaceOp()
155 cfg.block_count, cfg.thread_per_block, 0, in DoInplaceOp()
Dresize_nearest_neighbor_op_gpu.cu.cc180 config.thread_per_block, 0, d.stream(), output_size, input.data(), in operator ()()
187 config.thread_per_block, 0, d.stream(), output_size, input.data(), in operator ()()
223 SetZero<<<output_config.block_count, output_config.thread_per_block, 0, in operator ()()
234 input_config.thread_per_block, 0, d.stream(), in operator ()()
242 input_config.block_count, input_config.thread_per_block, 0, in operator ()()
Dsegment_reduction_ops_gpu.cu.cc142 config.thread_per_block, 0, d.stream(), in operator ()()
168 config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
188 SetToValue<T>, config.block_count, config.thread_per_block, 0, in operator ()()
204 config.thread_per_block, 0, d.stream(), input_outer_dim_size, in operator ()()
Dcwise_op_clip_gpu.cu.cc66 UnaryClipCustomKernel<T>, config.block_count, config.thread_per_block, in operator ()()
83 config.thread_per_block, 0, d.stream(), in0_flat.size(), in operator ()()
99 config.thread_per_block, 0, d.stream(), in0_flat.size(), in operator ()()
Dresize_bilinear_op_gpu.cu.cc285 ResizeBilinearKernel<T>, config.block_count, config.thread_per_block, in operator ()()
292 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
323 SetZero<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
332 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
339 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
Ddilation_ops_gpu.cu.cc199 DilationKernel<<<config.block_count, config.thread_per_block, 0, in operator ()()
234 config.thread_per_block, 0, d.stream(), in operator ()()
242 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
276 config.thread_per_block, 0, d.stream(), in operator ()()
282 DilationBackpropFilterKernel<<<config.block_count, config.thread_per_block, in operator ()()
Dsplit_lib_gpu.cu.cc206 config.thread_per_block, 0, d.stream(), input, in Run()
221 SplitVOpKernel_fixed<T><<<config.block_count, config.thread_per_block, 0, in Run()
234 <<<config.block_count, config.thread_per_block, smem_usage, in Run()
239 <<<config.block_count, config.thread_per_block, 0, in Run()
Dfused_batch_norm_op.cu.cc41 VarianceToInvVarianceKernel<<<config.block_count, config.thread_per_block, 0, in operator ()()
63 InvVarianceToVarianceKernel<<<config.block_count, config.thread_per_block, 0, in operator ()()
Dmatrix_set_diag_op_gpu.cu.cc77 config.block_count, config.thread_per_block, in Compute()
84 config.block_count, config.thread_per_block, in Compute()
Dgather_functor_gpu.cu.h97 config.thread_per_block, 0, d.stream(), params.data(), indices.data(),
102 config.thread_per_block, 0, d.stream(), params.data(), indices.data(),
Ddiag_op_gpu.cu.cc64 DiagCudaKernel<<<diag_config.block_count, diag_config.thread_per_block, 0, in operator ()()
104 DiagPartCudaKernel<<<diag_config.block_count, diag_config.thread_per_block, in operator ()()
Dcrop_and_resize_op_gpu.cu.cc375 CropAndResizeKernel<T>, config.block_count, config.thread_per_block, in operator ()()
410 SetZero<T>, config.block_count, config.thread_per_block, 0, in operator ()()
426 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
460 SetZero<float>, config.block_count, config.thread_per_block, 0, in operator ()()
470 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
Dbucketize_op_gpu.cu.cc102 <<<config.block_count, config.thread_per_block, shared_mem_size, in Compute()
108 config.thread_per_block, 0, d.stream(), input.size(), input.data(), in Compute()
Dsearchsorted_op_gpu.cu.cc73 config.thread_per_block, 0, stream, sorted_inputs.data(), batch_size, in Compute()
93 config.thread_per_block, 0, stream, sorted_inputs.data(), batch_size, in Compute()
Dconcat_lib_gpu_impl.cu.cc148 <<<config.block_count, config.thread_per_block, 0, in ConcatGPUImpl()
161 <<<config.block_count, config.thread_per_block, smem_usage, in ConcatGPUImpl()
167 <<<config.block_count, config.thread_per_block, 0, in ConcatGPUImpl()
Drelu_op_gpu.cu.cc109 ReluGradHalfKernel<<<config.block_count, config.thread_per_block, 0, in operator ()()
138 Relu_int8x4_kernel<<<config.block_count, config.thread_per_block, 0, in operator ()()
Ddeterminant_op_gpu.cu.cc135 config.block_count, config.thread_per_block, 0, device.stream(), in operator ()()
157 config.block_count, config.thread_per_block, 0, device.stream(), in operator ()()
Dscatter_functor_gpu.cu.h132 config.thread_per_block, 0, d.stream(), params.data(), updates.data(),
153 config.block_count, config.thread_per_block, 0, d.stream(),
Dpooling_ops_3d_gpu.cu.cc148 config.thread_per_block, 0, d.stream()>>>( in operator ()()
155 config.thread_per_block, 0, d.stream()>>>( in operator ()()
/external/tensorflow/tensorflow/contrib/resampler/kernels/
Dresampler_ops_gpu.cu.cc123 Resampler2DKernel<T>, config.block_count, config.thread_per_block, 0, in operator ()()
258 SetZero<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
263 SetZero<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
270 config.thread_per_block, 0, d.stream(), data, in operator ()()

123