/external/tensorflow/tensorflow/core/util/ |
D | cuda_kernel_helper_test.cu.cc | 158 cfg.thread_per_block, 0, d.stream(), cfg, \ in TEST_F() 162 TF_CHECK_OK(CudaLaunchKernel(Count1D, cfg.block_count, cfg.thread_per_block, \ in TEST_F() 169 cfg.thread_per_block, 0, d.stream(), cfg, \ in TEST_F() 173 TF_CHECK_OK(CudaLaunchKernel(Count1D, cfg.block_count, cfg.thread_per_block, \ in TEST_F() 192 return a.thread_per_block.x == b.thread_per_block.x && in operator ==() 193 a.thread_per_block.y == b.thread_per_block.y && in operator ==() 194 a.thread_per_block.z == b.thread_per_block.z && in operator ==() 198 a.thread_per_block.x == b.thread_per_block.x && in operator ==() 199 a.thread_per_block.y == b.thread_per_block.y && in operator ==() 200 a.thread_per_block.z == b.thread_per_block.z; in operator ==() [all …]
|
D | cuda_launch_config.h | 116 int thread_per_block = -1; member 133 const int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock()); in GetCudaLaunchConfig() local 135 std::min(DivUp(physical_thread_count, thread_per_block), in GetCudaLaunchConfig() 139 config.thread_per_block = thread_per_block; in GetCudaLaunchConfig() 156 int thread_per_block = 0; in GetCudaLaunchConfig() local 159 &block_count, &thread_per_block, func, dynamic_shared_memory_size, in GetCudaLaunchConfig() 164 std::min(block_count, DivUp(work_element_count, thread_per_block)); in GetCudaLaunchConfig() 167 config.thread_per_block = thread_per_block; in GetCudaLaunchConfig() 191 config.thread_per_block = fixed_block_size; in GetCudaLaunchConfigFixedBlockSize() 198 dim3 thread_per_block = dim3(0, 0, 0); member [all …]
|
/external/tensorflow/tensorflow/core/kernels/ |
D | bias_op_gpu.cu.cc | 89 config.thread_per_block, 0, d.stream(), in compute() 94 config.thread_per_block, 0, d.stream(), in compute() 219 <<<config.block_count, config.thread_per_block, shared_memory_size, in compute() 226 if (config.thread_per_block < kWarpSize) { in compute() 227 config.thread_per_block = kWarpSize; in compute() 231 config.thread_per_block, 0, d.stream(), output_backprop, in compute() 240 BiasGradNHWC_Naive<T>, config.block_count, config.thread_per_block, 0, in compute() 244 config.thread_per_block, 0, d.stream(), in compute()
|
D | depthtospace_op_gpu.cu.cc | 166 D2S_NHWC<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()() 202 D2S_NCHW_LOOP<T, 2>, config.block_count, config.thread_per_block, in operator ()() 209 D2S_NCHW_LOOP<T, 3>, config.block_count, config.thread_per_block, in operator ()() 216 D2S_NCHW_LOOP<T, 4>, config.block_count, config.thread_per_block, in operator ()() 231 D2S_NCHW<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
|
D | spacetodepth_op_gpu.cu.cc | 162 S2D_NHWC<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()() 198 S2D_NCHW_LOOP<T, 2>, config.block_count, config.thread_per_block, in operator ()() 205 S2D_NCHW_LOOP<T, 3>, config.block_count, config.thread_per_block, in operator ()() 212 S2D_NCHW_LOOP<T, 4>, config.block_count, config.thread_per_block, in operator ()() 227 S2D_NCHW<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
|
D | inplace_ops_functor_gpu.cu.cc | 53 DoParallelConcatOpKernel<T>, cfg.block_count, cfg.thread_per_block, 0, in DoParallelConcatUpdate() 121 cfg.block_count, cfg.thread_per_block, 0, in DoInplaceOp() 127 cfg.thread_per_block, 0, d.stream(), in DoInplaceOp() 133 cfg.thread_per_block, 0, d.stream(), in DoInplaceOp() 155 cfg.block_count, cfg.thread_per_block, 0, in DoInplaceOp()
|
D | resize_nearest_neighbor_op_gpu.cu.cc | 180 config.thread_per_block, 0, d.stream(), output_size, input.data(), in operator ()() 187 config.thread_per_block, 0, d.stream(), output_size, input.data(), in operator ()() 223 SetZero<<<output_config.block_count, output_config.thread_per_block, 0, in operator ()() 234 input_config.thread_per_block, 0, d.stream(), in operator ()() 242 input_config.block_count, input_config.thread_per_block, 0, in operator ()()
|
D | segment_reduction_ops_gpu.cu.cc | 142 config.thread_per_block, 0, d.stream(), in operator ()() 168 config.block_count, config.thread_per_block, 0, d.stream(), in operator ()() 188 SetToValue<T>, config.block_count, config.thread_per_block, 0, in operator ()() 204 config.thread_per_block, 0, d.stream(), input_outer_dim_size, in operator ()()
|
D | cwise_op_clip_gpu.cu.cc | 66 UnaryClipCustomKernel<T>, config.block_count, config.thread_per_block, in operator ()() 83 config.thread_per_block, 0, d.stream(), in0_flat.size(), in operator ()() 99 config.thread_per_block, 0, d.stream(), in0_flat.size(), in operator ()()
|
D | resize_bilinear_op_gpu.cu.cc | 285 ResizeBilinearKernel<T>, config.block_count, config.thread_per_block, in operator ()() 292 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()() 323 SetZero<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()() 332 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()() 339 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
|
D | dilation_ops_gpu.cu.cc | 199 DilationKernel<<<config.block_count, config.thread_per_block, 0, in operator ()() 234 config.thread_per_block, 0, d.stream(), in operator ()() 242 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()() 276 config.thread_per_block, 0, d.stream(), in operator ()() 282 DilationBackpropFilterKernel<<<config.block_count, config.thread_per_block, in operator ()()
|
D | split_lib_gpu.cu.cc | 206 config.thread_per_block, 0, d.stream(), input, in Run() 221 SplitVOpKernel_fixed<T><<<config.block_count, config.thread_per_block, 0, in Run() 234 <<<config.block_count, config.thread_per_block, smem_usage, in Run() 239 <<<config.block_count, config.thread_per_block, 0, in Run()
|
D | fused_batch_norm_op.cu.cc | 41 VarianceToInvVarianceKernel<<<config.block_count, config.thread_per_block, 0, in operator ()() 63 InvVarianceToVarianceKernel<<<config.block_count, config.thread_per_block, 0, in operator ()()
|
D | matrix_set_diag_op_gpu.cu.cc | 77 config.block_count, config.thread_per_block, in Compute() 84 config.block_count, config.thread_per_block, in Compute()
|
D | gather_functor_gpu.cu.h | 97 config.thread_per_block, 0, d.stream(), params.data(), indices.data(), 102 config.thread_per_block, 0, d.stream(), params.data(), indices.data(),
|
D | diag_op_gpu.cu.cc | 64 DiagCudaKernel<<<diag_config.block_count, diag_config.thread_per_block, 0, in operator ()() 104 DiagPartCudaKernel<<<diag_config.block_count, diag_config.thread_per_block, in operator ()()
|
D | crop_and_resize_op_gpu.cu.cc | 375 CropAndResizeKernel<T>, config.block_count, config.thread_per_block, in operator ()() 410 SetZero<T>, config.block_count, config.thread_per_block, 0, in operator ()() 426 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()() 460 SetZero<float>, config.block_count, config.thread_per_block, 0, in operator ()() 470 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
|
D | bucketize_op_gpu.cu.cc | 102 <<<config.block_count, config.thread_per_block, shared_mem_size, in Compute() 108 config.thread_per_block, 0, d.stream(), input.size(), input.data(), in Compute()
|
D | searchsorted_op_gpu.cu.cc | 73 config.thread_per_block, 0, stream, sorted_inputs.data(), batch_size, in Compute() 93 config.thread_per_block, 0, stream, sorted_inputs.data(), batch_size, in Compute()
|
D | concat_lib_gpu_impl.cu.cc | 148 <<<config.block_count, config.thread_per_block, 0, in ConcatGPUImpl() 161 <<<config.block_count, config.thread_per_block, smem_usage, in ConcatGPUImpl() 167 <<<config.block_count, config.thread_per_block, 0, in ConcatGPUImpl()
|
D | relu_op_gpu.cu.cc | 109 ReluGradHalfKernel<<<config.block_count, config.thread_per_block, 0, in operator ()() 138 Relu_int8x4_kernel<<<config.block_count, config.thread_per_block, 0, in operator ()()
|
D | determinant_op_gpu.cu.cc | 135 config.block_count, config.thread_per_block, 0, device.stream(), in operator ()() 157 config.block_count, config.thread_per_block, 0, device.stream(), in operator ()()
|
D | scatter_functor_gpu.cu.h | 132 config.thread_per_block, 0, d.stream(), params.data(), updates.data(), 153 config.block_count, config.thread_per_block, 0, d.stream(),
|
D | pooling_ops_3d_gpu.cu.cc | 148 config.thread_per_block, 0, d.stream()>>>( in operator ()() 155 config.thread_per_block, 0, d.stream()>>>( in operator ()()
|
/external/tensorflow/tensorflow/contrib/resampler/kernels/ |
D | resampler_ops_gpu.cu.cc | 123 Resampler2DKernel<T>, config.block_count, config.thread_per_block, 0, in operator ()() 258 SetZero<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()() 263 SetZero<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()() 270 config.thread_per_block, 0, d.stream(), data, in operator ()()
|