Searched refs:kThreadsPerBlock (Results 1 – 3 of 3) sorted by relevance
/external/tensorflow/tensorflow/core/kernels/ |
D | maxpooling_op_gpu.cu.cc | 391 const int kThreadsPerBlock = 1024; in operator ()() local 395 (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, in operator ()() 410 const int kThreadsPerBlock = 1024; in operator ()() local 415 <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, in operator ()() 416 kThreadsPerBlock, 0, d.stream()>>>( in operator ()() 422 <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, in operator ()() 423 kThreadsPerBlock, 0, d.stream()>>>( in operator ()() 438 const int kThreadsPerBlock = 1024; in operator ()() local 442 SetZero<<<(bottom_size + kThreadsPerBlock - 1) / kThreadsPerBlock, in operator ()() 443 kThreadsPerBlock, 0, d.stream()>>>(bottom_size, bottom_diff); in operator ()() [all …]
|
/external/tensorflow/tensorflow/core/util/ |
D | cuda_launch_config.h | 210 const int kThreadsPerBlock = 256; in GetCuda2DLaunchConfig() local 211 int block_cols = std::min(xdim, kThreadsPerBlock); in GetCuda2DLaunchConfig() 213 int block_rows = std::max(kThreadsPerBlock / block_cols, 1); in GetCuda2DLaunchConfig() 218 const int max_blocks = std::max(physical_thread_count / kThreadsPerBlock, 1); in GetCuda2DLaunchConfig()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | ir_emitter_unnested.cc | 1297 const uint64 kThreadsPerBlock = kTileSize / 2; in HandleSort() local 1310 kThreadsPerBlock > in HandleSort() 1315 uint64 num_blocks = CeilOfRatio(num_iterations, kThreadsPerBlock); in HandleSort() 1316 LaunchDimensions tiled_launch_dimensions(num_blocks, kThreadsPerBlock); in HandleSort()
|