Searched refs:kThreadsPerBlock (Results 1 – 4 of 4) sorted by relevance
/external/tensorflow/tensorflow/core/kernels/ |
D | maxpooling_op_gpu.cu.cc | 360 const int kThreadsPerBlock = 1024; in operator ()() local 365 (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, in operator ()() 381 const int kThreadsPerBlock = 1024; in operator ()() local 387 (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, in operator ()() 388 kThreadsPerBlock, 0, d.stream(), output_size, in operator ()() 395 (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, in operator ()() 396 kThreadsPerBlock, 0, d.stream(), output_size, in operator ()() 410 const int kThreadsPerBlock = 1024; in operator ()() local 413 SetZero<T>, (input_size + kThreadsPerBlock - 1) / kThreadsPerBlock, in operator ()() 414 kThreadsPerBlock, 0, d.stream(), input_size, bottom_diff)); in operator ()() [all …]
|
/external/tensorflow/tensorflow/core/util/ |
D | gpu_launch_config.h | 234 const int kThreadsPerBlock = 256; in GetGpu2DLaunchConfig() local 235 int block_cols = std::min(xdim, kThreadsPerBlock); in GetGpu2DLaunchConfig() 237 int block_rows = std::max(kThreadsPerBlock / block_cols, 1); in GetGpu2DLaunchConfig() 242 const int max_blocks = std::max(physical_thread_count / kThreadsPerBlock, 1); in GetGpu2DLaunchConfig()
|
/external/pytorch/torch/csrc/distributed/c10d/ |
D | intra_node_comm.cu | 12 static constexpr size_t kThreadsPerBlock = 1024; variable 473 if (N_aligned < numelPerThread * kThreadsPerBlock) { in getLaunchConfig() 480 std::min(divUp(threadsRequired, kThreadsPerBlock), kMaxAllReduceBlocks); in getLaunchConfig() 482 threads.x = std::min(kThreadsPerBlock, warpsPerBlock * kWarpSize); in getLaunchConfig()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | ir_emitter_unnested.cc | 2652 const uint64_t kThreadsPerBlock = kTileSize / 2; in EmitSort() local 2665 kThreadsPerBlock > in EmitSort() 2674 op_name, (no_tiling ? "won't" : "will"), kTileSize, kThreadsPerBlock, in EmitSort() 2679 uint64_t num_blocks = CeilOfRatio(num_iterations, kThreadsPerBlock); in EmitSort() 2680 LaunchDimensions tiled_launch_dimensions(num_blocks, kThreadsPerBlock); in EmitSort() 2682 op_name, num_blocks, kThreadsPerBlock); in EmitSort()
|