/external/tensorflow/tensorflow/stream_executor/rocm/ |
D | rocm_helpers.cu.cc | 36 const int threads_per_block = 256; in rocm_MakeBatchPointers() local 39 dim3((n + threads_per_block - 1) / threads_per_block, 1, 1), in rocm_MakeBatchPointers() 40 dim3(threads_per_block, 1, 1), 0, (hipStream_t)stream, base, stride, n, in rocm_MakeBatchPointers()
|
D | rocm_driver.cc | 1339 GpuContext* context, hipFunction_t kernel, int threads_per_block, in GetMaxOccupiedBlocksPerCore() argument
|
/external/tensorflow/tensorflow/core/kernels/image/ |
D | adjust_hue_op_gpu.cu.cc | 35 const int threads_per_block = config.thread_per_block; in operator ()() local 37 (number_of_elements + threads_per_block - 1) / threads_per_block; in operator ()() 39 block_count, threads_per_block, 0, stream, in operator ()()
|
D | adjust_saturation_op_gpu.cu.cc | 36 const int threads_per_block = config.thread_per_block; in operator ()() local 38 (number_of_elements + threads_per_block - 1) / threads_per_block; in operator ()() 40 block_count, threads_per_block, 0, stream, in operator ()()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | launch_dimensions.cc | 39 int64_t threads_per_block = gpu_device_info.threads_per_block_limit; in ThreadsPerBlockLimit() local 40 if (threads_per_block <= 0) { in ThreadsPerBlockLimit() 48 threads_per_block = gpu_device_info.threads_per_warp; in ThreadsPerBlockLimit() 49 if (threads_per_block == 0) { in ThreadsPerBlockLimit() 51 threads_per_block = 32; in ThreadsPerBlockLimit() 54 return threads_per_block; in ThreadsPerBlockLimit()
|
D | elemental_ir_emitter.cc | 329 llvm::Value* threads_per_block = IntCast( in EmitThreadId() local 332 return NSWAdd(NSWMul(block_id, threads_per_block), thread_id_in_block); in EmitThreadId()
|
D | ir_emitter_unnested.h | 667 int threads_per_block, int num_results_per_warp = 1); 740 llvm::Value* EmitThreadId(int64_t threads_per_block, llvm::Type* index_ty);
|
D | ir_emitter_unnested.cc | 3666 int threads_per_block, int num_results_per_warp) { in EmitFullWarpShuffleDownLoopForReduce() argument 3671 CHECK_EQ(threads_per_block % 32, 0); in EmitFullWarpShuffleDownLoopForReduce() 4055 llvm::Value* IrEmitterUnnested::EmitThreadId(int64_t threads_per_block, in EmitThreadId() argument 4061 llvm_ir::AddRangeMetadata(0, threads_per_block, thread_id_raw); in EmitThreadId()
|
/external/tensorflow/tensorflow/compiler/xla/stream_executor/ |
D | device_description.cc | 143 int64_t element_count, int64_t *threads_per_block, in CalculateDimensionality() argument 145 *threads_per_block = device_description.threads_per_block_limit(); in CalculateDimensionality() 146 *block_count = port::MathUtil::CeilOfRatio(element_count, *threads_per_block); in CalculateDimensionality() 148 CHECK_LE(element_count, *threads_per_block); in CalculateDimensionality() 149 *threads_per_block = element_count; in CalculateDimensionality()
|
D | device_description.h | 472 int64_t element_count, int64_t *threads_per_block,
|
/external/tensorflow/tensorflow/core/kernels/ |
D | reduction_gpu_kernels.cu.h | 721 const int threads_per_block = 128; 722 const int warps_per_block = threads_per_block / TF_RED_WARPSIZE; 726 threads_per_block, 0, cu_stream, in, out, 862 int threads_per_block = 128; 863 int num_blocks = Eigen::divup(extent_y, threads_per_block); 866 num_blocks, threads_per_block, 0, cu_stream, in, 875 int threads_per_block = 128; 877 (extent_x * extent_z + threads_per_block - 1) / threads_per_block; 882 num_blocks, threads_per_block, 0, cu_stream, in, 890 int threads_per_block = 128; [all …]
|
/external/tensorflow/tensorflow/compiler/xla/stream_executor/gpu/ |
D | redzone_allocator.cc | 224 int64_t threads_per_block = std::min( in RunRedzoneChecker() local 227 tensorflow::MathUtil::CeilOfRatio(num_elements, threads_per_block); in RunRedzoneChecker() 230 ThreadDim(threads_per_block), BlockDim(block_count), comparison_kernel, in RunRedzoneChecker()
|
D | gpu_driver.h | 580 GpuContext* context, GpuFunctionHandle kernel, int threads_per_block,
|
/external/mesa3d/src/gallium/drivers/radeonsi/ |
D | si_get.c | 731 unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type); in si_get_compute_param() local 732 block_size[0] = threads_per_block; in si_get_compute_param() 733 block_size[1] = threads_per_block; in si_get_compute_param() 734 block_size[2] = threads_per_block; in si_get_compute_param()
|
/external/mesa3d/src/gallium/drivers/r600/ |
D | r600_pipe_common.c | 958 unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type); in r600_get_compute_param() local 959 block_size[0] = threads_per_block; in r600_get_compute_param() 960 block_size[1] = threads_per_block; in r600_get_compute_param() 961 block_size[2] = threads_per_block; in r600_get_compute_param()
|
/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/ |
D | cuda_driver.cc | 1637 GpuContext* context, CUfunction kernel, int threads_per_block, in GetMaxOccupiedBlocksPerCore() argument 1644 &max_blocks, kernel, threads_per_block, dynamic_shared_memory_bytes), in GetMaxOccupiedBlocksPerCore()
|