Home
last modified time | relevance | path

Searched refs:kWarpSize (Results 1 – 5 of 5) sorted by relevance

/external/tensorflow/tensorflow/compiler/xla/service/gpu/
tree_reduction_rewriter.cc:43  static constexpr int64 kRowAtomicFreeBound = kWarpSize * kWarpSize * 8;
44 static constexpr int64 kColumnAtomicFreeBound = kWarpSize * 128;
ir_emission_utils.cc:143  if (reduction_dimensions.dimensions[2] % (kWarpSize * kWarpSize * 64) == in GetReductionTiling()
291 return reduction_dimensions.dimensions[2] >= kWarpSize; in IsReductionFromOrToContiguousDimensions()
297 return reduction_dimensions.dimensions[1] >= kWarpSize; in IsReductionFromOrToContiguousDimensions()
363 return reduction_dimensions.dimensions[2] >= kWarpSize; in IsReductionFromOrToContiguousDimensions()
369 return reduction_dimensions.dimensions[1] >= kWarpSize; in IsReductionFromOrToContiguousDimensions()
504 intrinsic, {b->getInt32(-1), value, offset, b->getInt32(kWarpSize - 1)}); in EmitNVPTXShflDown()
ir_emission_utils.h:77  constexpr int64 kWarpSize = 32; variable
ir_emitter_unnested.cc:4294  llvm::ArrayType::get(primitive_type, kWarpSize), in EmitPrologueForReduction()
4484 b_.CreateUDiv(thread_id_info.thread_id_x, constant(kWarpSize)); in EmitEpilogueForReduction()
4505 constant(mapping_scheme.GetNumThreadsX() / kWarpSize)); in EmitEpilogueForReduction()
4691 /*lane_id=*/b_.CreateURem(thread_id, constant(kWarpSize), "lane_id")}; in EmitThreadIdInfo()
4827 /*tile_sizes=*/{1, kWarpSize, kWarpSize}, in EmitHlo021Tile()
4829 /*num_threads_x=*/kWarpSize, in EmitHlo021Tile()
5320 int64 num_threads_y = reduction_dimensions.is_row_reduction ? 1 : kWarpSize; in ComputeReductionCodegenInfo()
5338 kWarpSize)); in ComputeReductionCodegenInfo()
5340 return kWarpSize; in ComputeReductionCodegenInfo()
/external/tensorflow/tensorflow/core/kernels/
bias_op_gpu.cu.cc:222  static constexpr int32 kWarpSize = 32; in compute() local
241 if (config.thread_per_block < kWarpSize) { in compute()
242 config.thread_per_block = kWarpSize; in compute()