Home
last modified time | relevance | path

Searched refs:unroll_factor (Results 1 – 17 of 17) sorted by relevance

/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dparallel_loop_emitter.cc75 const int unroll_factor = in EmitLinearBaseAndThreadIdx() local
76 launch_config_.unroll_factor > 1 ? launch_config_.unroll_factor : 1; in EmitLinearBaseAndThreadIdx()
83 ? launch_dimensions_.total_nb_threads() * unroll_factor in EmitLinearBaseAndThreadIdx()
131 if (!launch_config_.logical_order && launch_config_.unroll_factor > 1) { in EmitLinearBaseAndThreadIdx()
134 llvm::ConstantInt::get(index_type, launch_config_.unroll_factor), in EmitLinearBaseAndThreadIdx()
155 const int unroll_factor = launch_config_.unroll_factor; in EmitLogicalIndexAndSetExitBasicBlock() local
159 for (int i = 0; i < unroll_factor; ++i) { in EmitLogicalIndexAndSetExitBasicBlock()
226 launch_config_.unroll_factor); in EmitIndexAndSetExitBasicBlock()
232 << launch_config_.unroll_factor; in EmitIndexAndSetExitBasicBlock()
259 llvm::ConstantInt::get(index_type, launch_config_.unroll_factor), in EmitIndexAndSetExitBasicBlock()
[all …]
Dlaunch_dimensions.cc64 shape.dimensions().back() / dim_config.unroll_factor; in ThreadsPerBlockRowVectorized()
66 shape.dimensions().back() % dim_config.unroll_factor == 0 && in ThreadsPerBlockRowVectorized()
87 CHECK_EQ(num_elements % dim_config.unroll_factor, 0); in CalculateLaunchDimensions()
88 num_elements = num_elements / dim_config.unroll_factor; in CalculateLaunchDimensions()
114 dim_config.unroll_factor, in CalculateLaunchDimensions()
Dlaunch_dimensions.h86 int unroll_factor = 1; member
104 "unroll_factor=", unroll_factor, ", few_waves=", few_waves, in ToString()
Dir_emitter_unnested.cc730 int unroll_factor = 1; in EmitPadToStatic() local
737 {unroll_factor})); in EmitPadToStatic()
845 {unroll_factor}) in EmitPadToStatic()
856 int unroll_factor = 1; in EmitSliceToDynamic() local
863 {unroll_factor})); in EmitSliceToDynamic()
968 {unroll_factor}) in EmitSliceToDynamic()
1561 (launch_dimensions.launch_bound() * launch_config.unroll_factor)) { in EnableLogicalIndexGenerationForOutput()
1685 int unroll_factor; in EmitLoopFusion() local
1687 unroll_factor = ComputeMaxUnrollFactor(fusion, hlo_module_config_); in EmitLoopFusion()
1689 unroll_factor = 1; in EmitLoopFusion()
[all …]
/external/pytorch/aten/src/ATen/native/cuda/
DRreluWithNoise.cu19 template <typename scalar_t, int unroll_factor, typename F>
40 int grid_stride = blockDim.x * gridDim.x * unroll_factor; in rrelu_with_noise_cuda_kernel()
48 static_assert(sizeof(rand)/sizeof(rand.x) == unroll_factor, ""); in rrelu_with_noise_cuda_kernel()
51 for (int ii = 0; ii < unroll_factor; ii++) { in rrelu_with_noise_cuda_kernel()
83 const int unroll_factor = std::is_same<scalar_t, double>::value ? 2 : 4; in _rrelu_with_noise_cuda_train() local
84 auto execution_policy = calc_execution_policy(numel, unroll_factor); in _rrelu_with_noise_cuda_train()
DDistributionTemplates.h49 …_t, dim3, dim3> calc_execution_policy(const int64_t total_elements, const uint32_t unroll_factor) { in calc_execution_policy() argument
59 …uint64_t counter_offset = ((numel - 1) / (block_size * grid.x * unroll_factor) + 1) * max_generato… in calc_execution_policy()
64 template<typename accscalar_t, int unroll_factor, typename dist_t, typename transform_t>
78 int rounded_size = ((numel - 1)/(blockDim.x * gridDim.x * unroll_factor)+1) * in C10_LAUNCH_BOUNDS_2()
79 blockDim.x * gridDim.x * unroll_factor; in C10_LAUNCH_BOUNDS_2()
80 …index = idx; linear_index < rounded_size; linear_index += blockDim.x * gridDim.x * unroll_factor) { in C10_LAUNCH_BOUNDS_2()
83 for (int ii = 0; ii < unroll_factor; ii++) { in C10_LAUNCH_BOUNDS_2()
119 const int unroll_factor = sizeof(dist_func_return_t) / sizeof(accscalar_t); in distribution_nullary_kernel() local
120 TORCH_CHECK(unroll_factor >= 1, "unroll_factor must be >= 1."); in distribution_nullary_kernel()
126 auto execution_policy = calc_execution_policy(numel, unroll_factor); in distribution_nullary_kernel()
[all …]
Dlayer_norm_kernel.cu568 constexpr int unroll_factor = 8; in GammaBetaBackwardCUDAKernel_32x32() local
578 for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); in GammaBetaBackwardCUDAKernel_32x32()
580 int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor; in GammaBetaBackwardCUDAKernel_32x32()
582 if (laneId < unroll_factor) { in GammaBetaBackwardCUDAKernel_32x32()
589 for (int ii = 0; ii < unroll_factor; ++ii) { in GammaBetaBackwardCUDAKernel_32x32()
600 int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor; in GammaBetaBackwardCUDAKernel_32x32()
601 for (int ii = 0; ii < unroll_factor; ii++) { in GammaBetaBackwardCUDAKernel_32x32()
668 constexpr int unroll_factor = 8; in GammaBetaBackwardCUDAKernel() local
677 for (bcounter = 0; bcounter < M / (blockDim.y * unroll_factor); bcounter++){ in GammaBetaBackwardCUDAKernel()
678 int offset = (bcounter * blockDim.y + threadIdx.y) * unroll_factor; in GammaBetaBackwardCUDAKernel()
[all …]
DCUDALoops.cuh274 constexpr int unroll_factor = sizeof(arg0_t) >= 4 ? 2 : 4; in gpu_kernel_impl_nocast() local
275 launch_legacy_kernel<128, unroll_factor>(numel, [=] GPU_LAMBDA(int idx) { in gpu_kernel_impl_nocast()
/external/eigen/Eigen/src/Core/arch/AltiVec/
DMatrixProductMMA.h163 if (unroll_factor > iter) { \
171 if (unroll_factor > iter) { \
215 if (unroll_factor > iter) { \
224 if (unroll_factor > iter) { \
233 if (unroll_factor > iter) { \
240 if (unroll_factor > iter) { \
246 template<int unroll_factor, typename Scalar, typename Packet, typename RhsPacket, typename DataMapp…
278 row += unroll_factor*accCols; in gemm_unrolled_MMA_iteration()
382 if (unroll_factor > iter) { \
397 if (unroll_factor > iter) { \
[all …]
DMatrixProduct.h1456 if (unroll_factor > iter) { \
1464 if (unroll_factor > iter) { \
1515 if (unroll_factor > iter) { \
1524 if (unroll_factor > iter) { \
1533 if (unroll_factor > iter) { \
1540 if (unroll_factor > iter) { \
1552 if (unroll_factor > iter) { \
1560 template<int unroll_factor, typename Scalar, typename Packet, typename DataMapper, typename Index, …
1593 row += unroll_factor*accCols;
1596 template<int unroll_factor, typename Scalar, typename Packet, typename DataMapper, typename Index, …
[all …]
/external/deqp-deps/SPIRV-Tools/source/opt/
Dloop_unroller.h26 LoopUnroller(bool fully_unroll, int unroll_factor) in LoopUnroller() argument
27 : Pass(), fully_unroll_(fully_unroll), unroll_factor_(unroll_factor) {} in LoopUnroller()
Dloop_descriptor.h342 size_t unroll_factor);
/external/swiftshader/third_party/SPIRV-Tools/source/opt/
Dloop_unroller.h26 LoopUnroller(bool fully_unroll, int unroll_factor) in LoopUnroller() argument
27 : Pass(), fully_unroll_(fully_unroll), unroll_factor_(unroll_factor) {} in LoopUnroller()
Dloop_descriptor.h342 size_t unroll_factor);
/external/angle/third_party/spirv-tools/src/source/opt/
Dloop_unroller.h26 LoopUnroller(bool fully_unroll, int unroll_factor) in LoopUnroller() argument
27 : Pass(), fully_unroll_(fully_unroll), unroll_factor_(unroll_factor) {} in LoopUnroller()
Dloop_descriptor.h342 size_t unroll_factor);
/external/pytorch/test/
Dtest_jit.py11178 unroll_factor = 8
11179 FileCheck().check("prim::Loop").check_count("aten::sub", unroll_factor) \
11217 unroll_factor = 8
11218 … FileCheck().check("prim::Loop").check("prim::Loop").check_count('aten::sub', unroll_factor) \