1 /*
2 * Copyright (c) 2021-2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "compiler_logger.h"
16 #include "optimizer/ir/basicblock.h"
17 #include "optimizer/ir/graph.h"
18 #include "optimizer/ir/graph_cloner.h"
19 #include "optimizer/optimizations/loop_unroll.h"
20 #include "optimizer/analysis/alias_analysis.h"
21 #include "optimizer/analysis/bounds_analysis.h"
22 #include "optimizer/analysis/dominators_tree.h"
23
24 namespace panda::compiler {
RunImpl()25 bool LoopUnroll::RunImpl()
26 {
27 COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << "Run " << GetPassName();
28 RunLoopsVisitor();
29 COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << GetPassName() << " complete";
30 GetGraph()->SetUnrollComplete();
31 return isApplied_;
32 }
33
InvalidateAnalyses()34 void LoopUnroll::InvalidateAnalyses()
35 {
36 GetGraph()->InvalidateAnalysis<BoundsAnalysis>();
37 GetGraph()->InvalidateAnalysis<AliasAnalysis>();
38 GetGraph()->InvalidateAnalysis<LoopAnalyzer>();
39 InvalidateBlocksOrderAnalyzes(GetGraph());
40 }
41
42 template <typename T>
ConditionOverFlowImpl(const CountableLoopInfo & loopInfo,uint32_t unrollFactor)43 bool ConditionOverFlowImpl(const CountableLoopInfo &loopInfo, uint32_t unrollFactor)
44 {
45 auto immValue = (static_cast<uint64_t>(unrollFactor) - 1) * loopInfo.constStep;
46 auto testValue = static_cast<T>(loopInfo.test->CastToConstant()->GetIntValue());
47 auto typeMin = std::numeric_limits<T>::min();
48 auto typeMax = std::numeric_limits<T>::max();
49 if (immValue > static_cast<uint64_t>(typeMax)) {
50 return true;
51 }
52 if (loopInfo.isInc) {
53 // condition will be updated: test_value - imm_value
54 // so if (test_value - imm_value) < type_min, it's overflow
55 return (typeMin + static_cast<T>(immValue)) > testValue;
56 }
57 // condition will be updated: test_value + imm_value
58 // so if (test_value + imm_value) > type_max, it's overflow
59 return (typeMax - static_cast<T>(immValue)) < testValue;
60 }
61
62 /// NOTE(a.popov) Create pre-header compare if it doesn't exist
63
ConditionOverFlow(const CountableLoopInfo & loopInfo,uint32_t unrollFactor)64 bool ConditionOverFlow(const CountableLoopInfo &loopInfo, uint32_t unrollFactor)
65 {
66 auto type = loopInfo.index->GetType();
67 ASSERT(DataType::GetCommonType(type) == DataType::INT64);
68 auto updateOpcode = loopInfo.update->GetOpcode();
69 if (updateOpcode == Opcode::AddOverflowCheck || updateOpcode == Opcode::SubOverflowCheck) {
70 return true;
71 }
72 if (!loopInfo.test->IsConst()) {
73 return false;
74 }
75
76 switch (type) {
77 case DataType::INT32:
78 return ConditionOverFlowImpl<int32_t>(loopInfo, unrollFactor);
79 case DataType::UINT32:
80 return ConditionOverFlowImpl<uint32_t>(loopInfo, unrollFactor);
81 case DataType::INT64:
82 return ConditionOverFlowImpl<int64_t>(loopInfo, unrollFactor);
83 case DataType::UINT64:
84 return ConditionOverFlowImpl<uint64_t>(loopInfo, unrollFactor);
85 default:
86 return true;
87 }
88 }
89
TransformLoopImpl(Loop * loop,std::optional<uint64_t> optIterations,bool noSideExits,uint32_t unrollFactor,std::optional<CountableLoopInfo> loopInfo)90 void LoopUnroll::TransformLoopImpl(Loop *loop, std::optional<uint64_t> optIterations, bool noSideExits,
91 uint32_t unrollFactor, std::optional<CountableLoopInfo> loopInfo)
92 {
93 auto graphCloner = GraphCloner(GetGraph(), GetGraph()->GetAllocator(), GetGraph()->GetLocalAllocator());
94 if (optIterations && noSideExits) {
95 // GCC gives false positive here
96 #if !defined(__clang__)
97 #pragma GCC diagnostic push
98 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
99 #endif
100 auto iterations = *optIterations;
101 ASSERT(unrollFactor != 0);
102 auto remainingIters = iterations % unrollFactor;
103 #if !defined(__clang__)
104 #pragma GCC diagnostic pop
105 #endif
106 Loop *cloneLoop = remainingIters == 0 ? nullptr : graphCloner.CloneLoop(loop);
107 // Unroll loop without side-exits and fix compare in the pre-header and back-edge
108 graphCloner.UnrollLoopBody<UnrollType::UNROLL_WITHOUT_SIDE_EXITS>(loop, unrollFactor);
109 FixCompareInst(loopInfo.value(), loop->GetHeader(), unrollFactor);
110 // Unroll loop without side-exits for remaining iterations
111 if (remainingIters != 0) {
112 graphCloner.UnrollLoopBody<UnrollType::UNROLL_CONSTANT_ITERATIONS>(cloneLoop, remainingIters);
113 }
114 COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
115 << "Unrolled without side-exits the loop with constant number of iterations (" << iterations
116 << "). Loop id = " << loop->GetId();
117 } else if (noSideExits) {
118 auto cloneLoop = graphCloner.CloneLoop(loop);
119 // Unroll loop without side-exits and fix compare in the pre-header and back-edge
120 graphCloner.UnrollLoopBody<UnrollType::UNROLL_WITHOUT_SIDE_EXITS>(loop, unrollFactor);
121 FixCompareInst(loopInfo.value(), loop->GetHeader(), unrollFactor);
122 // Unroll loop with side-exits for remaining iterations
123 graphCloner.UnrollLoopBody<UnrollType::UNROLL_POST_INCREMENT>(cloneLoop, unrollFactor - 1);
124 COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
125 << "Unrolled without side-exits the loop with unroll factor = " << unrollFactor
126 << ". Loop id = " << loop->GetId();
127 } else if (g_options.IsCompilerUnrollWithSideExits()) {
128 graphCloner.UnrollLoopBody<UnrollType::UNROLL_WITH_SIDE_EXITS>(loop, unrollFactor);
129 COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << "Unrolled with side-exits the loop with unroll factor = " << unrollFactor
130 << ". Loop id = " << loop->GetId();
131 }
132 }
133
TransformLoop(Loop * loop)134 bool LoopUnroll::TransformLoop(Loop *loop)
135 {
136 auto unrollParams = GetUnrollParams(loop);
137 if (!g_options.IsCompilerUnrollLoopWithCalls() && unrollParams.hasCall) {
138 COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
139 << "Loop isn't unrolled since it contains calls. Loop id = " << loop->GetId();
140 return false;
141 }
142
143 auto graphCloner = GraphCloner(GetGraph(), GetGraph()->GetAllocator(), GetGraph()->GetLocalAllocator());
144 uint32_t unrollFactor = std::min(unrollParams.unrollFactor, unrollFactor_);
145 auto loopParser = CountableLoopParser(*loop);
146 auto loopInfo = loopParser.Parse();
147 std::optional<uint64_t> optIterations {};
148 auto noBranching = false;
149 if (loopInfo.has_value()) {
150 optIterations = CountableLoopParser::GetLoopIterations(*loopInfo);
151 if (optIterations == 0) {
152 optIterations.reset();
153 }
154 if (optIterations.has_value()) {
155 // Increase instruction limit for unroll without branching
156 // <= unroll_factor * 2 because unroll without side exits would create unroll_factor * 2 - 1 copies of loop
157 noBranching = unrollParams.cloneableInsts <= instLimit_ &&
158 (*optIterations <= unrollFactor * 2U || *optIterations <= unrollFactor_) &&
159 CountableLoopParser::HasPreHeaderCompare(loop, *loopInfo);
160 }
161 }
162
163 if (noBranching) {
164 auto iterations = *optIterations;
165 graphCloner.UnrollLoopBody<UnrollType::UNROLL_CONSTANT_ITERATIONS>(loop, iterations);
166 COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
167 << "Unrolled without branching the loop with constant number of iterations (" << iterations
168 << "). Loop id = " << loop->GetId();
169 isApplied_ = true;
170 GetGraph()->GetEventWriter().EventLoopUnroll(loop->GetId(), loop->GetHeader()->GetGuestPc(), iterations,
171 unrollParams.cloneableInsts, "without branching");
172 return true;
173 }
174
175 if (unrollFactor <= 1U) {
176 COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
177 << "Loop isn't unrolled due to unroll factor = " << unrollFactor << ". Loop id = " << loop->GetId();
178 return false;
179 }
180
181 auto noSideExits = false;
182 if (loopInfo.has_value()) {
183 noSideExits =
184 !ConditionOverFlow(*loopInfo, unrollFactor) && CountableLoopParser::HasPreHeaderCompare(loop, *loopInfo);
185 }
186
187 TransformLoopImpl(loop, optIterations, noSideExits, unrollFactor, loopInfo);
188 isApplied_ = true;
189 GetGraph()->GetEventWriter().EventLoopUnroll(loop->GetId(), loop->GetHeader()->GetGuestPc(), unrollFactor,
190 unrollParams.cloneableInsts,
191 noSideExits ? "without side exits" : "with side exits");
192 return true;
193 }
194
195 /**
196 * @return - unroll parameters:
197 * - maximum value of unroll factor, depends on INST_LIMIT
198 * - number of cloneable instructions
199 */
GetUnrollParams(Loop * loop)200 LoopUnroll::UnrollParams LoopUnroll::GetUnrollParams(Loop *loop)
201 {
202 uint32_t baseInstCount = 0;
203 uint32_t notCloneableCount = 0;
204 bool hasCall = false;
205 for (auto block : loop->GetBlocks()) {
206 for (auto inst : block->AllInsts()) {
207 baseInstCount++;
208 if ((block->IsLoopHeader() && inst->IsPhi()) || inst->GetOpcode() == Opcode::SafePoint) {
209 notCloneableCount++;
210 }
211 hasCall |= inst->IsCall() && !static_cast<CallInst *>(inst)->IsInlined();
212 }
213 }
214
215 UnrollParams params = {1, (baseInstCount - notCloneableCount), hasCall};
216 if (baseInstCount >= instLimit_) {
217 return params;
218 }
219 uint32_t canBeClonedCount = instLimit_ - baseInstCount;
220 params.unrollFactor = unrollFactor_;
221 if (params.cloneableInsts > 0) {
222 params.unrollFactor = (canBeClonedCount / params.cloneableInsts) + 1;
223 }
224 return params;
225 }
226
227 /**
228 * @return - `if_imm`'s compare input when `if_imm` its single user,
229 * otherwise create a new one Compare for this `if_imm` and return it
230 */
GetOrCreateIfImmUniqueCompare(Inst * ifImm)231 Inst *GetOrCreateIfImmUniqueCompare(Inst *ifImm)
232 {
233 ASSERT(ifImm->GetOpcode() == Opcode::IfImm);
234 auto compare = ifImm->GetInput(0).GetInst();
235 ASSERT(compare->GetOpcode() == Opcode::Compare);
236 if (compare->HasSingleUser()) {
237 return compare;
238 }
239 auto newCmp = compare->Clone(compare->GetBasicBlock()->GetGraph());
240 newCmp->SetInput(0, compare->GetInput(0).GetInst());
241 newCmp->SetInput(1, compare->GetInput(1).GetInst());
242 ifImm->InsertBefore(newCmp);
243 ifImm->SetInput(0, newCmp);
244 return newCmp;
245 }
246
247 /// Normalize control-flow to the form: `if condition is true goto loop_header`
NormalizeControlFlow(BasicBlock * edge,const BasicBlock * loopHeader)248 void NormalizeControlFlow(BasicBlock *edge, const BasicBlock *loopHeader)
249 {
250 auto ifImm = edge->GetLastInst()->CastToIfImm();
251 ASSERT(ifImm->GetImm() == 0);
252 if (ifImm->GetCc() == CC_EQ) {
253 ifImm->SetCc(CC_NE);
254 edge->SwapTrueFalseSuccessors<true>();
255 }
256 auto cmp = ifImm->GetInput(0).GetInst()->CastToCompare();
257 if (!cmp->HasSingleUser()) {
258 auto newCmp = cmp->Clone(edge->GetGraph());
259 ifImm->InsertBefore(newCmp);
260 ifImm->SetInput(0, newCmp);
261 cmp = newCmp->CastToCompare();
262 }
263 if (edge->GetFalseSuccessor() == loopHeader) {
264 auto inversedCc = GetInverseConditionCode(cmp->GetCc());
265 cmp->SetCc(inversedCc);
266 edge->SwapTrueFalseSuccessors<true>();
267 }
268 }
269
CreateNewTestInst(const CountableLoopInfo & loopInfo,Inst * constInst,Inst * preHeaderCmp)270 Inst *LoopUnroll::CreateNewTestInst(const CountableLoopInfo &loopInfo, Inst *constInst, Inst *preHeaderCmp)
271 {
272 Inst *test = nullptr;
273 if (loopInfo.isInc) {
274 test = GetGraph()->CreateInstSub(preHeaderCmp->CastToCompare()->GetOperandsType(), preHeaderCmp->GetPc(),
275 loopInfo.test, constInst);
276 #ifdef PANDA_COMPILER_DEBUG_INFO
277 test->SetCurrentMethod(preHeaderCmp->GetCurrentMethod());
278 #endif
279 } else {
280 test = GetGraph()->CreateInstAdd(preHeaderCmp->CastToCompare()->GetOperandsType(), preHeaderCmp->GetPc(),
281 loopInfo.test, constInst);
282 #ifdef PANDA_COMPILER_DEBUG_INFO
283 test->SetCurrentMethod(preHeaderCmp->GetCurrentMethod());
284 #endif
285 }
286 preHeaderCmp->InsertBefore(test);
287 return test;
288 }
289
290 /**
291 * Replace `Compare(init, test)` with these instructions:
292 *
293 * Constant(unroll_factor)
294 * Sub/Add(test, Constant)
295 * Compare(init, SubI/AddI)
296 *
297 * And replace condition code if it is `CC_NE`.
298 * We use Constant + Sub/Add because low-level instructions (SubI/AddI) may appear only after Lowering pass.
299 */
FixCompareInst(const CountableLoopInfo & loopInfo,BasicBlock * header,uint32_t unrollFactor)300 void LoopUnroll::FixCompareInst(const CountableLoopInfo &loopInfo, BasicBlock *header, uint32_t unrollFactor)
301 {
302 auto preHeader = header->GetLoop()->GetPreHeader();
303 auto backEdge = loopInfo.ifImm->GetBasicBlock();
304 ASSERT(!preHeader->IsEmpty() && preHeader->GetLastInst()->GetOpcode() == Opcode::IfImm);
305 auto preHeaderIf = preHeader->GetLastInst()->CastToIfImm();
306 auto preHeaderCmp = GetOrCreateIfImmUniqueCompare(preHeaderIf);
307 auto backEdgeCmp = GetOrCreateIfImmUniqueCompare(loopInfo.ifImm);
308 NormalizeControlFlow(preHeader, header);
309 NormalizeControlFlow(backEdge, header);
310 // Create Sub/Add + Const instructions and replace Compare's test inst input
311 auto immValue = (static_cast<uint64_t>(unrollFactor) - 1) * loopInfo.constStep;
312 auto newTest = CreateNewTestInst(loopInfo, GetGraph()->FindOrCreateConstant(immValue), preHeaderCmp);
313 auto testInputIdx = 1;
314 if (backEdgeCmp->GetInput(0) == loopInfo.test) {
315 testInputIdx = 0;
316 } else {
317 ASSERT(backEdgeCmp->GetInput(1) == loopInfo.test);
318 }
319 ASSERT(preHeaderCmp->GetInput(testInputIdx).GetInst() == loopInfo.test);
320 preHeaderCmp->SetInput(testInputIdx, newTest);
321 backEdgeCmp->SetInput(testInputIdx, newTest);
322 // Replace CC_NE ConditionCode
323 if (loopInfo.normalizedCc == CC_NE) {
324 auto cc = loopInfo.isInc ? CC_LT : CC_GT;
325 if (testInputIdx == 0) {
326 cc = SwapOperandsConditionCode(cc);
327 }
328 preHeaderCmp->CastToCompare()->SetCc(cc);
329 backEdgeCmp->CastToCompare()->SetCc(cc);
330 }
331 // for not constant test-instruction we need to insert `overflow-check`:
332 // `test - imm_value` should be less than `test` (incerement loop-index case)
333 // `test + imm_value` should be greater than `test` (decrement loop-index case)
334 // If overflow-check is failed goto after-loop
335 if (!loopInfo.test->IsConst()) {
336 auto cc = loopInfo.isInc ? CC_LT : CC_GT;
337 // Create overflow_compare
338 auto overflowCompare = GetGraph()->CreateInstCompare(compiler::DataType::BOOL, preHeaderCmp->GetPc(), newTest,
339 loopInfo.test, loopInfo.test->GetType(), cc);
340 #ifdef PANDA_COMPILER_DEBUG_INFO
341 overflowCompare->SetCurrentMethod(preHeaderCmp->GetCurrentMethod());
342 #endif
343 // Create (pre_header_compare AND overflow_compare) inst
344 auto andInst = GetGraph()->CreateInstAnd(DataType::BOOL, preHeaderCmp->GetPc(), preHeaderCmp, overflowCompare);
345 #ifdef PANDA_COMPILER_DEBUG_INFO
346 andInst->SetCurrentMethod(preHeaderCmp->GetCurrentMethod());
347 #endif
348 preHeaderIf->SetInput(0, andInst);
349 preHeaderIf->InsertBefore(andInst);
350 andInst->InsertBefore(overflowCompare);
351 }
352 }
353 } // namespace panda::compiler
354