• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "compiler_logger.h"
16 #include "optimizer/ir/basicblock.h"
17 #include "optimizer/ir/graph.h"
18 #include "optimizer/ir/graph_cloner.h"
19 #include "optimizer/analysis/alias_analysis.h"
20 #include "optimizer/analysis/bounds_analysis.h"
21 #include "optimizer/analysis/dominators_tree.h"
22 #include "optimizer/optimizations/loop_unroll.h"
23 
24 namespace panda::compiler {
RunImpl()25 bool LoopUnroll::RunImpl()
26 {
27     COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << "Run " << GetPassName();
28     RunLoopsVisitor();
29     COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << GetPassName() << " complete";
30     return is_applied_;
31 }
32 
InvalidateAnalyses()33 void LoopUnroll::InvalidateAnalyses()
34 {
35     GetGraph()->InvalidateAnalysis<BoundsAnalysis>();
36     GetGraph()->InvalidateAnalysis<AliasAnalysis>();
37     GetGraph()->InvalidateAnalysis<LoopAnalyzer>();
38     InvalidateBlocksOrderAnalyzes(GetGraph());
39 }
40 
41 /**
42  * TODO(a.popov) Create pre-header compare if it doesn't exist
43  */
HasPreHeaderCompare(Loop * loop,const CountableLoopInfo & loop_info)44 bool LoopUnroll::HasPreHeaderCompare(Loop *loop, const CountableLoopInfo &loop_info)
45 {
46     auto pre_header = loop->GetPreHeader();
47     if (pre_header->IsEmpty() || pre_header->GetLastInst()->GetOpcode() != Opcode::IfImm) {
48         return false;
49     }
50     auto back_edge = loop->GetBackEdges()[0];
51     ASSERT(loop_info.if_imm->GetBasicBlock() == back_edge);
52     auto pre_header_if_imm = pre_header->GetLastInst();
53     ASSERT(pre_header_if_imm->GetOpcode() == Opcode::IfImm);
54     auto pre_header_cmp = pre_header_if_imm->GetInput(0).GetInst();
55     if (pre_header_cmp->GetOpcode() != Opcode::Compare) {
56         return false;
57     }
58     auto back_edge_cmp = loop_info.if_imm->GetInput(0).GetInst();
59     ASSERT(back_edge_cmp->GetOpcode() == Opcode::Compare);
60 
61     // Compare condition codes
62     if (pre_header_cmp->CastToCompare()->GetCc() != back_edge_cmp->CastToCompare()->GetCc()) {
63         return false;
64     }
65 
66     if (loop_info.if_imm->CastToIfImm()->GetCc() != pre_header_if_imm->CastToIfImm()->GetCc() ||
67         loop_info.if_imm->CastToIfImm()->GetImm() != pre_header_if_imm->CastToIfImm()->GetImm()) {
68         return false;
69     }
70 
71     // Compare control-flow
72     if (pre_header->GetTrueSuccessor() != back_edge->GetTrueSuccessor() ||
73         pre_header->GetFalseSuccessor() != back_edge->GetFalseSuccessor()) {
74         return false;
75     }
76 
77     // Compare test inputs
78     auto test_input_idx = 1;
79     if (back_edge_cmp->GetInput(0).GetInst() == loop_info.test) {
80         test_input_idx = 0;
81     } else {
82         ASSERT(back_edge_cmp->GetInput(1).GetInst() == loop_info.test);
83     }
84 
85     return pre_header_cmp->GetInput(test_input_idx).GetInst() == loop_info.test;
86 }
87 
ConditionOverFlow(const CountableLoopInfo & loop_info,uint32_t unroll_factor)88 bool ConditionOverFlow(const CountableLoopInfo &loop_info, uint32_t unroll_factor)
89 {
90     auto type = loop_info.index->GetType();
91     ASSERT(DataType::GetCommonType(type) == DataType::INT64 && DataType::IsTypeSigned(type));
92     if (!loop_info.test->IsConst()) {
93         return false;
94     }
95     bool is_inc = loop_info.update->GetOpcode() == Opcode::Add;
96     int64_t imm_value = (static_cast<int64_t>(unroll_factor) - 1) * loop_info.const_step;
97     int64_t test_value = loop_info.test->CastToConstant()->GetIntValue();
98 
99     auto is_int32 = DataType::Is32Bits(type, loop_info.test->GetBasicBlock()->GetGraph()->GetArch());
100     auto type_min = is_int32 ? std::numeric_limits<int32_t>::min() : std::numeric_limits<int64_t>::min();
101     auto type_max = is_int32 ? std::numeric_limits<int32_t>::max() : std::numeric_limits<int64_t>::max();
102     if (is_inc) {
103         // condition will be updated: test_value - imm_value
104         // so if (test_value - imm_value) < type_min, it's overflow
105         return (type_min + imm_value) > test_value;
106     }
107     // condition will be updated: test_value + imm_value
108     // so if (test_value + imm_value) > type_max, it's overflow
109     return (type_max - imm_value) < test_value;
110 }
111 
TransformLoop(Loop * loop)112 bool LoopUnroll::TransformLoop(Loop *loop)
113 {
114     auto unroll_params = GetUnrollParams(loop);
115     if (!options.IsCompilerUnrollLoopWithCalls() && unroll_params.has_call) {
116         COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
117             << "Loop isn't unrolled since it contains calls. Loop id = " << loop->GetId();
118         return false;
119     }
120 
121     uint32_t unroll_factor = std::min(unroll_params.unroll_factor, UNROLL_FACTOR);
122     if (unroll_factor <= 1) {
123         COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
124             << "Loop isn't unrolled due to unroll factor = " << unroll_factor << ". Loop id = " << loop->GetId();
125         return false;
126     }
127 
128     auto graph_cloner = GraphCloner(GetGraph(), GetGraph()->GetAllocator(), GetGraph()->GetLocalAllocator());
129     auto loop_parser = CountableLoopParser(*loop);
130     auto loop_info = loop_parser.Parse();
131     if (loop_info.has_value() && !ConditionOverFlow(loop_info.value(), unroll_factor) &&
132         HasPreHeaderCompare(loop, loop_info.value())) {
133         auto clone_loop = graph_cloner.CloneLoop(loop);
134         // Unroll loop without side-exits and fix compare in the pre-header and back-edge
135         graph_cloner.UnrollLoopBody<UnrollType::UNROLL_WITHOUT_SIDE_EXITS>(loop, unroll_factor);
136         FixCompareInst(loop_info.value(), loop->GetHeader(), unroll_factor);
137         // Unroll loop with side-exits for remaining iterations
138         graph_cloner.UnrollLoopBody<UnrollType::UNROLL_POST_INCREMENT>(clone_loop, unroll_factor - 1);
139         COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
140             << "Unrolled without side-exits the loop with unroll factor = " << unroll_factor
141             << ". Loop id = " << loop->GetId();
142     } else if (options.IsCompilerUnrollWithSideExits()) {
143         graph_cloner.UnrollLoopBody<UnrollType::UNROLL_WITH_SIDE_EXITS>(loop, unroll_factor);
144         COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
145             << "Unrolled with side-exits the loop with unroll factor = " << unroll_factor
146             << ". Loop id = " << loop->GetId();
147     }
148     is_applied_ = true;
149     GetGraph()->GetEventWriter().EventLoopUnroll(loop->GetId(), loop->GetHeader()->GetGuestPc(), unroll_factor,
150                                                  unroll_params.cloneable_insts);
151     return true;
152 }
153 
154 /**
155  * @return - unroll parameters:
156  * - maximum value of unroll factor, depends on INST_LIMIT
157  * - number of cloneable instructions
158  */
GetUnrollParams(Loop * loop)159 LoopUnroll::UnrollParams LoopUnroll::GetUnrollParams(Loop *loop)
160 {
161     uint32_t base_inst_count = 0;
162     uint32_t not_cloneable_count = 0;
163     bool has_call = false;
164     for (auto block : loop->GetBlocks()) {
165         for (auto inst : block->AllInsts()) {
166             base_inst_count++;
167             if ((block->IsLoopHeader() && inst->IsPhi()) || inst->GetOpcode() == Opcode::SafePoint) {
168                 not_cloneable_count++;
169             }
170             has_call |= inst->IsCall();
171         }
172     }
173 
174     UnrollParams params = {1, (base_inst_count - not_cloneable_count), has_call};
175     if (base_inst_count >= INST_LIMIT) {
176         return params;
177     }
178     uint32_t can_be_cloned_count = INST_LIMIT - base_inst_count;
179     params.unroll_factor = UNROLL_FACTOR;
180     if (params.cloneable_insts > 0) {
181         params.unroll_factor = (can_be_cloned_count / params.cloneable_insts) + 1;
182     }
183     return params;
184 }
185 
186 /**
187  * @return - `if_imm`'s compare input when `if_imm` its single user,
188  * otherwise create a new one Compare for this `if_imm` and return it
189  */
GetOrCreateIfImmUniqueCompare(Inst * if_imm)190 Inst *GetOrCreateIfImmUniqueCompare(Inst *if_imm)
191 {
192     ASSERT(if_imm->GetOpcode() == Opcode::IfImm);
193     auto compare = if_imm->GetInput(0).GetInst();
194     ASSERT(compare->GetOpcode() == Opcode::Compare);
195     if (compare->HasSingleUser()) {
196         return compare;
197     }
198     auto new_cmp = compare->Clone(compare->GetBasicBlock()->GetGraph());
199     new_cmp->SetInput(0, compare->GetInput(0).GetInst());
200     new_cmp->SetInput(1, compare->GetInput(1).GetInst());
201     if_imm->InsertBefore(new_cmp);
202     if_imm->SetInput(0, new_cmp);
203     return new_cmp;
204 }
205 
206 /**
207  * Normalize control-flow to the form: `if condition is true goto loop_header`
208  */
NormalizeControlFlow(BasicBlock * edge,const BasicBlock * loop_header)209 void NormalizeControlFlow(BasicBlock *edge, const BasicBlock *loop_header)
210 {
211     auto if_imm = edge->GetLastInst()->CastToIfImm();
212     ASSERT(if_imm->GetImm() == 0);
213     if (if_imm->GetCc() == CC_EQ) {
214         if_imm->SetCc(CC_NE);
215         edge->SwapTrueFalseSuccessors<true>();
216     }
217     auto cmp = if_imm->GetInput(0).GetInst()->CastToCompare();
218     ASSERT(cmp->GetBasicBlock() == edge);
219     if (edge->GetFalseSuccessor() == loop_header) {
220         auto inversed_cc = GetInverseConditionCode(cmp->GetCc());
221         cmp->SetCc(inversed_cc);
222         edge->SwapTrueFalseSuccessors<true>();
223     }
224 }
225 
CreateNewTestInst(const CountableLoopInfo & loop_info,Inst * const_inst,Inst * pre_header_cmp)226 Inst *LoopUnroll::CreateNewTestInst(const CountableLoopInfo &loop_info, Inst *const_inst, Inst *pre_header_cmp)
227 {
228     Inst *test = nullptr;
229     bool is_inc = loop_info.update->GetOpcode() == Opcode::Add;
230     if (is_inc) {
231         test = GetGraph()->CreateInstSub(pre_header_cmp->CastToCompare()->GetOperandsType(), pre_header_cmp->GetPc());
232     } else {
233         ASSERT(loop_info.update->GetOpcode() == Opcode::Sub);
234         test = GetGraph()->CreateInstAdd(pre_header_cmp->CastToCompare()->GetOperandsType(), pre_header_cmp->GetPc());
235     }
236     test->SetInput(0, loop_info.test);
237     test->SetInput(1, const_inst);
238     pre_header_cmp->InsertBefore(test);
239     return test;
240 }
241 
242 /**
243  * Replace `Compare(init, test)` with these instructions:
244  *
245  * Constant(unroll_factor)
246  * Sub/Add(test, Constant)
247  * Compare(init, SubI/AddI)
248  *
249  * And replace condition code if it is `CC_NE`.
250  * We use Constant + Sub/Add because low-level instructions (SubI/AddI) may appear only after Lowering pass.
251  */
FixCompareInst(const CountableLoopInfo & loop_info,BasicBlock * header,uint32_t unroll_factor)252 void LoopUnroll::FixCompareInst(const CountableLoopInfo &loop_info, BasicBlock *header, uint32_t unroll_factor)
253 {
254     auto pre_header = header->GetLoop()->GetPreHeader();
255     auto back_edge = loop_info.if_imm->GetBasicBlock();
256     ASSERT(!pre_header->IsEmpty() && pre_header->GetLastInst()->GetOpcode() == Opcode::IfImm);
257     auto pre_header_if = pre_header->GetLastInst()->CastToIfImm();
258     auto pre_header_cmp = GetOrCreateIfImmUniqueCompare(pre_header_if);
259     auto back_edge_cmp = GetOrCreateIfImmUniqueCompare(loop_info.if_imm);
260     NormalizeControlFlow(pre_header, header);
261     NormalizeControlFlow(back_edge, header);
262     // Create Sub/Add + Const instructions and replace Compare's test inst input
263     auto imm_value = (static_cast<uint64_t>(unroll_factor) - 1) * loop_info.const_step;
264     auto new_test = CreateNewTestInst(loop_info, GetGraph()->FindOrCreateConstant(imm_value), pre_header_cmp);
265     auto test_input_idx = 1;
266     if (back_edge_cmp->GetInput(0).GetInst() == loop_info.test) {
267         test_input_idx = 0;
268     } else {
269         ASSERT(back_edge_cmp->GetInput(1).GetInst() == loop_info.test);
270     }
271     ASSERT(pre_header_cmp->GetInput(test_input_idx).GetInst() == loop_info.test);
272     pre_header_cmp->SetInput(test_input_idx, new_test);
273     back_edge_cmp->SetInput(test_input_idx, new_test);
274     // Replace CC_NE ConditionCode
275     if (loop_info.normalized_cc == CC_NE) {
276         auto cc = loop_info.update->GetOpcode() == Opcode::Add ? CC_LT : CC_GT;
277         if (test_input_idx == 0) {
278             cc = SwapOperandsConditionCode(cc);
279         }
280         pre_header_cmp->CastToCompare()->SetCc(cc);
281         back_edge_cmp->CastToCompare()->SetCc(cc);
282     }
283     // for not constant test-instruction we need to insert `overflow-check`:
284     // `test - imm_value` should be less than `test` (incerement loop-index case)
285     // `test + imm_value` should be greater than `test` (decrement loop-index case)
286     // If overflow-check is failed goto after-loop
287     if (!loop_info.test->IsConst()) {
288         auto cc = loop_info.update->GetOpcode() == Opcode::Add ? CC_LT : CC_GT;
289         // Create overflow_compare
290         auto overflow_compare = GetGraph()->CreateInstCompare(compiler::DataType::BOOL, pre_header_cmp->GetPc(), cc);
291         overflow_compare->CastToCompare()->SetOperandsType(loop_info.test->GetType());
292         overflow_compare->SetInput(0, new_test);
293         overflow_compare->SetInput(1, loop_info.test);
294         // Create (pre_header_compare AND overflow_compare) inst
295         auto and_inst = GetGraph()->CreateInstAnd(DataType::BOOL, pre_header_cmp->GetPc());
296         and_inst->SetInput(0, pre_header_cmp);
297         and_inst->SetInput(1, overflow_compare);
298         pre_header_if->SetInput(0, and_inst);
299         pre_header_if->InsertBefore(and_inst);
300         and_inst->InsertBefore(overflow_compare);
301     }
302 }
303 }  // namespace panda::compiler
304