1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "compiler_logger.h"
16 #include "optimizer/ir/basicblock.h"
17 #include "optimizer/ir/graph.h"
18 #include "optimizer/ir/graph_cloner.h"
19 #include "optimizer/analysis/alias_analysis.h"
20 #include "optimizer/analysis/bounds_analysis.h"
21 #include "optimizer/analysis/dominators_tree.h"
22 #include "optimizer/optimizations/loop_unroll.h"
23
24 namespace panda::compiler {
RunImpl()25 bool LoopUnroll::RunImpl()
26 {
27 COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << "Run " << GetPassName();
28 RunLoopsVisitor();
29 COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << GetPassName() << " complete";
30 return is_applied_;
31 }
32
InvalidateAnalyses()33 void LoopUnroll::InvalidateAnalyses()
34 {
35 GetGraph()->InvalidateAnalysis<BoundsAnalysis>();
36 GetGraph()->InvalidateAnalysis<AliasAnalysis>();
37 GetGraph()->InvalidateAnalysis<LoopAnalyzer>();
38 InvalidateBlocksOrderAnalyzes(GetGraph());
39 }
40
41 /**
42 * TODO(a.popov) Create pre-header compare if it doesn't exist
43 */
HasPreHeaderCompare(Loop * loop,const CountableLoopInfo & loop_info)44 bool LoopUnroll::HasPreHeaderCompare(Loop *loop, const CountableLoopInfo &loop_info)
45 {
46 auto pre_header = loop->GetPreHeader();
47 if (pre_header->IsEmpty() || pre_header->GetLastInst()->GetOpcode() != Opcode::IfImm) {
48 return false;
49 }
50 auto back_edge = loop->GetBackEdges()[0];
51 ASSERT(loop_info.if_imm->GetBasicBlock() == back_edge);
52 auto pre_header_if_imm = pre_header->GetLastInst();
53 ASSERT(pre_header_if_imm->GetOpcode() == Opcode::IfImm);
54 auto pre_header_cmp = pre_header_if_imm->GetInput(0).GetInst();
55 if (pre_header_cmp->GetOpcode() != Opcode::Compare) {
56 return false;
57 }
58 auto back_edge_cmp = loop_info.if_imm->GetInput(0).GetInst();
59 ASSERT(back_edge_cmp->GetOpcode() == Opcode::Compare);
60
61 // Compare condition codes
62 if (pre_header_cmp->CastToCompare()->GetCc() != back_edge_cmp->CastToCompare()->GetCc()) {
63 return false;
64 }
65
66 if (loop_info.if_imm->CastToIfImm()->GetCc() != pre_header_if_imm->CastToIfImm()->GetCc() ||
67 loop_info.if_imm->CastToIfImm()->GetImm() != pre_header_if_imm->CastToIfImm()->GetImm()) {
68 return false;
69 }
70
71 // Compare control-flow
72 if (pre_header->GetTrueSuccessor() != back_edge->GetTrueSuccessor() ||
73 pre_header->GetFalseSuccessor() != back_edge->GetFalseSuccessor()) {
74 return false;
75 }
76
77 // Compare test inputs
78 auto test_input_idx = 1;
79 if (back_edge_cmp->GetInput(0).GetInst() == loop_info.test) {
80 test_input_idx = 0;
81 } else {
82 ASSERT(back_edge_cmp->GetInput(1).GetInst() == loop_info.test);
83 }
84
85 return pre_header_cmp->GetInput(test_input_idx).GetInst() == loop_info.test;
86 }
87
ConditionOverFlow(const CountableLoopInfo & loop_info,uint32_t unroll_factor)88 bool ConditionOverFlow(const CountableLoopInfo &loop_info, uint32_t unroll_factor)
89 {
90 auto type = loop_info.index->GetType();
91 ASSERT(DataType::GetCommonType(type) == DataType::INT64 && DataType::IsTypeSigned(type));
92 if (!loop_info.test->IsConst()) {
93 return false;
94 }
95 bool is_inc = loop_info.update->GetOpcode() == Opcode::Add;
96 int64_t imm_value = (static_cast<int64_t>(unroll_factor) - 1) * loop_info.const_step;
97 int64_t test_value = loop_info.test->CastToConstant()->GetIntValue();
98
99 auto is_int32 = DataType::Is32Bits(type, loop_info.test->GetBasicBlock()->GetGraph()->GetArch());
100 auto type_min = is_int32 ? std::numeric_limits<int32_t>::min() : std::numeric_limits<int64_t>::min();
101 auto type_max = is_int32 ? std::numeric_limits<int32_t>::max() : std::numeric_limits<int64_t>::max();
102 if (is_inc) {
103 // condition will be updated: test_value - imm_value
104 // so if (test_value - imm_value) < type_min, it's overflow
105 return (type_min + imm_value) > test_value;
106 }
107 // condition will be updated: test_value + imm_value
108 // so if (test_value + imm_value) > type_max, it's overflow
109 return (type_max - imm_value) < test_value;
110 }
111
TransformLoop(Loop * loop)112 bool LoopUnroll::TransformLoop(Loop *loop)
113 {
114 auto unroll_params = GetUnrollParams(loop);
115 if (!options.IsCompilerUnrollLoopWithCalls() && unroll_params.has_call) {
116 COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
117 << "Loop isn't unrolled since it contains calls. Loop id = " << loop->GetId();
118 return false;
119 }
120
121 uint32_t unroll_factor = std::min(unroll_params.unroll_factor, UNROLL_FACTOR);
122 if (unroll_factor <= 1) {
123 COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
124 << "Loop isn't unrolled due to unroll factor = " << unroll_factor << ". Loop id = " << loop->GetId();
125 return false;
126 }
127
128 auto graph_cloner = GraphCloner(GetGraph(), GetGraph()->GetAllocator(), GetGraph()->GetLocalAllocator());
129 auto loop_parser = CountableLoopParser(*loop);
130 auto loop_info = loop_parser.Parse();
131 if (loop_info.has_value() && !ConditionOverFlow(loop_info.value(), unroll_factor) &&
132 HasPreHeaderCompare(loop, loop_info.value())) {
133 auto clone_loop = graph_cloner.CloneLoop(loop);
134 // Unroll loop without side-exits and fix compare in the pre-header and back-edge
135 graph_cloner.UnrollLoopBody<UnrollType::UNROLL_WITHOUT_SIDE_EXITS>(loop, unroll_factor);
136 FixCompareInst(loop_info.value(), loop->GetHeader(), unroll_factor);
137 // Unroll loop with side-exits for remaining iterations
138 graph_cloner.UnrollLoopBody<UnrollType::UNROLL_POST_INCREMENT>(clone_loop, unroll_factor - 1);
139 COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
140 << "Unrolled without side-exits the loop with unroll factor = " << unroll_factor
141 << ". Loop id = " << loop->GetId();
142 } else if (options.IsCompilerUnrollWithSideExits()) {
143 graph_cloner.UnrollLoopBody<UnrollType::UNROLL_WITH_SIDE_EXITS>(loop, unroll_factor);
144 COMPILER_LOG(DEBUG, LOOP_TRANSFORM)
145 << "Unrolled with side-exits the loop with unroll factor = " << unroll_factor
146 << ". Loop id = " << loop->GetId();
147 }
148 is_applied_ = true;
149 GetGraph()->GetEventWriter().EventLoopUnroll(loop->GetId(), loop->GetHeader()->GetGuestPc(), unroll_factor,
150 unroll_params.cloneable_insts);
151 return true;
152 }
153
154 /**
155 * @return - unroll parameters:
156 * - maximum value of unroll factor, depends on INST_LIMIT
157 * - number of cloneable instructions
158 */
GetUnrollParams(Loop * loop)159 LoopUnroll::UnrollParams LoopUnroll::GetUnrollParams(Loop *loop)
160 {
161 uint32_t base_inst_count = 0;
162 uint32_t not_cloneable_count = 0;
163 bool has_call = false;
164 for (auto block : loop->GetBlocks()) {
165 for (auto inst : block->AllInsts()) {
166 base_inst_count++;
167 if ((block->IsLoopHeader() && inst->IsPhi()) || inst->GetOpcode() == Opcode::SafePoint) {
168 not_cloneable_count++;
169 }
170 has_call |= inst->IsCall();
171 }
172 }
173
174 UnrollParams params = {1, (base_inst_count - not_cloneable_count), has_call};
175 if (base_inst_count >= INST_LIMIT) {
176 return params;
177 }
178 uint32_t can_be_cloned_count = INST_LIMIT - base_inst_count;
179 params.unroll_factor = UNROLL_FACTOR;
180 if (params.cloneable_insts > 0) {
181 params.unroll_factor = (can_be_cloned_count / params.cloneable_insts) + 1;
182 }
183 return params;
184 }
185
186 /**
187 * @return - `if_imm`'s compare input when `if_imm` its single user,
188 * otherwise create a new one Compare for this `if_imm` and return it
189 */
GetOrCreateIfImmUniqueCompare(Inst * if_imm)190 Inst *GetOrCreateIfImmUniqueCompare(Inst *if_imm)
191 {
192 ASSERT(if_imm->GetOpcode() == Opcode::IfImm);
193 auto compare = if_imm->GetInput(0).GetInst();
194 ASSERT(compare->GetOpcode() == Opcode::Compare);
195 if (compare->HasSingleUser()) {
196 return compare;
197 }
198 auto new_cmp = compare->Clone(compare->GetBasicBlock()->GetGraph());
199 new_cmp->SetInput(0, compare->GetInput(0).GetInst());
200 new_cmp->SetInput(1, compare->GetInput(1).GetInst());
201 if_imm->InsertBefore(new_cmp);
202 if_imm->SetInput(0, new_cmp);
203 return new_cmp;
204 }
205
206 /**
207 * Normalize control-flow to the form: `if condition is true goto loop_header`
208 */
NormalizeControlFlow(BasicBlock * edge,const BasicBlock * loop_header)209 void NormalizeControlFlow(BasicBlock *edge, const BasicBlock *loop_header)
210 {
211 auto if_imm = edge->GetLastInst()->CastToIfImm();
212 ASSERT(if_imm->GetImm() == 0);
213 if (if_imm->GetCc() == CC_EQ) {
214 if_imm->SetCc(CC_NE);
215 edge->SwapTrueFalseSuccessors<true>();
216 }
217 auto cmp = if_imm->GetInput(0).GetInst()->CastToCompare();
218 ASSERT(cmp->GetBasicBlock() == edge);
219 if (edge->GetFalseSuccessor() == loop_header) {
220 auto inversed_cc = GetInverseConditionCode(cmp->GetCc());
221 cmp->SetCc(inversed_cc);
222 edge->SwapTrueFalseSuccessors<true>();
223 }
224 }
225
CreateNewTestInst(const CountableLoopInfo & loop_info,Inst * const_inst,Inst * pre_header_cmp)226 Inst *LoopUnroll::CreateNewTestInst(const CountableLoopInfo &loop_info, Inst *const_inst, Inst *pre_header_cmp)
227 {
228 Inst *test = nullptr;
229 bool is_inc = loop_info.update->GetOpcode() == Opcode::Add;
230 if (is_inc) {
231 test = GetGraph()->CreateInstSub(pre_header_cmp->CastToCompare()->GetOperandsType(), pre_header_cmp->GetPc());
232 } else {
233 ASSERT(loop_info.update->GetOpcode() == Opcode::Sub);
234 test = GetGraph()->CreateInstAdd(pre_header_cmp->CastToCompare()->GetOperandsType(), pre_header_cmp->GetPc());
235 }
236 test->SetInput(0, loop_info.test);
237 test->SetInput(1, const_inst);
238 pre_header_cmp->InsertBefore(test);
239 return test;
240 }
241
242 /**
243 * Replace `Compare(init, test)` with these instructions:
244 *
245 * Constant(unroll_factor)
246 * Sub/Add(test, Constant)
247 * Compare(init, SubI/AddI)
248 *
249 * And replace condition code if it is `CC_NE`.
250 * We use Constant + Sub/Add because low-level instructions (SubI/AddI) may appear only after Lowering pass.
251 */
FixCompareInst(const CountableLoopInfo & loop_info,BasicBlock * header,uint32_t unroll_factor)252 void LoopUnroll::FixCompareInst(const CountableLoopInfo &loop_info, BasicBlock *header, uint32_t unroll_factor)
253 {
254 auto pre_header = header->GetLoop()->GetPreHeader();
255 auto back_edge = loop_info.if_imm->GetBasicBlock();
256 ASSERT(!pre_header->IsEmpty() && pre_header->GetLastInst()->GetOpcode() == Opcode::IfImm);
257 auto pre_header_if = pre_header->GetLastInst()->CastToIfImm();
258 auto pre_header_cmp = GetOrCreateIfImmUniqueCompare(pre_header_if);
259 auto back_edge_cmp = GetOrCreateIfImmUniqueCompare(loop_info.if_imm);
260 NormalizeControlFlow(pre_header, header);
261 NormalizeControlFlow(back_edge, header);
262 // Create Sub/Add + Const instructions and replace Compare's test inst input
263 auto imm_value = (static_cast<uint64_t>(unroll_factor) - 1) * loop_info.const_step;
264 auto new_test = CreateNewTestInst(loop_info, GetGraph()->FindOrCreateConstant(imm_value), pre_header_cmp);
265 auto test_input_idx = 1;
266 if (back_edge_cmp->GetInput(0).GetInst() == loop_info.test) {
267 test_input_idx = 0;
268 } else {
269 ASSERT(back_edge_cmp->GetInput(1).GetInst() == loop_info.test);
270 }
271 ASSERT(pre_header_cmp->GetInput(test_input_idx).GetInst() == loop_info.test);
272 pre_header_cmp->SetInput(test_input_idx, new_test);
273 back_edge_cmp->SetInput(test_input_idx, new_test);
274 // Replace CC_NE ConditionCode
275 if (loop_info.normalized_cc == CC_NE) {
276 auto cc = loop_info.update->GetOpcode() == Opcode::Add ? CC_LT : CC_GT;
277 if (test_input_idx == 0) {
278 cc = SwapOperandsConditionCode(cc);
279 }
280 pre_header_cmp->CastToCompare()->SetCc(cc);
281 back_edge_cmp->CastToCompare()->SetCc(cc);
282 }
283 // for not constant test-instruction we need to insert `overflow-check`:
284 // `test - imm_value` should be less than `test` (incerement loop-index case)
285 // `test + imm_value` should be greater than `test` (decrement loop-index case)
286 // If overflow-check is failed goto after-loop
287 if (!loop_info.test->IsConst()) {
288 auto cc = loop_info.update->GetOpcode() == Opcode::Add ? CC_LT : CC_GT;
289 // Create overflow_compare
290 auto overflow_compare = GetGraph()->CreateInstCompare(compiler::DataType::BOOL, pre_header_cmp->GetPc(), cc);
291 overflow_compare->CastToCompare()->SetOperandsType(loop_info.test->GetType());
292 overflow_compare->SetInput(0, new_test);
293 overflow_compare->SetInput(1, loop_info.test);
294 // Create (pre_header_compare AND overflow_compare) inst
295 auto and_inst = GetGraph()->CreateInstAnd(DataType::BOOL, pre_header_cmp->GetPc());
296 and_inst->SetInput(0, pre_header_cmp);
297 and_inst->SetInput(1, overflow_compare);
298 pre_header_if->SetInput(0, and_inst);
299 pre_header_if->InsertBefore(and_inst);
300 and_inst->InsertBefore(overflow_compare);
301 }
302 }
303 } // namespace panda::compiler
304