1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "compiler_logger.h"
16 #include "optimizer/analysis/alias_analysis.h"
17 #include "optimizer/analysis/bounds_analysis.h"
18 #include "optimizer/analysis/dominators_tree.h"
19 #include "optimizer/ir/basicblock.h"
20 #include "optimizer/ir/graph.h"
21 #include "optimizer/ir/graph_cloner.h"
22 #include "optimizer/optimizations/loop_peeling.h"
23
24 namespace panda::compiler {
25 /**
26 * Loop-peeling optimization works with a loops with the following requirements:
27 * - loop is not irreducible;
28 * - there is only 1 back-edge;
29 * - loop-header is a single loop-exit point;
30 *
31 * [pre-header]
32 * |
33 * v
34 * /---->[header]--------\
35 * | | |
36 * | v v
37 * \----[back-edge] [outer]
38 *
39 * There are two stages of the algorithm
40 * 1 stage - insert pre-loop which is an if-block:
41 *
42 * [pre-header]
43 * |
44 * v
45 * [pre-loop]--------\
46 * | |
47 * v v
48 * /---->[header]-------->|
49 * | | |
50 * | v v
51 * \----[back-edge] [resolver]
52 * |
53 * v
54 * [outer]
55 *
56 * 2 stage - move exit-edge form the header to the back-edge block:
57 *
58 * [pre-header]
59 * |
60 * v
61 * [pre-loop]--------\
62 * | |
63 * v v
64 * /---->[header] |
65 * | | |
66 * | v v
67 * \----[back-edge]-->[resolver]
68 * |
69 * v
70 * [outer]
71 *
72 */
RunImpl()73 bool LoopPeeling::RunImpl()
74 {
75 COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << "Run " << GetPassName();
76 RunLoopsVisitor();
77 COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << GetPassName() << " complete";
78 return is_appied_;
79 }
80
InvalidateAnalyses()81 void LoopPeeling::InvalidateAnalyses()
82 {
83 GetGraph()->InvalidateAnalysis<BoundsAnalysis>();
84 GetGraph()->InvalidateAnalysis<AliasAnalysis>();
85 }
86
CleanDeadPhis(BasicBlock * block)87 static inline void CleanDeadPhis(BasicBlock *block)
88 {
89 for (auto phi : block->PhiInstsSafe()) {
90 if (!phi->HasUsers()) {
91 block->RemoveInst(phi);
92 }
93 }
94 }
95
HeaderHasInlinedCalls(const BasicBlock * header)96 static bool HeaderHasInlinedCalls(const BasicBlock *header)
97 {
98 auto check_inlined_call = [](auto inst) {
99 return inst->IsCall() && static_cast<const CallInst *>(inst)->IsInlined();
100 };
101 auto insts = header->AllInsts();
102 return std::find_if(insts.begin(), insts.end(), check_inlined_call) != insts.end();
103 }
104
TransformLoop(Loop * loop)105 bool LoopPeeling::TransformLoop(Loop *loop)
106 {
107 ASSERT(loop->GetBackEdges().size() == 1);
108 ASSERT(loop->GetHeader()->GetLastInst()->GetOpcode() == Opcode::IfImm ||
109 loop->GetHeader()->GetLastInst()->GetOpcode() == Opcode::If);
110 auto header = loop->GetHeader();
111 // If header contains inlined call this call will be cloned and stay unpaired without `Return.inlined` instruction
112 if (HeaderHasInlinedCalls(header)) {
113 return false;
114 }
115 auto back_edge = loop->GetBackEdges()[0];
116 InsertPreLoop(loop);
117 auto moved_inst_count = MoveLoopExitToBackEdge(header, back_edge);
118 CleanDeadPhis(header);
119 is_appied_ = true;
120 COMPILER_LOG(DEBUG, LOOP_TRANSFORM) << "Loop was peeled, id = " << loop->GetId();
121 GetGraph()->GetEventWriter().EventLoopPeeling(loop->GetId(), header->GetGuestPc(), back_edge->GetGuestPc(),
122 moved_inst_count);
123 return true;
124 }
125
126 /**
127 * Clone the loop-header and insert before it
128 */
InsertPreLoop(Loop * loop)129 void LoopPeeling::InsertPreLoop(Loop *loop)
130 {
131 auto header = loop->GetHeader();
132 auto pre_header = loop->GetPreHeader();
133 auto graph_cloner =
134 GraphCloner(header->GetGraph(), header->GetGraph()->GetAllocator(), header->GetGraph()->GetLocalAllocator());
135 graph_cloner.CloneLoopHeader(header, GetLoopOuterBlock(header), pre_header);
136 }
137
138 /*
139 * Make back-edge loop-exit point
140 */
MoveLoopExitToBackEdge(BasicBlock * header,BasicBlock * back_edge)141 size_t LoopPeeling::MoveLoopExitToBackEdge(BasicBlock *header, BasicBlock *back_edge)
142 {
143 size_t moved_inst_count = 0;
144 auto outer_block = GetLoopOuterBlock(header);
145 size_t outer_idx = header->GetSuccBlockIndex(outer_block);
146
147 // Create exit block
148 BasicBlock *exit_block = nullptr;
149 if (header != back_edge) {
150 ASSERT(GetGraph()->IsAnalysisValid<DominatorsTree>());
151 exit_block = back_edge->InsertNewBlockToSuccEdge(header);
152 outer_block->ReplacePred(header, exit_block);
153 header->RemoveSucc(outer_block);
154 exit_block->SetTry(header->IsTry());
155
156 auto loop = header->GetLoop();
157 loop->AppendBlock(exit_block);
158 loop->ReplaceBackEdge(back_edge, exit_block);
159
160 // Check the order of true-false successors
161 if (exit_block->GetSuccBlockIndex(outer_block) != outer_idx) {
162 exit_block->SwapTrueFalseSuccessors();
163 }
164
165 // Fix Dominators info
166 auto &dom_tree = GetGraph()->GetAnalysis<DominatorsTree>();
167 dom_tree.SetValid(true);
168 dom_tree.SetDomPair(back_edge, exit_block);
169 back_edge = exit_block;
170 }
171
172 // Use reverse order to keep domination relation between instructions in the header-block
173 for (auto inst : header->InstsSafeReverse()) {
174 if (exit_block != nullptr) {
175 header->EraseInst(inst);
176 exit_block->PrependInst(inst);
177 moved_inst_count++;
178 }
179 UpdateClonedInstInputs(inst, header, back_edge);
180 }
181
182 // Update outer phis
183 for (auto phi : outer_block->PhiInsts()) {
184 size_t header_idx = phi->CastToPhi()->GetPredBlockIndex(back_edge);
185 auto header_inst = phi->GetInput(header_idx).GetInst();
186 if (header_inst->IsPhi()) {
187 phi->SetInput(header_idx, header_inst->CastToPhi()->GetPhiInput(back_edge));
188 }
189 }
190
191 return moved_inst_count;
192 }
193
UpdateClonedInstInputs(Inst * inst,BasicBlock * header,BasicBlock * back_edge)194 void LoopPeeling::UpdateClonedInstInputs(Inst *inst, BasicBlock *header, BasicBlock *back_edge)
195 {
196 for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
197 auto input_inst = inst->GetInput(i).GetInst();
198 if (input_inst->IsPhi() && input_inst->GetBasicBlock() == header) {
199 auto phi_input = input_inst->CastToPhi()->GetPhiInput(back_edge);
200 // Replace phi by its input, if this input will NOT be moved to the exit block
201 bool is_moved = phi_input->GetBasicBlock() == header && !phi_input->IsPhi();
202 if (phi_input->IsDominate(inst) && !is_moved) {
203 inst->SetInput(i, phi_input);
204 }
205 }
206 }
207 }
208 } // namespace panda::compiler
209