• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H_
17 #define COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H_
18 
19 #include "optimizer/ir/basicblock.h"
20 #include "optimizer/ir/graph.h"
21 #include "utils/arena_containers.h"
22 #include "utils/hash.h"
23 
24 namespace panda::compiler {
25 class BasicBlock;
26 class Graph;
27 class Inst;
28 
29 // NOLINTNEXTLINE(readability-redundant-declaration)
30 bool IsLoopSingleBackEdgeExitPoint(Loop *loop);
31 
32 enum class UnrollType : uint8_t { UNROLL_WITH_SIDE_EXITS, UNROLL_WITHOUT_SIDE_EXITS, UNROLL_POST_INCREMENT };
33 
34 enum class InstCloneType : uint8_t { CLONE_ALL, CLONE_INSTS };
35 
36 enum class CloneEdgeType : uint8_t { EDGE_PRED, EDGE_SUCC };
37 
38 /**
39  * Helper-class, provides methods to:
40  * - Clone the whole graph;
41  * - Clone loop;
42  * - Unroll loop;
43  * - Peel loop;
44  */
45 class GraphCloner {
46     using PhiInputsMap = ArenaUnorderedMap<Inst *, Inst *>;
47 
48     struct LoopUnrollData {
49         BasicBlock *header {nullptr};
50         BasicBlock *backedge {nullptr};
51         BasicBlock *exit_block {nullptr};
52         BasicBlock *outer {nullptr};
53         ArenaVector<BasicBlock *> *blocks {nullptr};
54         InstVector *phi_update_inputs {nullptr};
55         PhiInputsMap *phi_replaced_inputs {nullptr};
56     };
57 
58     struct LoopClonerData {
59         BasicBlock *outer {nullptr};
60         BasicBlock *header {nullptr};
61         BasicBlock *pre_header {nullptr};
62         ArenaVector<BasicBlock *> *blocks {nullptr};
63     };
64 
65 public:
66     explicit GraphCloner(Graph *graph, ArenaAllocator *allocator, ArenaAllocator *local_allocator);
67 
68     Graph *CloneGraph();
69     BasicBlock *CloneLoopHeader(BasicBlock *block, BasicBlock *outer, BasicBlock *replaceable_pred);
70     Loop *CloneLoop(Loop *loop);
71     bool IsLoopClonable(Loop *loop, size_t inst_limit);
72 
73     /**
74      * Make equal to the `factor` number of clones of loop body and insert them into the graph
75      *
76      *      /----[pre-loop]
77      *      |        |
78      *      |        v
79      *      |    [loop-body]<----\
80      *      |        |   |       |
81      *      |        |   \-------/
82      *      |        |
83      *      |        v
84      *      \--->[outside-block]
85      *               |
86      *               v
87      *              ...
88      *
89      * Cloning with side-exits:
90      *
91      *      /----[pre-loop]
92      *      |        |
93      *      |        v
94      *      |   [loop-body]---------\
95      *      |        |              |
96      *      |        v              |
97      *      |   [loop-body']------->|
98      *      |        |              |
99      *      |        v              |
100      *      |   [loop-body'']------>|
101      *      |        |              |
102      *      |        v              |
103      *      |  [resolver-block]<----/
104      *      |        |
105      *      |        v
106      *      \--->[outside-block]
107      *              ...
108      *
109      *  Cloning without side-exits:
110      *
111      *      /----[pre-loop]
112      *      |        |
113      *      |        v
114      *      |    [loop-body]
115      *      |        |
116      *      |        v
117      *      |    [loop-body']
118      *      |        |
119      *      |        v
120      *      |    [loop-body'']
121      *      |        |
122      *      |        v
123      *      \--->[outside-block]
124      */
125     template <UnrollType type>
UnrollLoopBody(Loop * loop,size_t factor)126     void UnrollLoopBody(Loop *loop, size_t factor)
127     {
128         ASSERT_PRINT(IsLoopSingleBackEdgeExitPoint(loop), "Cloning blocks doesn't have single entry/exit point");
129         auto marker_holder = MarkerHolder(GetGraph());
130         clone_marker_ = marker_holder.GetMarker();
131         auto unroll_data = PrepareLoopToUnroll(loop, type != UnrollType::UNROLL_WITHOUT_SIDE_EXITS);
132 
133         auto clone_count = factor - 1;
134         for (size_t i = 0; i < clone_count; i++) {
135             CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, true>(*unroll_data->blocks, GetGraph());
136             BuildLoopUnrollControlFlow(unroll_data);
137             // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements)
138             if constexpr (type == UnrollType::UNROLL_WITHOUT_SIDE_EXITS) {
139                 // Users update should be done on the last no-side-exits unroll iteration
140                 // before building loop data-flow
141                 if (i + 1 == clone_count) {  // last_iteration
142                     UpdateUsersAfterNoSideExitsUnroll(unroll_data);
143                 }
144             }
145             BuildLoopUnrollDataFlow(unroll_data);
146         }
147 
148         // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements)
149         if constexpr (type == UnrollType::UNROLL_POST_INCREMENT) {
150             RemoveLoopBackEdge(unroll_data);
151         }
152     }
153 
154 private:
155     // Whole graph cloning
156     void CopyLoop(Loop *loop, Loop *cloned_loop);
157     void CloneLinearOrder(Graph *new_graph);
158     void BuildControlFlow();
159     void BuildDataFlow();
160     void CloneAnalyses(Graph *new_graph);
161     // Loop cloning
162     LoopClonerData *PrepareLoopToClone(Loop *loop);
163     BasicBlock *CreateNewOutsideSucc(BasicBlock *outside_succ, BasicBlock *back_edge, BasicBlock *pre_header);
164     void BuildLoopCloneControlFlow(LoopClonerData *unroll_data);
165     void BuildLoopCloneDataFlow(LoopClonerData *unroll_data);
166     void MakeLoopCloneInfo(LoopClonerData *unroll_data);
167     // Unroll cloning
168     LoopUnrollData *PrepareLoopToUnroll(Loop *loop, bool clone_side_exits);
169     BasicBlock *CreateResolverBlock(Loop *loop, BasicBlock *back_edge);
170     BasicBlock *SplitBackEdge(LoopUnrollData *unroll_data, Loop *loop, BasicBlock *back_edge);
171     void UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData *unroll_data);
172     void BuildLoopUnrollControlFlow(LoopUnrollData *unroll_data);
173     void BuildLoopUnrollDataFlow(LoopUnrollData *unroll_data);
174     void RemoveLoopBackEdge(const LoopUnrollData *unroll_data);
175     // Loop header cloning
176     void BuildClonedLoopHeaderDataFlow(const BasicBlock &block, BasicBlock *resolver, BasicBlock *clone);
177     void UpdateUsersForClonedLoopHeader(Inst *inst, BasicBlock *outer_block);
178     // Cloned blocks and instructions getters
HasClone(const BasicBlock * block)179     bool HasClone(const BasicBlock *block)
180     {
181         return (block->GetId() < clone_blocks_.size()) && (clone_blocks_[block->GetId()] != nullptr);
182     }
183 
GetClone(const BasicBlock * block)184     BasicBlock *GetClone(const BasicBlock *block)
185     {
186         ASSERT(block != nullptr);
187         ASSERT_PRINT(block->GetGraph() == GetGraph(), "GraphCloner probably caught disconnected block");
188         ASSERT_DO(HasClone(block), block->Dump(&std::cerr));
189         return clone_blocks_[block->GetId()];
190     }
191 
HasClone(Inst * inst)192     bool HasClone(Inst *inst)
193     {
194         return inst->IsMarked(clone_marker_) && (inst->GetCloneNumber() < clone_instructions_.size());
195     }
196 
GetClone(Inst * inst)197     Inst *GetClone(Inst *inst)
198     {
199         ASSERT(inst != nullptr);
200         ASSERT(HasClone(inst));
201 
202         // We don't use clone_marker_ when cloning the whole graph, so lets at least check the basic block here
203         ASSERT(inst->GetBasicBlock() != nullptr);
204         ASSERT_PRINT(inst->GetBasicBlock()->GetGraph() == GetGraph(),
205                      "GraphCloner probably caught an instruction from disconnected block");
206         // Empty clone_blocks_ means we are cloning only one basic block
207         ASSERT(clone_blocks_.empty() || HasClone(inst->GetBasicBlock()));
208 
209         return clone_instructions_[inst->GetCloneNumber()];
210     }
211 
212     /**
213      * For each block of input vector create a new one empty block and populate it with the instructions,
214      * cloned form the original block
215      */
216     template <InstCloneType type, bool skip_safepoints>
CloneBlocksAndInstructions(const ArenaVector<BasicBlock * > & blocks,Graph * target_graph)217     void CloneBlocksAndInstructions(const ArenaVector<BasicBlock *> &blocks, Graph *target_graph)
218     {
219         clone_blocks_.clear();
220         clone_blocks_.resize(GetGraph()->GetVectorBlocks().size(), nullptr);
221         clone_instructions_.clear();
222         size_t inst_count = 0;
223         for (const auto &block : blocks) {
224             if (block != nullptr) {
225                 auto clone = block->Clone(target_graph);
226                 clone_blocks_[block->GetId()] = clone;
227                 CloneInstructions<type, skip_safepoints>(block, clone, &inst_count);
228                 if (block->IsTryBegin()) {
229                     target_graph->AppendTryBeginBlock(clone);
230                 }
231             }
232         }
233     }
234 
235     /**
236      * Clone block's instructions and append to the block's clone
237      */
238     template <InstCloneType type, bool skip_safepoints>
CloneInstructions(const BasicBlock * block,BasicBlock * clone,size_t * inst_count)239     void CloneInstructions(const BasicBlock *block, BasicBlock *clone, size_t *inst_count)
240     {
241         for (auto inst : block->Insts()) {
242             if constexpr (skip_safepoints) {  // NOLINT
243                 if (inst->GetOpcode() == Opcode::SafePoint) {
244                     continue;
245                 }
246             }
247             if constexpr (type != InstCloneType::CLONE_ALL) {  // NOLINT
248                 if (inst->GetOpcode() == Opcode::NOP) {
249                     continue;
250                 }
251             }
252 
253             clone->AppendInst(CloneInstruction(inst, inst_count, clone->GetGraph()));
254         }
255 
256         if constexpr (type == InstCloneType::CLONE_ALL) {  // NOLINT
257             for (auto phi : block->PhiInsts()) {
258                 auto phi_clone = CloneInstruction(phi, inst_count, clone->GetGraph());
259                 clone->AppendPhi(phi_clone->CastToPhi());
260             }
261         }
262     }
263 
264     /**
265      * Clone instruction and mark both: clone and cloned
266      */
CloneInstruction(Inst * inst,size_t * inst_count,Graph * target_graph)267     Inst *CloneInstruction(Inst *inst, size_t *inst_count, Graph *target_graph)
268     {
269         inst->SetCloneNumber((*inst_count)++);
270         auto inst_clone = inst->Clone(target_graph);
271         clone_instructions_.push_back(inst_clone);
272         inst->SetMarker(clone_marker_);
273         inst_clone->SetMarker(clone_marker_);
274         if (inst->GetBasicBlock()->GetGraph() != target_graph) {
275             inst_clone->SetId(inst->GetId());
276         }
277         return inst_clone;
278     }
279 
280     inline bool IsInstLoopHeaderPhi(Inst *inst, Loop *loop);
281 
282     /**
283      * Use the following rules cloning the inputs:
284      * - if input of the original instruction has clone - insert this clone as input
285      * - otherwise - use original input as clone instruction input
286      */
287     template <bool replace_header_phi>
288     void SetCloneInputs(Inst *inst, BasicBlock *block = nullptr)
289     {
290         auto clone = GetClone(inst);
291         for (size_t i = 0; i < inst->GetInputsCount(); i++) {
292             auto input = inst->GetInput(i).GetInst();
293             if (replace_header_phi && IsInstLoopHeaderPhi(input, inst->GetBasicBlock()->GetLoop())) {
294                 ASSERT(block != nullptr);
295                 input = input->CastToPhi()->GetPhiInput(block);
296             } else if (HasClone(input)) {
297                 input = GetClone(input);
298             }
299 
300             if (clone->IsOperandsDynamic()) {
301                 clone->AppendInput(input);
302                 if (clone->IsSaveState()) {
303                     static_cast<SaveStateInst *>(clone)->SetVirtualRegister(
304                         i, static_cast<SaveStateInst *>(inst)->GetVirtualRegister(i));
305                 } else if (clone->IsPhi()) {
306                     clone->CastToPhi()->SetPhiInputBbNum(i, inst->CastToPhi()->GetPhiInputBbNum(i));
307                 }
308             } else {
309                 clone->SetInput(i, input);
310             }
311         }
312     }
313 
314     template <CloneEdgeType edge_type>
CloneEdges(BasicBlock * block)315     void CloneEdges(BasicBlock *block)
316     {
317         auto clone = GetClone(block);
318         auto block_edges = &block->GetPredsBlocks();
319         auto clone_edges = &clone->GetPredsBlocks();
320         // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements)
321         if constexpr (edge_type == CloneEdgeType::EDGE_SUCC) {
322             block_edges = &block->GetSuccsBlocks();
323             clone_edges = &clone->GetSuccsBlocks();
324         }
325         ASSERT(clone_edges->empty());
326         clone_edges->reserve(block_edges->size());
327         for (auto edge : *block_edges) {
328             clone_edges->push_back(GetClone(edge));
329         }
330     }
331 
GetGraph()332     Graph *GetGraph()
333     {
334         return graph_;
335     }
336 
337     void UpdateCaller(Inst *inst);
338 
339 private:
340     Graph *graph_;
341     ArenaAllocator *allocator_;
342     ArenaAllocator *local_allocator_;
343     ArenaVector<BasicBlock *> clone_blocks_;
344     InstVector clone_instructions_;
345     Marker clone_marker_ {UNDEF_MARKER};
346 };
347 }  // namespace panda::compiler
348 
349 #endif  // COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H_
350