• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H
17 #define COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H
18 
19 #include "optimizer/ir/basicblock.h"
20 #include "optimizer/ir/graph.h"
21 #include "utils/arena_containers.h"
22 #include "utils/hash.h"
23 
24 namespace panda::compiler {
25 class BasicBlock;
26 class Graph;
27 class Inst;
28 
29 // NOLINTNEXTLINE(readability-redundant-declaration)
30 bool IsLoopSingleBackEdgeExitPoint(Loop *loop);
31 
32 enum class UnrollType : uint8_t { UNROLL_WITH_SIDE_EXITS, UNROLL_WITHOUT_SIDE_EXITS, UNROLL_POST_INCREMENT };
33 
34 enum class InstCloneType : uint8_t { CLONE_ALL, CLONE_INSTS };
35 
36 enum class CloneEdgeType : uint8_t { EDGE_PRED, EDGE_SUCC };
37 
38 /**
39  * Helper-class, provides methods to:
40  * - Clone the whole graph;
41  * - Clone loop;
42  * - Unroll loop;
43  * - Peel loop;
44  */
45 class GraphCloner {
46     using PhiInputsMap = ArenaUnorderedMap<Inst *, Inst *>;
47 
48     struct LoopUnrollData {
49         BasicBlock *header {nullptr};
50         BasicBlock *backedge {nullptr};
51         BasicBlock *exit_block {nullptr};
52         BasicBlock *outer {nullptr};
53         ArenaVector<BasicBlock *> *blocks {nullptr};
54         InstVector *phi_update_inputs {nullptr};
55         PhiInputsMap *phi_replaced_inputs {nullptr};
56     };
57 
58     struct LoopClonerData {
59         BasicBlock *outer {nullptr};
60         BasicBlock *header {nullptr};
61         BasicBlock *pre_header {nullptr};
62         ArenaVector<BasicBlock *> *blocks {nullptr};
63     };
64 
65 public:
66     explicit GraphCloner(Graph *graph, ArenaAllocator *allocator, ArenaAllocator *local_allocator);
67 
68     Graph *CloneGraph();
69     BasicBlock *CloneLoopHeader(BasicBlock *block, BasicBlock *outer, BasicBlock *replaceable_pred);
70     Loop *CloneLoop(Loop *loop);
71     bool IsLoopClonable(Loop *loop, size_t inst_limit);
72 
73     /**
74      * Make equal to the `factor` number of clones of loop body and insert them into the graph
75      *
76      *      /----[pre-loop]
77      *      |        |
78      *      |        v
79      *      |    [loop-body]<----\
80      *      |        |   |       |
81      *      |        |   \-------/
82      *      |        |
83      *      |        v
84      *      \--->[outside-block]
85      *               |
86      *               v
87      *              ...
88      *
89      * Cloning with side-exits:
90      *
91      *      /----[pre-loop]
92      *      |        |
93      *      |        v
94      *      |   [loop-body]---------\
95      *      |        |              |
96      *      |        v              |
97      *      |   [loop-body']------->|
98      *      |        |              |
99      *      |        v              |
100      *      |   [loop-body'']------>|
101      *      |        |              |
102      *      |        v              |
103      *      |  [resolver-block]<----/
104      *      |        |
105      *      |        v
106      *      \--->[outside-block]
107      *              ...
108      *
109      *  Cloning without side-exits:
110      *
111      *      /----[pre-loop]
112      *      |        |
113      *      |        v
114      *      |    [loop-body]
115      *      |        |
116      *      |        v
117      *      |    [loop-body']
118      *      |        |
119      *      |        v
120      *      |    [loop-body'']
121      *      |        |
122      *      |        v
123      *      \--->[outside-block]
124      */
125     template <UnrollType type>
UnrollLoopBody(Loop * loop,size_t factor)126     void UnrollLoopBody(Loop *loop, size_t factor)
127     {
128         ASSERT_PRINT(IsLoopSingleBackEdgeExitPoint(loop), "Cloning blocks doesn't have single entry/exit point");
129         auto marker_holder = MarkerHolder(GetGraph());
130         clone_marker_ = marker_holder.GetMarker();
131         auto unroll_data = PrepareLoopToUnroll(loop, type != UnrollType::UNROLL_WITHOUT_SIDE_EXITS);
132 
133         auto clone_count = factor - 1;
134         for (size_t i = 0; i < clone_count; i++) {
135             CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, true>(*unroll_data->blocks, GetGraph());
136             BuildLoopUnrollControlFlow(unroll_data);
137             // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements)
138             if constexpr (type == UnrollType::UNROLL_WITHOUT_SIDE_EXITS) {
139                 // Users update should be done on the last no-side-exits unroll iteration
140                 // before building loop data-flow
141                 if (i + 1 == clone_count) {  // last_iteration
142                     UpdateUsersAfterNoSideExitsUnroll(unroll_data);
143                 }
144             }
145             BuildLoopUnrollDataFlow(unroll_data);
146         }
147 
148         // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements)
149         if constexpr (type == UnrollType::UNROLL_POST_INCREMENT) {
150             RemoveLoopBackEdge(unroll_data);
151         }
152     }
153 
154 private:
155     // Whole graph cloning
156     void CopyLoop(Loop *loop, Loop *cloned_loop);
157     void CloneLinearOrder(Graph *new_graph);
158     void BuildControlFlow();
159     void BuildDataFlow();
160     void CloneAnalyses(Graph *new_graph);
161     // Loop cloning
162     LoopClonerData *PrepareLoopToClone(Loop *loop);
163     BasicBlock *CreateNewOutsideSucc(BasicBlock *outside_succ, BasicBlock *back_edge, BasicBlock *pre_header);
164     void BuildLoopCloneControlFlow(LoopClonerData *unroll_data);
165     void BuildLoopCloneDataFlow(LoopClonerData *unroll_data);
166     void MakeLoopCloneInfo(LoopClonerData *unroll_data);
167     // Unroll cloning
168     LoopUnrollData *PrepareLoopToUnroll(Loop *loop, bool clone_side_exits);
169     BasicBlock *CreateResolverBlock(Loop *loop, BasicBlock *back_edge);
170     BasicBlock *SplitBackEdge(LoopUnrollData *unroll_data, Loop *loop, BasicBlock *back_edge);
171     void UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData *unroll_data);
172     void BuildLoopUnrollControlFlow(LoopUnrollData *unroll_data);
173     void BuildLoopUnrollDataFlow(LoopUnrollData *unroll_data);
174     void RemoveLoopBackEdge(const LoopUnrollData *unroll_data);
175     // Loop header cloning
176     void BuildClonedLoopHeaderDataFlow(const BasicBlock &block, BasicBlock *resolver, BasicBlock *clone);
177     void UpdateUsersForClonedLoopHeader(Inst *inst, BasicBlock *outer_block);
178     // Cloned blocks and instructions getters
HasClone(const BasicBlock * block)179     bool HasClone(const BasicBlock *block)
180     {
181         return (block->GetId() < clone_blocks_.size()) && (clone_blocks_[block->GetId()] != nullptr);
182     }
183 
GetClone(const BasicBlock * block)184     BasicBlock *GetClone(const BasicBlock *block)
185     {
186         ASSERT(block != nullptr);
187         ASSERT_PRINT(block->GetGraph() == GetGraph(), "GraphCloner probably caught disconnected block");
188         ASSERT_DO(HasClone(block), block->Dump(&std::cerr));
189         return clone_blocks_[block->GetId()];
190     }
191 
HasClone(Inst * inst)192     bool HasClone(Inst *inst)
193     {
194         return inst->IsMarked(clone_marker_) && (inst->GetCloneNumber() < clone_instructions_.size());
195     }
196 
GetClone(Inst * inst)197     Inst *GetClone(Inst *inst)
198     {
199         ASSERT(inst != nullptr);
200         ASSERT(HasClone(inst));
201 
202         // We don't use clone_marker_ when cloning the whole graph, so lets at least check the basic block here
203         ASSERT(inst->GetBasicBlock() != nullptr);
204         ASSERT_PRINT(inst->GetBasicBlock()->GetGraph() == GetGraph(),
205                      "GraphCloner probably caught an instruction from disconnected block");
206         // Empty clone_blocks_ means we are cloning only one basic block
207         ASSERT(clone_blocks_.empty() || HasClone(inst->GetBasicBlock()));
208 
209         return clone_instructions_[inst->GetCloneNumber()];
210     }
211 
212     /**
213      * For each block of input vector create a new one empty block and populate it with the instructions,
214      * cloned form the original block
215      */
216     template <InstCloneType type, bool skip_safepoints>
CloneBlocksAndInstructions(const ArenaVector<BasicBlock * > & blocks,Graph * target_graph)217     void CloneBlocksAndInstructions(const ArenaVector<BasicBlock *> &blocks, Graph *target_graph)
218     {
219         clone_blocks_.clear();
220         clone_blocks_.resize(GetGraph()->GetVectorBlocks().size(), nullptr);
221         clone_instructions_.clear();
222         size_t inst_count = 0;
223         for (const auto &block : blocks) {
224             if (block != nullptr) {
225                 auto clone = block->Clone(target_graph);
226                 clone_blocks_[block->GetId()] = clone;
227                 CloneInstructions<type, skip_safepoints>(block, clone, &inst_count);
228                 if (block->IsTryBegin()) {
229                     target_graph->AppendTryBeginBlock(clone);
230                 }
231             }
232         }
233     }
234 
235     /**
236      * Clone block's instructions and append to the block's clone
237      */
238     template <InstCloneType type, bool skip_safepoints>
CloneInstructions(const BasicBlock * block,BasicBlock * clone,size_t * inst_count)239     void CloneInstructions(const BasicBlock *block, BasicBlock *clone, size_t *inst_count)
240     {
241         for (auto inst : block->Insts()) {
242             clone->AppendInst(CloneInstruction(inst, inst_count, clone->GetGraph()));
243         }
244 
245         if constexpr (type == InstCloneType::CLONE_ALL) {  // NOLINT
246             for (auto phi : block->PhiInsts()) {
247                 auto phi_clone = CloneInstruction(phi, inst_count, clone->GetGraph());
248                 clone->AppendPhi(phi_clone->CastToPhi());
249             }
250         }
251     }
252 
253     /**
254      * Clone instruction and mark both: clone and cloned
255      */
CloneInstruction(Inst * inst,size_t * inst_count,Graph * target_graph)256     Inst *CloneInstruction(Inst *inst, size_t *inst_count, Graph *target_graph)
257     {
258         inst->SetCloneNumber((*inst_count)++);
259         auto inst_clone = inst->Clone(target_graph);
260         clone_instructions_.push_back(inst_clone);
261         inst->SetMarker(clone_marker_);
262         inst_clone->SetMarker(clone_marker_);
263         if (inst->GetBasicBlock()->GetGraph() != target_graph) {
264             inst_clone->SetId(inst->GetId());
265         }
266         return inst_clone;
267     }
268 
269     inline bool IsInstLoopHeaderPhi(Inst *inst, Loop *loop);
270 
271     /**
272      * Use the following rules cloning the inputs:
273      * - if input of the original instruction has clone - insert this clone as input
274      * - otherwise - use original input as clone instruction input
275      */
276     template <bool replace_header_phi>
277     void SetCloneInputs(Inst *inst, BasicBlock *block = nullptr)
278     {
279         auto clone = GetClone(inst);
280         for (size_t i = 0; i < inst->GetInputsCount(); i++) {
281             auto input = inst->GetInput(i).GetInst();
282             if (replace_header_phi && IsInstLoopHeaderPhi(input, inst->GetBasicBlock()->GetLoop())) {
283                 ASSERT(block != nullptr);
284                 input = input->CastToPhi()->GetPhiInput(block);
285             } else if (HasClone(input)) {
286                 input = GetClone(input);
287             }
288 
289             if (clone->IsOperandsDynamic()) {
290                 clone->AppendInput(input);
291                 if (clone->IsSaveState()) {
292                     static_cast<SaveStateInst *>(clone)->SetVirtualRegister(
293                         i, static_cast<SaveStateInst *>(inst)->GetVirtualRegister(i));
294                 } else if (clone->IsPhi()) {
295                     clone->CastToPhi()->SetPhiInputBbNum(i, inst->CastToPhi()->GetPhiInputBbNum(i));
296                 }
297             } else {
298                 clone->SetInput(i, input);
299             }
300         }
301     }
302 
303     template <CloneEdgeType edge_type>
CloneEdges(BasicBlock * block)304     void CloneEdges(BasicBlock *block)
305     {
306         auto clone = GetClone(block);
307         auto block_edges = &block->GetPredsBlocks();
308         auto clone_edges = &clone->GetPredsBlocks();
309         // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements)
310         if constexpr (edge_type == CloneEdgeType::EDGE_SUCC) {
311             block_edges = &block->GetSuccsBlocks();
312             clone_edges = &clone->GetSuccsBlocks();
313         }
314         ASSERT(clone_edges->empty());
315         clone_edges->reserve(block_edges->size());
316         for (auto edge : *block_edges) {
317             clone_edges->push_back(GetClone(edge));
318         }
319     }
320 
GetGraph()321     Graph *GetGraph()
322     {
323         return graph_;
324     }
325 
326     void UpdateCaller(Inst *inst);
327 
328 private:
329     Graph *graph_;
330     ArenaAllocator *allocator_;
331     ArenaAllocator *local_allocator_;
332     ArenaVector<BasicBlock *> clone_blocks_;
333     InstVector clone_instructions_;
334     Marker clone_marker_ {UNDEF_MARKER};
335 };
336 }  // namespace panda::compiler
337 
338 #endif  // COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H
339