1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "compiler_logger.h"
17 #include "optimizer/analysis/dominators_tree.h"
18 #include "optimizer/analysis/loop_analyzer.h"
19 #include "optimizer/analysis/linear_order.h"
20 #include "optimizer/ir/graph_cloner.h"
21
22 namespace panda::compiler {
GraphCloner(Graph * graph,ArenaAllocator * allocator,ArenaAllocator * local_allocator)23 GraphCloner::GraphCloner(Graph *graph, ArenaAllocator *allocator, ArenaAllocator *local_allocator)
24 : graph_(graph),
25 allocator_(allocator),
26 local_allocator_(local_allocator),
27 clone_blocks_(allocator->Adapter()),
28 clone_instructions_(allocator->Adapter())
29 {
30 }
31
32 /**
33 * Clone the whole graph
34 */
CloneGraph()35 Graph *GraphCloner::CloneGraph()
36 {
37 auto new_graph =
38 allocator_->New<Graph>(allocator_, local_allocator_, GetGraph()->GetArch(), GetGraph()->GetMethod(),
39 GetGraph()->GetRuntime(), GetGraph()->GetParentGraph(), GetGraph()->GetMode());
40 CHECK_NOT_NULL(new_graph);
41 new_graph->SetCurrentInstructionId(GetGraph()->GetCurrentInstructionId());
42 CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, false>(GetGraph()->GetVectorBlocks(), new_graph);
43 BuildControlFlow();
44 BuildDataFlow();
45 new_graph->GetPassManager()->SetCheckMode(GetGraph()->GetPassManager()->IsCheckMode());
46 // Clone all flags
47 new_graph->SetBitFields(GetGraph()->GetBitFields());
48 new_graph->InitUsedRegs<DataType::INT64>(GetGraph()->GetUsedRegs<DataType::INT64>());
49 new_graph->InitUsedRegs<DataType::FLOAT64>(GetGraph()->GetUsedRegs<DataType::FLOAT64>());
50 #ifndef NDEBUG
51 CloneAnalyses(new_graph);
52 #endif
53 return new_graph;
54 }
55
CloneAnalyses(Graph * new_graph)56 void GraphCloner::CloneAnalyses(Graph *new_graph)
57 {
58 // Clone dominators if analysis is valid to check dom-tree
59 ASSERT(!new_graph->IsAnalysisValid<DominatorsTree>());
60 if (GetGraph()->IsAnalysisValid<DominatorsTree>()) {
61 new_graph->GetAnalysis<DominatorsTree>().SetValid(true);
62 for (auto block : GetGraph()->GetBlocksRPO()) {
63 auto clone = GetClone(block);
64 if (block->GetDominator() != nullptr) {
65 auto clone_dom = GetClone(block->GetDominator());
66 clone->SetDominator(clone_dom);
67 }
68 for (auto dom_blocks : block->GetDominatedBlocks()) {
69 clone->AddDominatedBlock(GetClone(dom_blocks));
70 }
71 }
72 }
73
74 // Clone loops if analysis is valid to check loop-tree
75 ASSERT(!new_graph->IsAnalysisValid<LoopAnalyzer>());
76 if (GetGraph()->IsAnalysisValid<LoopAnalyzer>()) {
77 auto &cloned_la = new_graph->GetAnalysis<LoopAnalyzer>();
78 cloned_la.SetValid(true);
79 cloned_la.CreateRootLoop();
80 CopyLoop(GetGraph()->GetRootLoop(), new_graph->GetRootLoop());
81 new_graph->SetHasIrreducibleLoop(GetGraph()->HasIrreducibleLoop());
82 new_graph->SetHasInfiniteLoop(GetGraph()->HasInfiniteLoop());
83 }
84
85 ASSERT(!new_graph->IsAnalysisValid<LinearOrder>());
86 if (GetGraph()->IsAnalysisValid<LinearOrder>()) {
87 new_graph->GetAnalysis<LinearOrder>().SetValid(true);
88 CloneLinearOrder(new_graph);
89 }
90 }
91
CopyLoop(Loop * loop,Loop * cloned_loop)92 void GraphCloner::CopyLoop(Loop *loop, Loop *cloned_loop)
93 {
94 if (!loop->IsRoot() && !loop->IsIrreducible() && !loop->IsTryCatchLoop()) {
95 ASSERT(GetClone(loop->GetHeader()) == cloned_loop->GetHeader());
96 cloned_loop->SetPreHeader(GetClone(loop->GetPreHeader()));
97 }
98 for (auto block : loop->GetBlocks()) {
99 if (block->IsLoopHeader()) {
100 continue;
101 }
102 cloned_loop->AppendBlock(GetClone(block));
103 }
104
105 for (auto back_edge : loop->GetBackEdges()) {
106 cloned_loop->AppendBackEdge(GetClone(back_edge));
107 }
108 cloned_loop->SetIsIrreducible(loop->IsIrreducible());
109 cloned_loop->SetIsInfinite(loop->IsInfinite());
110
111 // clone inner loops
112 for (const auto &inner_loop : loop->GetInnerLoops()) {
113 auto cloned_header = GetClone(inner_loop->GetHeader());
114 auto &cloned_la = cloned_header->GetGraph()->GetAnalysis<LoopAnalyzer>();
115 auto cloned_inner_loop = cloned_la.CreateNewLoop(cloned_header);
116 cloned_inner_loop->SetOuterLoop(cloned_loop);
117 cloned_loop->AppendInnerLoop(cloned_inner_loop);
118 CopyLoop(inner_loop, cloned_inner_loop);
119 }
120 }
121
CloneLinearOrder(Graph * new_graph)122 void GraphCloner::CloneLinearOrder([[maybe_unused]] Graph *new_graph)
123 {
124 ASSERT(new_graph != nullptr);
125 ASSERT(GetGraph()->IsAnalysisValid<LinearOrder>());
126 auto &clone_linear_blocks = new_graph->GetAnalysis<LinearOrder>().GetBlocks();
127 clone_linear_blocks.reserve(GetGraph()->GetBlocksLinearOrder().size());
128 for (auto block : GetGraph()->GetBlocksLinearOrder()) {
129 clone_linear_blocks.push_back(GetClone(block));
130 }
131 }
132
133 /**
134 * Clone the whole graph control-flow
135 */
BuildControlFlow()136 void GraphCloner::BuildControlFlow()
137 {
138 for (const auto &block : GetGraph()->GetVectorBlocks()) {
139 if (block == nullptr) {
140 continue;
141 }
142 CloneEdges<CloneEdgeType::EDGE_PRED>(block);
143 CloneEdges<CloneEdgeType::EDGE_SUCC>(block);
144 }
145 }
146
147 /**
148 * Clone the whole graph data-flow
149 */
BuildDataFlow()150 void GraphCloner::BuildDataFlow()
151 {
152 for (const auto &block : GetGraph()->GetVectorBlocks()) {
153 if (block == nullptr) {
154 continue;
155 }
156 auto block_clone = GetClone(block);
157 for (const auto &inst : block->Insts()) {
158 SetCloneInputs<false>(inst);
159 GetClone(inst)->SetId(inst->GetId());
160 UpdateCaller(inst);
161 }
162 for (const auto &inst : block->PhiInsts()) {
163 auto phi = inst->CastToPhi();
164 auto inst_clone = GetClone(inst);
165 inst_clone->SetId(inst->GetId());
166 for (const auto &clone_pred_block : block_clone->GetPredsBlocks()) {
167 auto it = std::find(clone_blocks_.begin(), clone_blocks_.end(), clone_pred_block);
168 ASSERT(it != clone_blocks_.end());
169 size_t index = std::distance(clone_blocks_.begin(), it);
170 ASSERT(GetGraph()->GetVectorBlocks().size() > index);
171 auto pred_block = GetGraph()->GetVectorBlocks()[index];
172 inst_clone->AppendInput(GetClone(phi->GetPhiInput(pred_block)));
173 }
174 }
175 }
176 }
177
178 /*
179 * Create resolver-block - common successor for all loop side-exits
180 */
CreateResolverBlock(Loop * loop,BasicBlock * back_edge)181 BasicBlock *GraphCloner::CreateResolverBlock(Loop *loop, BasicBlock *back_edge)
182 {
183 auto outside_succ = GetLoopOutsideSuccessor(loop);
184 auto resolver = back_edge->InsertNewBlockToSuccEdge(outside_succ);
185 back_edge->GetLoop()->GetOuterLoop()->AppendBlock(resolver);
186 // Populate resolver-block with phis for each instruction which has outside-loop user
187 for (auto block : loop->GetBlocks()) {
188 for (auto inst : block->AllInsts()) {
189 Inst *phi_resolver = nullptr;
190 auto user_it = inst->GetUsers().begin();
191 while (user_it != inst->GetUsers().end()) {
192 auto user = user_it->GetInst();
193 auto input_idx = user_it->GetIndex();
194 ++user_it;
195 ASSERT(user->GetBasicBlock() != nullptr);
196 if (user->GetBasicBlock()->GetLoop() != loop) {
197 if (phi_resolver == nullptr) {
198 phi_resolver = GetGraph()->CreateInstPhi(inst->GetType(), inst->GetPc());
199 phi_resolver->AppendInput(inst);
200 resolver->AppendPhi(phi_resolver);
201 }
202 user->SetInput(input_idx, phi_resolver);
203 }
204 }
205 }
206 }
207 return resolver;
208 }
209
210 /*
211 * Split back-edge for cloning without side exits - in order not to clone `Compare` and `IfImm` instructions
212 */
SplitBackEdge(LoopUnrollData * unroll_data,Loop * loop,BasicBlock * back_edge)213 BasicBlock *GraphCloner::SplitBackEdge(LoopUnrollData *unroll_data, Loop *loop, BasicBlock *back_edge)
214 {
215 auto ifimm = back_edge->GetLastInst();
216 ASSERT(ifimm->GetOpcode() == Opcode::IfImm);
217 auto compare = ifimm->GetInput(0).GetInst();
218 ASSERT(compare->GetOpcode() == Opcode::Compare);
219 // If there are intructions between `Compare` and `IfImm`, clone `Compare` and insert before `IfImm`
220 if (ifimm->GetPrev() != compare) {
221 auto new_cmp = compare->Clone(compare->GetBasicBlock()->GetGraph());
222 new_cmp->SetInput(0, compare->GetInput(0).GetInst());
223 new_cmp->SetInput(1, compare->GetInput(1).GetInst());
224 ifimm->InsertBefore(new_cmp);
225 ifimm->SetInput(0, new_cmp);
226 compare = new_cmp;
227 }
228 if (compare->GetPrev() != nullptr) {
229 auto back_edge_split = back_edge->SplitBlockAfterInstruction(compare->GetPrev(), true);
230 loop->ReplaceBackEdge(back_edge, back_edge_split);
231 back_edge = back_edge_split;
232 } else {
233 ASSERT(back_edge->GetPredsBlocks().size() == 1);
234 auto it = std::find(unroll_data->blocks->begin(), unroll_data->blocks->end(), back_edge);
235 ASSERT(it != unroll_data->blocks->end());
236 unroll_data->blocks->erase(it);
237 }
238 [[maybe_unused]] static constexpr auto BACK_EDGE_INST_COUNT = 2;
239 ASSERT(std::distance(back_edge->AllInsts().begin(), back_edge->AllInsts().end()) == BACK_EDGE_INST_COUNT);
240 return back_edge;
241 }
242
243 /**
244 * - Split loop-header into two blocks, header phis will not be cloned:
245 * [phi-insts]
246 * -----------
247 * [all-insts]
248 *
249 * - If loop is cloing with side-exits create common successor for them;
250 * - Otherwise split back-edge to cut `Compare` and `IfImm` instructions and not clone them;
251 */
PrepareLoopToUnroll(Loop * loop,bool clone_side_exits)252 GraphCloner::LoopUnrollData *GraphCloner::PrepareLoopToUnroll(Loop *loop, bool clone_side_exits)
253 {
254 ASSERT(loop != nullptr);
255 // Populate `LoopUnrollData`
256 auto allocator = loop->GetHeader()->GetGraph()->GetLocalAllocator();
257 auto unroll_data = allocator->New<LoopUnrollData>();
258 CHECK_NOT_NULL(unroll_data);
259 unroll_data->blocks = allocator->New<ArenaVector<BasicBlock *>>(allocator->Adapter());
260 CHECK_NOT_NULL(unroll_data->blocks);
261 unroll_data->blocks->resize(loop->GetBlocks().size());
262 std::copy(loop->GetBlocks().begin(), loop->GetBlocks().end(), unroll_data->blocks->begin());
263 // Split loop-header
264 ASSERT(loop->GetBackEdges().size() == 1U);
265 auto back_edge = loop->GetBackEdges()[0];
266 ASSERT(back_edge != nullptr);
267 auto header_block = loop->GetHeader();
268 ASSERT(!header_block->IsEmpty());
269 if (header_block->HasPhi()) {
270 auto last_phi = header_block->GetFirstInst()->GetPrev();
271 ASSERT(last_phi != nullptr && last_phi->IsPhi());
272 auto header_split = header_block->SplitBlockAfterInstruction(last_phi, true);
273 ASSERT(loop->GetBlocks().front() == header_block);
274 unroll_data->blocks->at(0) = header_split;
275
276 if (back_edge == header_block) {
277 loop->ReplaceBackEdge(header_block, header_split);
278 back_edge = header_split;
279 }
280 }
281 unroll_data->exit_block = back_edge;
282 if (clone_side_exits) {
283 unroll_data->outer = CreateResolverBlock(loop, back_edge);
284 } else {
285 back_edge = SplitBackEdge(unroll_data, loop, back_edge);
286 }
287 // Save replaceable phi inputs
288
289 unroll_data->phi_update_inputs = allocator->New<InstVector>(allocator->Adapter());
290 CHECK_NOT_NULL(unroll_data->phi_update_inputs);
291 for (auto phi : header_block->PhiInsts()) {
292 unroll_data->phi_update_inputs->push_back(phi->CastToPhi()->GetPhiInput(back_edge));
293 }
294 unroll_data->header = header_block;
295 unroll_data->backedge = back_edge;
296 return unroll_data;
297 }
298
299 /**
300 * Update data-flow after unrolling without side-exits
301 */
UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData * unroll_data)302 void GraphCloner::UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData *unroll_data)
303 {
304 auto loop = unroll_data->header->GetLoop();
305 // Update outloop users: replace inputs located in the original loop by theirs clones
306 auto compare = unroll_data->backedge->GetLastInst()->GetPrev();
307 ASSERT(compare->GetOpcode() == Opcode::Compare);
308 for (size_t i = 0; i < compare->GetInputsCount(); i++) {
309 auto input = compare->GetInput(i).GetInst();
310 if (HasClone(input)) {
311 compare->SetInput(i, GetClone(input));
312 }
313 }
314 // update outloop users
315 for (auto block : *unroll_data->blocks) {
316 for (auto inst : block->AllInsts()) {
317 auto user_it = inst->GetUsers().begin();
318 while (user_it != inst->GetUsers().end()) {
319 auto user = user_it->GetInst();
320 auto input_idx = user_it->GetIndex();
321 ++user_it;
322 ASSERT(user->GetBasicBlock() != nullptr);
323 if (user->GetBasicBlock()->GetLoop() != loop) {
324 user->SetInput(input_idx, GetClone(inst));
325 }
326 }
327 }
328 }
329
330 // All header-phi's outloop users are placed in the outer_bb after cloning the original loop
331 // So it's enough to to iterate outer_bb's phis and repalce header-phi by its backedge input
332 auto outer_idx = 1U - unroll_data->backedge->GetSuccBlockIndex(unroll_data->header);
333 auto outer_bb = unroll_data->backedge->GetSuccessor(outer_idx);
334 for (auto outer_phi : outer_bb->PhiInsts()) {
335 auto header_phi = outer_phi->CastToPhi()->GetPhiInput(unroll_data->backedge);
336 if (header_phi->IsPhi() && header_phi->GetBasicBlock() == unroll_data->header) {
337 outer_phi->ReplaceInput(header_phi, header_phi->CastToPhi()->GetPhiInput(unroll_data->backedge));
338 }
339 }
340 }
341
342 /**
343 * Link cloned blocks with each other and insert them to the graph between the last-block and the output-block
344 *
345 * - No-side-exits case:
346 * /---->[header]
347 * | |
348 * | v
349 * | [loop-body] << last-block << exit-block
350 * | |
351 * | v
352 * \-----[backedge]----> ...
353 *
354 * New control-flow:
355 * /---->[header]
356 * | |
357 * | v
358 * | [loop-body] << exit-block
359 * | |
360 * | v
361 * | [loop-body-clone] << last-block
362 * | |
363 * | v
364 * \-----[backedge]----> ...
365 *
366 *
367 * Side-exits case:
368 * /---->[header]
369 * | |
370 * | v
371 * | [loop-body]
372 * | |
373 * | v
374 * \-----[backedge] << last-block << exit-block
375 * |
376 * v
377 * [outer]-----> ...
378 *
379 * New control-flow:
380 * /---->[header]
381 * | |
382 * | v
383 * | [loop-body]
384 * | |
385 * | v
386 * | [backedge]------------\ << exit-block
387 * | | |
388 * | v |
389 * | [loop-body-clone] |
390 * | | |
391 * | v |
392 * \-----[backedge-clone]----->| << last-block
393 * |
394 * v
395 * [outer]-----> ...
396 */
BuildLoopUnrollControlFlow(LoopUnrollData * unroll_data)397 void GraphCloner::BuildLoopUnrollControlFlow(LoopUnrollData *unroll_data)
398 {
399 auto front_block = unroll_data->blocks->front();
400 auto loop = front_block->GetLoop();
401
402 // Copy 'blocks' control-flow to the 'clones'
403 for (auto block : *unroll_data->blocks) {
404 ASSERT(block->GetLoop() == loop);
405 loop->AppendBlock(GetClone(block));
406
407 if (block != front_block) {
408 CloneEdges<CloneEdgeType::EDGE_PRED>(block);
409 }
410 if (block != unroll_data->exit_block) {
411 CloneEdges<CloneEdgeType::EDGE_SUCC>(block);
412 }
413 }
414
415 auto front_clone = GetClone(front_block);
416 auto exit_clone = GetClone(unroll_data->exit_block);
417 if (unroll_data->outer == nullptr) {
418 ASSERT(unroll_data->backedge->GetPredsBlocks().size() == 1);
419 auto last_block = unroll_data->backedge->GetPredsBlocks()[0];
420 last_block->ReplaceSucc(unroll_data->backedge, front_clone);
421 unroll_data->backedge->ReplacePred(last_block, exit_clone);
422 } else {
423 ASSERT(!unroll_data->outer->GetPredsBlocks().empty());
424 auto last_block = unroll_data->outer->GetPredsBlocks().back();
425 last_block->ReplaceSucc(unroll_data->header, front_clone);
426 unroll_data->header->ReplacePred(last_block, exit_clone);
427
428 exit_clone->AddSucc(unroll_data->outer);
429 if (exit_clone->GetSuccBlockIndex(unroll_data->outer) != last_block->GetSuccBlockIndex(unroll_data->outer)) {
430 exit_clone->SwapTrueFalseSuccessors();
431 }
432 auto new_backedge = GetClone(unroll_data->exit_block);
433 loop->ReplaceBackEdge(unroll_data->backedge, new_backedge);
434 unroll_data->backedge = new_backedge;
435 }
436 }
437
438 /**
439 * Construct dataflow for the cloned instructions
440 * if input of the original instruction is front-block phi - insert replaceable input of this phi
441 */
BuildLoopUnrollDataFlow(LoopUnrollData * unroll_data)442 void GraphCloner::BuildLoopUnrollDataFlow(LoopUnrollData *unroll_data)
443 {
444 for (auto block : *unroll_data->blocks) {
445 for (auto inst : block->AllInsts()) {
446 if (inst->IsMarked(clone_marker_)) {
447 SetCloneInputs<true>(inst, unroll_data->backedge);
448 UpdateCaller(inst);
449 }
450 }
451 }
452
453 // Append input to the phi-resolver from outer block, it holds all instructions which have outside loop users
454 auto loop = unroll_data->blocks->front()->GetLoop();
455 if (unroll_data->outer != nullptr) {
456 for (auto phi : unroll_data->outer->PhiInsts()) {
457 auto inst = phi->GetInput(0).GetInst();
458 if (IsInstLoopHeaderPhi(inst, loop)) {
459 auto update = inst->CastToPhi()->GetPhiInput(unroll_data->backedge);
460 phi->AppendInput(update);
461 } else {
462 phi->AppendInput(GetClone(inst));
463 }
464 }
465 }
466
467 // TODO (a.popov) use temp container after if would be possible to reset local allocator
468 if (unroll_data->phi_replaced_inputs == nullptr) {
469 unroll_data->phi_replaced_inputs = allocator_->New<PhiInputsMap>(allocator_->Adapter());
470 CHECK_NOT_NULL(unroll_data->phi_replaced_inputs);
471 } else {
472 unroll_data->phi_replaced_inputs->clear();
473 }
474
475 // Set new update inputs for header phis
476 size_t phi_count = 0;
477 for (auto phi : loop->GetHeader()->PhiInsts()) {
478 auto input = unroll_data->phi_update_inputs->at(phi_count);
479 if (HasClone(input)) {
480 input = GetClone(input);
481 } else if (input->IsPhi() && input->GetBasicBlock()->GetLoop() == loop) {
482 if (phi->IsDominate(input)) {
483 input = input->CastToPhi()->GetPhiInput(unroll_data->backedge);
484 } else {
485 // phi should be visited and its input should be added to the map
486 ASSERT(unroll_data->phi_replaced_inputs->count(input) == 1);
487 input = unroll_data->phi_replaced_inputs->at(input);
488 }
489 }
490
491 auto phi_update_input_idx = phi->CastToPhi()->GetPredBlockIndex(unroll_data->backedge);
492 unroll_data->phi_replaced_inputs->emplace(phi, phi->GetInput(phi_update_input_idx).GetInst());
493 phi->SetInput(phi_update_input_idx, input);
494 phi_count++;
495 }
496 }
497
RemoveLoopBackEdge(const LoopUnrollData * unroll_data)498 void GraphCloner::RemoveLoopBackEdge(const LoopUnrollData *unroll_data)
499 {
500 ASSERT(unroll_data->outer != nullptr);
501 ASSERT(!unroll_data->outer->GetPredsBlocks().empty());
502 auto last_block = unroll_data->outer->GetPredsBlocks().back();
503 // Erase control-flow instruction
504 auto ifimm = last_block->GetLastInst();
505 ASSERT(ifimm->GetOpcode() == Opcode::IfImm);
506 last_block->RemoveInst(ifimm);
507 // Remove back-edge
508 auto header = unroll_data->header;
509 // Clear header block, it should only contain phi
510 ASSERT(header->GetFirstInst() == nullptr);
511 for (auto phi : header->PhiInstsSafe()) {
512 auto remaining_inst = phi->CastToPhi()->GetPhiInput(header->GetLoop()->GetPreHeader());
513 phi->ReplaceUsers(remaining_inst);
514 header->RemoveInst(phi);
515 }
516
517 last_block->RemoveSucc(header);
518 header->RemovePred(last_block);
519
520 // Clear outer phis if it has single predecessor
521 if (unroll_data->outer->GetPredsBlocks().size() == 1U) {
522 for (auto phi : unroll_data->outer->PhiInstsSafe()) {
523 auto remaining_inst = phi->GetInput(0).GetInst();
524 phi->ReplaceUsers(remaining_inst);
525 unroll_data->outer->RemoveInst(phi);
526 }
527 }
528 }
529
BuildClonedLoopHeaderDataFlow(const BasicBlock & block,BasicBlock * resolver,BasicBlock * clone)530 void GraphCloner::BuildClonedLoopHeaderDataFlow(const BasicBlock &block, BasicBlock *resolver, BasicBlock *clone)
531 {
532 for (auto inst : block.Insts()) {
533 if (inst->IsMarked(clone_marker_)) {
534 SetCloneInputs<true>(inst, clone);
535 UpdateUsersForClonedLoopHeader(inst, resolver);
536 UpdateCaller(inst);
537 }
538 }
539 for (auto phi : block.PhiInsts()) {
540 ASSERT(phi->GetInputsCount() == 2U);
541 auto preloop_input = phi->CastToPhi()->GetPhiInput(clone);
542 // Create phi instruction in the `resolver` block with two inputs: current phi and phi's preloop_input
543 auto resolver_phi = GetGraph()->CreateInstPhi(phi->GetType(), phi->GetPc());
544 for (auto user_it = phi->GetUsers().begin(); user_it != phi->GetUsers().end();) {
545 auto user = user_it->GetInst();
546 auto input_index = user_it->GetIndex();
547 ++user_it;
548 ASSERT(user->GetBasicBlock() != nullptr);
549 if (user->GetBasicBlock()->GetLoop() != block.GetLoop()) {
550 user->SetInput(input_index, resolver_phi);
551 }
552 }
553 if (resolver_phi->HasUsers()) {
554 resolver_phi->AppendInput(phi);
555 resolver_phi->AppendInput(preloop_input);
556 resolver->AppendPhi(resolver_phi);
557 }
558 }
559 }
560
561 /**
562 * Used by Loop peeling to clone loop-header and insert this clone before loop-header.
563 * Created block-resolved - common succesor for loop-header and its clone
564 *
565 * [replaceable_pred]
566 * |
567 * v
568 * ... --->[block]--------\
569 * | |
570 * v v
571 * ... [outer]
572 *
573 *
574 * [replaceable_pred]
575 * |
576 * v
577 * [clone_block]---------\
578 * | |
579 * v |
580 * ... --->[block]--------\ |
581 * | | |
582 * v v v
583 * ... [resolver]
584 * |
585 * v
586 * [outer]
587 */
CloneLoopHeader(BasicBlock * block,BasicBlock * outer,BasicBlock * replaceable_pred)588 BasicBlock *GraphCloner::CloneLoopHeader(BasicBlock *block, BasicBlock *outer, BasicBlock *replaceable_pred)
589 {
590 ASSERT(GetGraph()->IsAnalysisValid<DominatorsTree>());
591 ASSERT(clone_marker_ == UNDEF_MARKER);
592 auto marker_holder = MarkerHolder(GetGraph());
593 clone_marker_ = marker_holder.GetMarker();
594 clone_instructions_.clear();
595 size_t inst_count = 0;
596 // Build control-flow
597 auto resolver = block->InsertNewBlockToSuccEdge(outer);
598 outer->GetLoop()->AppendBlock(resolver);
599 if (outer->GetLoop()->HasBackEdge(block)) {
600 outer->GetLoop()->ReplaceBackEdge(block, resolver);
601 }
602 auto clone_block = replaceable_pred->InsertNewBlockToSuccEdge(block);
603 ASSERT(block->GetLoop()->GetPreHeader() == replaceable_pred);
604 block->GetLoop()->SetPreHeader(clone_block);
605 replaceable_pred->GetLoop()->AppendBlock(clone_block);
606 clone_block->AddSucc(resolver);
607 // Check the order of true-false successors
608 if (clone_block->GetSuccBlockIndex(resolver) != block->GetSuccBlockIndex(resolver)) {
609 clone_block->SwapTrueFalseSuccessors();
610 }
611 // Fix Dominators info
612 auto &dom_tree = GetGraph()->GetAnalysis<DominatorsTree>();
613 dom_tree.SetValid(true);
614 ASSERT(block->GetDominator() == replaceable_pred);
615 replaceable_pred->RemoveDominatedBlock(block);
616 dom_tree.SetDomPair(clone_block, block);
617 dom_tree.SetDomPair(replaceable_pred, clone_block);
618 dom_tree.SetDomPair(clone_block, resolver);
619 if (outer->GetDominator() == block) {
620 block->RemoveDominatedBlock(outer);
621 dom_tree.SetDomPair(resolver, outer);
622 }
623 CloneInstructions<InstCloneType::CLONE_INSTS, true>(block, clone_block, &inst_count);
624 BuildClonedLoopHeaderDataFlow(*block, resolver, clone_block);
625 clone_block->SetAllFields(block->GetAllFields());
626 return clone_block;
627 }
628
629 /**
630 * Use the following logic cloning the users:
631 * - replace inputs of all users, placed OUTSIDE cloneable loop, by the new `phi_out` instruction
632 * - `phi_out` is appended to the outer-block
633 * - replace inputs of all users, placed INSIDE cloneable loop, but not cloned, by the new `phi_in` instruction
634 * - `phi_in` is appended to the `inst` basic block
635 * - `phi_in\phi_out` have `inst` and its clone as inputs
636 */
UpdateUsersForClonedLoopHeader(Inst * inst,BasicBlock * outer_block)637 void GraphCloner::UpdateUsersForClonedLoopHeader(Inst *inst, BasicBlock *outer_block)
638 {
639 if (!inst->HasUsers()) {
640 return;
641 }
642 auto inst_block = inst->GetBasicBlock();
643 auto clone = GetClone(inst);
644 auto clone_block = clone->GetBasicBlock();
645 ASSERT(clone_block != nullptr);
646 // phi for inside users
647 auto phi_in = GetGraph()->CreateInstPhi(inst->GetType(), inst->GetPc());
648 // phi for outside users
649 auto phi_out = GetGraph()->CreateInstPhi(inst->GetType(), inst->GetPc());
650 auto user_it = inst->GetUsers().begin();
651 while (user_it != inst->GetUsers().end()) {
652 auto user = user_it->GetInst();
653 auto input_idx = user_it->GetIndex();
654 ++user_it;
655 ASSERT(user->GetBasicBlock() != nullptr);
656 if (user->GetBasicBlock()->GetLoop() == inst_block->GetLoop()) {
657 // user inside loop
658 // skip users that will be moved to the loop-exit block
659 if (user->GetBasicBlock()->IsLoopHeader() && !user->IsPhi()) {
660 continue;
661 }
662 ASSERT(user->GetBasicBlock() != inst_block || user->IsPhi());
663 user->SetInput(input_idx, phi_in);
664 } else {
665 // user outside loop
666 user->SetInput(input_idx, phi_out);
667 }
668 }
669
670 if (phi_in->HasUsers()) {
671 auto clone_index {inst_block->GetPredBlockIndex(clone_block)};
672 phi_in->AppendInput(clone);
673 phi_in->AppendInput(inst);
674 phi_in->CastToPhi()->SetPhiInputBbNum(0, clone_index);
675 phi_in->CastToPhi()->SetPhiInputBbNum(1, 1 - clone_index);
676
677 auto first_phi = inst_block->GetFirstPhi();
678 if (first_phi == nullptr) {
679 inst_block->AppendPhi(phi_in);
680 } else {
681 inst_block->InsertBefore(phi_in, first_phi);
682 }
683 }
684
685 if (phi_out->HasUsers()) {
686 phi_out->AppendInput(inst);
687 phi_out->AppendInput(clone);
688 outer_block->AppendPhi(phi_out);
689 }
690 }
691
IsInstLoopHeaderPhi(Inst * inst,Loop * loop)692 inline bool GraphCloner::IsInstLoopHeaderPhi(Inst *inst, Loop *loop)
693 {
694 return inst->IsPhi() && inst->GetBasicBlock() == loop->GetHeader();
695 }
696
697 /**
698 * Create clone of loop and insert it after original loop:
699 *
700 * /----[pre-loop]
701 * | |
702 * | v
703 * | [loop-body]<----\
704 * | | | |
705 * | | \-------/
706 * | |
707 * | v
708 * \--->[outside-block]
709 * |
710 * v
711 * /----[pre-loop']
712 * | |
713 * | v
714 * | [loop-body']<----\
715 * | | | |
716 * | | \-------/
717 * | |
718 * | v
719 * \--->[outside-block']
720 */
CloneLoop(Loop * loop)721 Loop *GraphCloner::CloneLoop(Loop *loop)
722 {
723 ASSERT(loop != nullptr && !loop->IsRoot());
724 ASSERT_PRINT(IsLoopSingleBackEdgeExitPoint(loop), "Cloning blocks doesn't have single entry/exit point");
725 ASSERT(loop->GetPreHeader() != nullptr);
726 ASSERT(clone_marker_ == UNDEF_MARKER);
727
728 auto marker_holder = MarkerHolder(GetGraph());
729 clone_marker_ = marker_holder.GetMarker();
730 auto unroll_data = PrepareLoopToClone(loop);
731
732 CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, true>(*unroll_data->blocks, GetGraph());
733 BuildLoopCloneControlFlow(unroll_data);
734 BuildLoopCloneDataFlow(unroll_data);
735 MakeLoopCloneInfo(unroll_data);
736 GetGraph()->RunPass<DominatorsTree>();
737
738 auto clone_loop = GetClone(loop->GetHeader())->GetLoop();
739 ASSERT(clone_loop != loop && clone_loop->GetOuterLoop() == loop->GetOuterLoop());
740 COMPILER_LOG(DEBUG, GRAPH_CLONER) << "Loop " << loop->GetId() << " is copied";
741 COMPILER_LOG(DEBUG, GRAPH_CLONER) << "Created new loop, id = " << clone_loop->GetId();
742 return clone_loop;
743 }
744
CreateNewOutsideSucc(BasicBlock * outside_succ,BasicBlock * back_edge,BasicBlock * pre_header)745 BasicBlock *GraphCloner::CreateNewOutsideSucc(BasicBlock *outside_succ, BasicBlock *back_edge, BasicBlock *pre_header)
746 {
747 auto back_edge_idx = outside_succ->GetPredBlockIndex(back_edge);
748 auto pre_header_idx = outside_succ->GetPredBlockIndex(pre_header);
749 auto rm_idx_max = std::max(back_edge_idx, pre_header_idx);
750 auto rm_idx_min = std::min(back_edge_idx, pre_header_idx);
751 auto new_outside_succ = GetGraph()->CreateEmptyBlock();
752 outside_succ->GetLoop()->AppendBlock(new_outside_succ);
753 back_edge->ReplaceSucc(outside_succ, new_outside_succ);
754 pre_header->ReplaceSucc(outside_succ, new_outside_succ);
755 new_outside_succ->AddSucc(outside_succ);
756 for (auto phi : outside_succ->PhiInsts()) {
757 auto new_phi = GetGraph()->CreateInstPhi(phi->GetType(), phi->GetPc());
758 new_phi->AppendInput(phi->CastToPhi()->GetPhiInput(back_edge));
759 new_phi->AppendInput(phi->CastToPhi()->GetPhiInput(pre_header));
760 phi->AppendInput(new_phi);
761 auto phi_back_edge_idx {phi->CastToPhi()->GetPredBlockIndex(back_edge)};
762 auto phi_pre_header_idx {phi->CastToPhi()->GetPredBlockIndex(pre_header)};
763 phi->RemoveInput(rm_idx_max == back_edge_idx ? phi_back_edge_idx : phi_pre_header_idx);
764 phi->RemoveInput(rm_idx_min == pre_header_idx ? phi_pre_header_idx : phi_back_edge_idx);
765 new_outside_succ->AppendPhi(new_phi);
766 }
767 outside_succ->RemovePred(rm_idx_max);
768 outside_succ->RemovePred(rm_idx_min);
769
770 COMPILER_LOG(DEBUG, GRAPH_CLONER) << "New loop outside block created: " << new_outside_succ->GetId();
771 return new_outside_succ;
772 }
773
774 /**
775 * - Split pre-header to contain `Compare` and `IfImm` instructions only;
776 * - Make sure `outside_succ` has 2 predecessors only: loop header and back-edge;
777 * - Split `outside_succ` to contain phi-instructions only;
778 */
PrepareLoopToClone(Loop * loop)779 GraphCloner::LoopClonerData *GraphCloner::PrepareLoopToClone(Loop *loop)
780 {
781 auto pre_header = loop->GetPreHeader();
782 auto ifimm = pre_header->GetLastInst();
783 ASSERT(ifimm->GetOpcode() == Opcode::IfImm);
784 auto compare = ifimm->GetInput(0).GetInst();
785 ASSERT(compare->GetOpcode() == Opcode::Compare);
786 if (ifimm->GetPrev() != compare) {
787 auto new_cmp = compare->Clone(compare->GetBasicBlock()->GetGraph());
788 new_cmp->SetInput(0, compare->GetInput(0).GetInst());
789 new_cmp->SetInput(1, compare->GetInput(1).GetInst());
790 ifimm->InsertBefore(new_cmp);
791 ifimm->SetInput(0, new_cmp);
792 compare = new_cmp;
793 }
794 if (compare->GetPrev() != nullptr) {
795 auto new_pre_header = pre_header->SplitBlockAfterInstruction(compare->GetPrev(), true);
796 loop->SetPreHeader(new_pre_header);
797 pre_header = new_pre_header;
798 }
799 [[maybe_unused]] static constexpr auto PRE_HEADER_INST_COUNT = 2;
800 ASSERT(std::distance(pre_header->AllInsts().begin(), pre_header->AllInsts().end()) == PRE_HEADER_INST_COUNT);
801 // If `outside_succ` has more than 2 predecessors, create a new one
802 // with loop header and back-edge predecessors only and insert it before `outside_succ`
803 auto outside_succ = GetLoopOutsideSuccessor(loop);
804 constexpr auto PREDS_NUM = 2;
805 if (outside_succ->GetPredsBlocks().size() > PREDS_NUM) {
806 auto back_edge = loop->GetBackEdges()[0];
807 outside_succ = CreateNewOutsideSucc(outside_succ, back_edge, pre_header);
808 }
809 // Split outside succ after last phi
810 // create empty block before outside succ if outside succ don't contain phi insts
811 if (outside_succ->HasPhi() && outside_succ->GetFirstInst() != nullptr) {
812 auto last_phi = outside_succ->GetFirstInst()->GetPrev();
813 auto block = outside_succ->SplitBlockAfterInstruction(last_phi, true);
814 // if `outside_succ` is pre-header replace it by `block`
815 for (auto in_loop : loop->GetOuterLoop()->GetInnerLoops()) {
816 if (in_loop->GetPreHeader() == outside_succ) {
817 in_loop->SetPreHeader(block);
818 }
819 }
820 } else if (outside_succ->GetFirstInst() != nullptr) {
821 auto block = outside_succ->InsertEmptyBlockBefore();
822 outside_succ->GetLoop()->AppendBlock(block);
823 outside_succ = block;
824 }
825 return CreateLoopClonerData(loop, pre_header, outside_succ);
826 }
827
CreateLoopClonerData(Loop * loop,BasicBlock * pre_header,BasicBlock * outside_succ)828 GraphCloner::LoopClonerData *GraphCloner::CreateLoopClonerData(Loop *loop, BasicBlock *pre_header,
829 BasicBlock *outside_succ)
830 {
831 // Populate `LoopClonerData`
832 auto allocator = GetGraph()->GetLocalAllocator();
833 auto unroll_data = allocator->New<LoopClonerData>();
834 CHECK_NOT_NULL(unroll_data);
835 unroll_data->blocks = allocator->New<ArenaVector<BasicBlock *>>(allocator->Adapter());
836 CHECK_NOT_NULL(unroll_data->blocks);
837 unroll_data->blocks->resize(loop->GetBlocks().size() + 1);
838 unroll_data->blocks->at(0) = pre_header;
839 std::copy(loop->GetBlocks().begin(), loop->GetBlocks().end(), unroll_data->blocks->begin() + 1);
840 unroll_data->blocks->push_back(outside_succ);
841 unroll_data->outer = outside_succ;
842 unroll_data->header = loop->GetHeader();
843 unroll_data->pre_header = loop->GetPreHeader();
844 return unroll_data;
845 }
846
847 /**
848 * Create new loop, populate it with cloned blocks and build conrlow-flow
849 */
BuildLoopCloneControlFlow(LoopClonerData * unroll_data)850 void GraphCloner::BuildLoopCloneControlFlow(LoopClonerData *unroll_data)
851 {
852 ASSERT(unroll_data != nullptr);
853 auto outer_clone = GetClone(unroll_data->outer);
854 auto pre_header_clone = GetClone(unroll_data->pre_header);
855
856 while (!unroll_data->outer->GetSuccsBlocks().empty()) {
857 auto succ = unroll_data->outer->GetSuccsBlocks().front();
858 succ->ReplacePred(unroll_data->outer, outer_clone);
859 unroll_data->outer->RemoveSucc(succ);
860 }
861 unroll_data->outer->AddSucc(pre_header_clone);
862
863 for (auto &block : *unroll_data->blocks) {
864 if (block != unroll_data->pre_header) {
865 CloneEdges<CloneEdgeType::EDGE_PRED>(block);
866 }
867 if (block != unroll_data->outer) {
868 CloneEdges<CloneEdgeType::EDGE_SUCC>(block);
869 }
870 }
871 ASSERT(unroll_data->outer->GetPredBlockIndex(unroll_data->pre_header) ==
872 outer_clone->GetPredBlockIndex(pre_header_clone));
873 ASSERT(unroll_data->header->GetPredBlockIndex(unroll_data->pre_header) ==
874 GetClone(unroll_data->header)->GetPredBlockIndex(pre_header_clone));
875 }
876
877 /**
878 * Insert cloned loop into loop-tree and populated with cloned blocks
879 */
MakeLoopCloneInfo(LoopClonerData * unroll_data)880 void GraphCloner::MakeLoopCloneInfo(LoopClonerData *unroll_data)
881 {
882 ASSERT(unroll_data != nullptr);
883 // Update loop tree
884 auto loop = unroll_data->header->GetLoop();
885 auto header_clone = GetClone(loop->GetHeader());
886 auto clone_loop = GetGraph()->GetAnalysis<LoopAnalyzer>().CreateNewLoop(header_clone);
887 auto outer_loop = loop->GetOuterLoop();
888 outer_loop->AppendInnerLoop(clone_loop);
889 clone_loop->SetOuterLoop(outer_loop);
890
891 // Populate cloned loop
892 auto pre_loop_clone = GetClone(unroll_data->pre_header);
893 auto outside_succ_clone = GetClone(unroll_data->outer);
894 clone_loop->SetPreHeader(pre_loop_clone);
895 outer_loop->AppendBlock(pre_loop_clone);
896 outer_loop->AppendBlock(outside_succ_clone);
897 for (auto &block : loop->GetBlocks()) {
898 if (!block->IsLoopHeader()) {
899 clone_loop->AppendBlock(GetClone(block));
900 }
901 }
902 for (auto back_edge : loop->GetBackEdges()) {
903 clone_loop->AppendBackEdge(GetClone(back_edge));
904 }
905 }
906
907 /**
908 * Find or create phi in the outside_succ block with the same inputs as `check_phi`
909 */
GetPhiResolver(Inst * check_phi,BasicBlock * outside_succ,BasicBlock * pre_header)910 Inst *GetPhiResolver(Inst *check_phi, BasicBlock *outside_succ, BasicBlock *pre_header)
911 {
912 [[maybe_unused]] constexpr auto MAX_PREDS_NUM = 2;
913 ASSERT(outside_succ->GetPredsBlocks().size() == MAX_PREDS_NUM);
914 ASSERT(check_phi->GetBasicBlock()->IsLoopHeader());
915 auto init_idx = check_phi->CastToPhi()->GetPredBlockIndex(pre_header);
916 auto init_input = check_phi->GetInput(init_idx).GetInst();
917 auto update_input = check_phi->GetInput(1 - init_idx).GetInst();
918
919 for (auto phi : outside_succ->PhiInsts()) {
920 auto idx {phi->CastToPhi()->GetPredBlockIndex(pre_header)};
921 if (phi->GetInput(idx).GetInst() == init_input && phi->GetInput(1 - idx).GetInst() == update_input) {
922 return phi;
923 }
924 }
925
926 auto phi_resolver = outside_succ->GetGraph()->CreateInstPhi(check_phi->GetType(), check_phi->GetPc());
927 auto out_init_idx = outside_succ->GetPredBlockIndex(pre_header);
928 phi_resolver->AppendInput(init_input);
929 phi_resolver->AppendInput(update_input);
930 phi_resolver->SetPhiInputBbNum(0, out_init_idx);
931 phi_resolver->SetPhiInputBbNum(1, 1 - out_init_idx);
932
933 outside_succ->AppendPhi(phi_resolver);
934 return phi_resolver;
935 }
936
937 /**
938 * Build data-flow for cloned instructions
939 */
BuildLoopCloneDataFlow(LoopClonerData * unroll_data)940 void GraphCloner::BuildLoopCloneDataFlow(LoopClonerData *unroll_data)
941 {
942 ASSERT(unroll_data != nullptr);
943 for (const auto &block : *unroll_data->blocks) {
944 for (const auto &inst : block->AllInsts()) {
945 if (inst->IsMarked(clone_marker_)) {
946 SetCloneInputs<false>(inst);
947 UpdateCaller(inst);
948 }
949 }
950 }
951
952 auto pre_loop_clone = GetClone(unroll_data->pre_header);
953 for (auto phi : unroll_data->outer->PhiInsts()) {
954 auto phi_clone = GetClone(phi);
955 phi->ReplaceUsers(phi_clone);
956 auto idx = phi_clone->CastToPhi()->GetPredBlockIndex(pre_loop_clone);
957 phi_clone->SetInput(idx, phi);
958 }
959
960 auto compare = pre_loop_clone->GetFirstInst();
961 auto exit_block = unroll_data->outer->GetPredBlockByIndex(0);
962 if (exit_block == unroll_data->pre_header) {
963 exit_block = unroll_data->outer->GetPredBlockByIndex(1);
964 } else {
965 ASSERT(unroll_data->outer->GetPredBlockByIndex(1) == unroll_data->pre_header);
966 }
967 auto back_edge_compare = exit_block->GetLastInst()->GetInput(0).GetInst();
968 ASSERT(compare->GetOpcode() == Opcode::Compare);
969 ASSERT(back_edge_compare->GetOpcode() == Opcode::Compare);
970 for (auto phi : unroll_data->header->PhiInsts()) {
971 auto init_idx = phi->CastToPhi()->GetPredBlockIndex(unroll_data->pre_header);
972 ASSERT(GetClone(phi)->CastToPhi()->GetPredBlockIndex(pre_loop_clone) == init_idx);
973
974 auto init = phi->GetInput(init_idx).GetInst();
975 auto update = phi->GetInput(1 - init_idx).GetInst();
976 auto resolver_phi = GetPhiResolver(phi, unroll_data->outer, unroll_data->pre_header);
977 auto clone_phi = GetClone(phi);
978 ASSERT(clone_phi->GetInput(init_idx).GetInst() == init);
979 clone_phi->SetInput(init_idx, resolver_phi);
980 for (size_t i = 0; i < compare->GetInputsCount(); i++) {
981 if (compare->GetInput(i).GetInst() == init && back_edge_compare->GetInput(i).GetInst() == update) {
982 compare->SetInput(i, resolver_phi);
983 break;
984 }
985 }
986 }
987 }
988
UpdateCaller(Inst * inst)989 void GraphCloner::UpdateCaller(Inst *inst)
990 {
991 }
992
IsLoopClonable(Loop * loop,size_t inst_limit)993 bool GraphCloner::IsLoopClonable(Loop *loop, size_t inst_limit)
994 {
995 // TODO(schernykh) : implement case when we have inner loops
996 if (!loop->GetOuterLoop()->IsRoot() || !loop->GetInnerLoops().empty() || !IsLoopSingleBackEdgeExitPoint(loop)) {
997 return false;
998 }
999
1000 auto pre_header = loop->GetPreHeader();
1001 auto ifimm = pre_header->GetLastInst();
1002 ASSERT(ifimm->GetOpcode() == Opcode::IfImm);
1003 auto compare = ifimm->GetInput(0).GetInst();
1004 ASSERT(compare->GetOpcode() == Opcode::Compare);
1005 // TODO(schernykh) : Implement case when compare is not before of ifimm
1006 if (ifimm->GetPrev() != compare) {
1007 return false;
1008 }
1009
1010 // Count instructions for copy
1011 // in pre header copied compare + ifimm inst
1012 uint32_t inst_count = 1;
1013 for (const auto &block : loop->GetBlocks()) {
1014 inst_count += std::distance(block->AllInsts().begin(), block->AllInsts().end());
1015 if (inst_count > inst_limit) {
1016 return false;
1017 }
1018 }
1019 for ([[maybe_unused]] auto phi : GetLoopOutsideSuccessor(loop)->PhiInsts()) {
1020 inst_count++;
1021 }
1022 return (inst_count <= inst_limit);
1023 }
1024 } // namespace panda::compiler
1025