1 /*
2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "compiler_logger.h"
17 #include "inst.h"
18 #include "optimizer/analysis/dominators_tree.h"
19 #include "optimizer/analysis/loop_analyzer.h"
20 #include "optimizer/analysis/linear_order.h"
21 #include "optimizer/ir/graph.h"
22 #include "optimizer/ir/graph_cloner.h"
23
24 namespace ark::compiler {
GraphCloner(Graph * graph,ArenaAllocator * allocator,ArenaAllocator * localAllocator)25 GraphCloner::GraphCloner(Graph *graph, ArenaAllocator *allocator, ArenaAllocator *localAllocator)
26 : graph_(graph),
27 allocator_(allocator),
28 localAllocator_(localAllocator),
29 cloneBlocks_(allocator->Adapter()),
30 cloneInstructions_(allocator->Adapter()),
31 cloneInstMap_(allocator->Adapter())
32 {
33 }
34
35 /// Clone the whole graph
CloneGraph()36 Graph *GraphCloner::CloneGraph()
37 {
38 auto newGraph = allocator_->New<Graph>(Graph::GraphArgs {allocator_, localAllocator_, GetGraph()->GetArch(),
39 GetGraph()->GetMethod(), GetGraph()->GetRuntime()},
40 GetGraph()->GetParentGraph(), GetGraph()->GetMode());
41 ASSERT(newGraph != nullptr);
42 newGraph->SetCurrentInstructionId(GetGraph()->GetCurrentInstructionId());
43 newGraph->SetAotData(GetGraph()->GetAotData());
44 CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, false>(GetGraph()->GetVectorBlocks(), newGraph);
45 BuildControlFlow();
46 BuildTryCatchLogic(newGraph);
47 BuildDataFlow();
48 CloneConstantsInfo(newGraph);
49 newGraph->GetPassManager()->SetCheckMode(GetGraph()->GetPassManager()->IsCheckMode());
50 // Clone all flags
51 newGraph->SetBitFields(GetGraph()->GetBitFields());
52 newGraph->InitUsedRegs<DataType::INT64>(GetGraph()->GetUsedRegs<DataType::INT64>());
53 newGraph->InitUsedRegs<DataType::FLOAT64>(GetGraph()->GetUsedRegs<DataType::FLOAT64>());
54 #ifdef COMPILER_DEBUG_CHECKS
55 CloneAnalyses(newGraph);
56 #endif
57 return newGraph;
58 }
59
CloneAnalyses(Graph * newGraph)60 void GraphCloner::CloneAnalyses(Graph *newGraph)
61 {
62 // Clone dominators if analysis is valid to check dom-tree
63 ASSERT(!newGraph->IsAnalysisValid<DominatorsTree>());
64 if (GetGraph()->IsAnalysisValid<DominatorsTree>()) {
65 newGraph->GetAnalysis<DominatorsTree>().SetValid(true);
66 for (auto block : GetGraph()->GetBlocksRPO()) {
67 auto clone = GetClone(block);
68 if (block->GetDominator() != nullptr) {
69 auto cloneDom = GetClone(block->GetDominator());
70 clone->SetDominator(cloneDom);
71 }
72 for (auto domBlocks : block->GetDominatedBlocks()) {
73 clone->AddDominatedBlock(GetClone(domBlocks));
74 }
75 }
76 }
77
78 // Clone loops if analysis is valid to check loop-tree
79 ASSERT(!newGraph->IsAnalysisValid<LoopAnalyzer>());
80 if (GetGraph()->IsAnalysisValid<LoopAnalyzer>()) {
81 auto &clonedLa = newGraph->GetAnalysis<LoopAnalyzer>();
82 clonedLa.SetValid(true);
83 clonedLa.CreateRootLoop();
84 CopyLoop(GetGraph()->GetRootLoop(), newGraph->GetRootLoop());
85 newGraph->SetHasIrreducibleLoop(GetGraph()->HasIrreducibleLoop());
86 newGraph->SetHasInfiniteLoop(GetGraph()->HasInfiniteLoop());
87 }
88
89 ASSERT(!newGraph->IsAnalysisValid<LinearOrder>());
90 if (GetGraph()->IsAnalysisValid<LinearOrder>()) {
91 newGraph->GetAnalysis<LinearOrder>().SetValid(true);
92 CloneLinearOrder(newGraph);
93 }
94 }
95
CopyLoop(Loop * loop,Loop * clonedLoop)96 void GraphCloner::CopyLoop(Loop *loop, Loop *clonedLoop)
97 {
98 if (!loop->IsRoot() && !loop->IsIrreducible() && !loop->IsTryCatchLoop()) {
99 ASSERT(GetClone(loop->GetHeader()) == clonedLoop->GetHeader());
100 clonedLoop->SetPreHeader(GetClone(loop->GetPreHeader()));
101 }
102 for (auto block : loop->GetBlocks()) {
103 if (block->IsLoopHeader()) {
104 continue;
105 }
106 clonedLoop->AppendBlock(GetClone(block));
107 }
108
109 for (auto backEdge : loop->GetBackEdges()) {
110 clonedLoop->AppendBackEdge(GetClone(backEdge));
111 }
112 clonedLoop->SetIsIrreducible(loop->IsIrreducible());
113 clonedLoop->SetIsInfinite(loop->IsInfinite());
114
115 // clone inner loops
116 for (const auto &innerLoop : loop->GetInnerLoops()) {
117 auto clonedHeader = GetClone(innerLoop->GetHeader());
118 auto &clonedLa = clonedHeader->GetGraph()->GetAnalysis<LoopAnalyzer>();
119 auto clonedInnerLoop = clonedLa.CreateNewLoop(clonedHeader);
120 ASSERT(clonedInnerLoop != nullptr);
121 clonedInnerLoop->SetOuterLoop(clonedLoop);
122 clonedLoop->AppendInnerLoop(clonedInnerLoop);
123 CopyLoop(innerLoop, clonedInnerLoop);
124 }
125 }
126
CloneLinearOrder(Graph * newGraph)127 void GraphCloner::CloneLinearOrder([[maybe_unused]] Graph *newGraph)
128 {
129 ASSERT(newGraph != nullptr);
130 ASSERT(GetGraph()->IsAnalysisValid<LinearOrder>());
131 auto &cloneLinearBlocks = newGraph->GetAnalysis<LinearOrder>().GetBlocks();
132 cloneLinearBlocks.reserve(GetGraph()->GetBlocksLinearOrder().size());
133 for (auto block : GetGraph()->GetBlocksLinearOrder()) {
134 cloneLinearBlocks.push_back(GetClone(block));
135 }
136 }
137
138 /// Clone the whole graph control-flow
BuildControlFlow()139 void GraphCloner::BuildControlFlow()
140 {
141 for (const auto &block : GetGraph()->GetVectorBlocks()) {
142 if (block == nullptr) {
143 continue;
144 }
145 CloneEdges<CloneEdgeType::EDGE_PRED>(block);
146 CloneEdges<CloneEdgeType::EDGE_SUCC>(block);
147 }
148 }
149
CloneThrowableInstHandlers(Graph * newGraph)150 void GraphCloner::CloneThrowableInstHandlers(Graph *newGraph)
151 {
152 for (auto *block : GetGraph()->GetVectorBlocks()) {
153 if (block == nullptr) {
154 continue;
155 }
156 for (auto *inst : block->AllInsts()) {
157 if (!GetGraph()->IsInstThrowable(inst)) {
158 continue;
159 }
160
161 auto *clonedInst = GetClone(inst);
162 for (auto *catchBlock : GetGraph()->GetThrowableInstHandlers(inst)) {
163 auto *clonedCatchBlock = GetClone(catchBlock);
164 newGraph->AppendThrowableInst(clonedInst, clonedCatchBlock);
165 }
166 }
167 }
168 }
169
BuildTryCatchLogic(Graph * newGraph)170 void GraphCloner::BuildTryCatchLogic(Graph *newGraph)
171 {
172 // Connect every tryInst to corresponding tryEndBlock
173 for (auto *tryBegin : newGraph->GetTryBeginBlocks()) {
174 TryInst *tryInst = GetTryBeginInst(tryBegin);
175 ASSERT(tryInst != nullptr);
176 for (auto block : newGraph->GetBlocksRPO()) {
177 if (block->IsTryEnd() && (block->GetTryId() == tryBegin->GetTryId())) {
178 tryInst->SetTryEndBlock(block);
179 }
180 }
181 }
182
183 CloneThrowableInstHandlers(newGraph);
184
185 auto fixThrowables = [this](const CatchPhiInst *originalInst, CatchPhiInst *clonedInst) {
186 if (originalInst->GetThrowableInsts() != nullptr) {
187 for (size_t i = 0, j = originalInst->GetThrowableInsts()->size(); i < j; ++i) {
188 auto *clonedThrowable = cloneInstMap_[originalInst->GetThrowableInst(i)];
189 clonedInst->AppendThrowableInst(clonedThrowable);
190 }
191 }
192 };
193
194 // Restore catchPhi's and throwables
195 for (BasicBlock *clone : cloneBlocks_) {
196 // Skip nullptrs and non-catch blocks
197 if (clone == nullptr || !clone->IsCatch()) {
198 continue;
199 }
200 Inst *inst = clone->GetFirstInst();
201 while (inst != nullptr) {
202 if (inst->IsCatchPhi() && !inst->CastToCatchPhi()->IsAcc()) {
203 fixThrowables(cloneInstMap_[inst]->CastToCatchPhi(), inst->CastToCatchPhi());
204 }
205 inst = inst->GetNext();
206 }
207 }
208 }
209
210 /// Clone the whole graph data-flow
BuildDataFlow()211 void GraphCloner::BuildDataFlow()
212 {
213 for (const auto &block : GetGraph()->GetVectorBlocks()) {
214 if (block == nullptr) {
215 continue;
216 }
217 auto blockClone = GetClone(block);
218 for (const auto &inst : block->Insts()) {
219 SetCloneInputs<false>(inst);
220 GetClone(inst)->SetId(inst->GetId());
221 UpdateCaller(inst);
222 }
223 for (const auto &inst : block->PhiInsts()) {
224 auto phi = inst->CastToPhi();
225 auto instClone = GetClone(inst);
226 instClone->SetId(inst->GetId());
227 for (const auto &clonePredBlock : blockClone->GetPredsBlocks()) {
228 auto it = std::find(cloneBlocks_.begin(), cloneBlocks_.end(), clonePredBlock);
229 ASSERT(it != cloneBlocks_.end());
230 size_t index = std::distance(cloneBlocks_.begin(), it);
231 ASSERT(GetGraph()->GetVectorBlocks().size() > index);
232 auto predBlock = GetGraph()->GetVectorBlocks()[index];
233 instClone->AppendInput(GetClone(phi->GetPhiInput(predBlock)));
234 }
235 }
236 }
237 }
238
CloneConstantsInfo(Graph * newGraph)239 void GraphCloner::CloneConstantsInfo(Graph *newGraph)
240 {
241 // Restore firstConstInst_ logic
242 BasicBlock *startBB = newGraph->GetStartBlock();
243 Inst *inst = startBB->GetFirstInst();
244 // Find first const if any
245 while (inst != nullptr) {
246 if (inst->IsConst()) {
247 newGraph->SetFirstConstInst(inst->CastToConstant());
248 break;
249 }
250 inst = inst->GetNext();
251 }
252 if (inst == nullptr) {
253 return;
254 }
255 // Put another constants in chain if any
256 ConstantInst *constInst = inst->CastToConstant();
257 ASSERT(constInst->GetBasicBlock() == startBB);
258 while ((inst = inst->GetNext()) != nullptr) {
259 if (inst->IsConst()) {
260 constInst->SetNextConst(inst->CastToConstant());
261 constInst = inst->CastToConstant();
262 }
263 }
264 }
265
PopulateResolverBlock(Loop * loop,BasicBlock * resolver,Inst * inst)266 void PopulateResolverBlock(Loop *loop, BasicBlock *resolver, Inst *inst)
267 {
268 Inst *phiResolver = nullptr;
269 auto userIt = inst->GetUsers().begin();
270 while (userIt != inst->GetUsers().end()) {
271 auto user = userIt->GetInst();
272 auto inputIdx = userIt->GetIndex();
273 ++userIt;
274 ASSERT(user->GetBasicBlock() != nullptr);
275 if (user->GetBasicBlock()->GetLoop() != loop) {
276 if (phiResolver == nullptr) {
277 phiResolver = inst->GetBasicBlock()->GetGraph()->CreateInstPhi(inst->GetType(), inst->GetPc());
278 phiResolver->AppendInput(inst);
279 resolver->AppendPhi(phiResolver);
280 }
281 user->SetInput(inputIdx, phiResolver);
282 }
283 }
284 }
285
286 /*
287 * Create resolver-block - common successor for all loop side-exits
288 */
CreateResolverBlock(Loop * loop,BasicBlock * backEdge)289 BasicBlock *GraphCloner::CreateResolverBlock(Loop *loop, BasicBlock *backEdge)
290 {
291 auto outsideSucc = GetLoopOutsideSuccessor(loop);
292 ASSERT(backEdge != nullptr);
293 auto resolver = backEdge->InsertNewBlockToSuccEdge(outsideSucc);
294 backEdge->GetLoop()->GetOuterLoop()->AppendBlock(resolver);
295 // Populate resolver-block with phis for each instruction which has outside-loop user
296 for (auto block : loop->GetBlocks()) {
297 for (auto inst : block->AllInsts()) {
298 PopulateResolverBlock(loop, resolver, inst);
299 }
300 }
301 return resolver;
302 }
303
GetCompareInst(Inst * ifimm)304 Inst *GraphCloner::GetCompareInst(Inst *ifimm)
305 {
306 auto compare = ifimm->GetInput(0).GetInst();
307 ASSERT(compare->GetOpcode() == Opcode::Compare);
308 // If there are intructions between `Compare` and `IfImm`, clone `Compare` and insert before `IfImm`
309 if (ifimm->GetPrev() != compare || !compare->HasSingleUser()) {
310 auto newCmp = compare->Clone(compare->GetBasicBlock()->GetGraph());
311 newCmp->SetInput(0, compare->GetInput(0).GetInst());
312 newCmp->SetInput(1, compare->GetInput(1).GetInst());
313 ifimm->InsertBefore(newCmp);
314 ifimm->SetInput(0, newCmp);
315 compare = newCmp;
316 }
317 return compare;
318 }
319
320 /*
321 * Split back-edge for cloning without side exits - in order not to clone `Compare` and `IfImm` instructions
322 */
SplitBackEdge(LoopUnrollData * unrollData,Loop * loop,BasicBlock * backEdge)323 BasicBlock *GraphCloner::SplitBackEdge(LoopUnrollData *unrollData, Loop *loop, BasicBlock *backEdge)
324 {
325 auto *ifimm = backEdge->GetLastInst();
326 ASSERT(ifimm->GetOpcode() == Opcode::IfImm);
327 auto *compare = GetCompareInst(ifimm);
328 if (compare->GetPrev() != nullptr) {
329 auto backEdgeSplit = backEdge->SplitBlockAfterInstruction(compare->GetPrev(), true);
330 loop->ReplaceBackEdge(backEdge, backEdgeSplit);
331 backEdge = backEdgeSplit;
332 } else {
333 ASSERT(backEdge->GetPredsBlocks().size() == 1);
334 auto it = std::find(unrollData->blocks->begin(), unrollData->blocks->end(), backEdge);
335 ASSERT(it != unrollData->blocks->end());
336 unrollData->blocks->erase(it);
337 unrollData->exitBlock = backEdge->GetPredBlockByIndex(0);
338 }
339 [[maybe_unused]] static constexpr auto BACK_EDGE_INST_COUNT = 2;
340 ASSERT(std::distance(backEdge->AllInsts().begin(), backEdge->AllInsts().end()) == BACK_EDGE_INST_COUNT);
341 return backEdge;
342 }
343
344 /**
345 * - Split loop-header into two blocks, header phis will not be cloned:
346 * [phi-insts]
347 * -----------
348 * [all-insts]
349 *
350 * - If loop is cloing with side-exits create common successor for them;
351 * - Otherwise split back-edge to cut `Compare` and `IfImm` instructions and not clone them;
352 */
PrepareLoopToUnroll(Loop * loop,bool cloneSideExits)353 GraphCloner::LoopUnrollData *GraphCloner::PrepareLoopToUnroll(Loop *loop, bool cloneSideExits)
354 {
355 ASSERT(loop != nullptr);
356 // Populate `LoopUnrollData`
357 auto allocator = loop->GetHeader()->GetGraph()->GetLocalAllocator();
358 auto unrollData = allocator->New<LoopUnrollData>();
359 ASSERT(unrollData != nullptr);
360 unrollData->blocks = allocator->New<ArenaVector<BasicBlock *>>(allocator->Adapter());
361 unrollData->blocks->resize(loop->GetBlocks().size());
362 std::copy(loop->GetBlocks().begin(), loop->GetBlocks().end(), unrollData->blocks->begin());
363 // Split loop-header
364 ASSERT(loop->GetBackEdges().size() == 1U);
365 auto backEdge = loop->GetBackEdges()[0];
366 ASSERT(backEdge != nullptr);
367 auto headerBlock = loop->GetHeader();
368 ASSERT(!headerBlock->IsEmpty());
369 if (headerBlock->HasPhi()) {
370 auto lastPhi = headerBlock->GetFirstInst()->GetPrev();
371 ASSERT(lastPhi != nullptr && lastPhi->IsPhi());
372 auto headerSplit = headerBlock->SplitBlockAfterInstruction(lastPhi, true);
373 ASSERT(loop->GetBlocks().front() == headerBlock);
374 unrollData->blocks->at(0) = headerSplit;
375
376 if (backEdge == headerBlock) {
377 loop->ReplaceBackEdge(headerBlock, headerSplit);
378 backEdge = headerSplit;
379 }
380 }
381 unrollData->exitBlock = backEdge;
382 if (cloneSideExits) {
383 unrollData->outer = CreateResolverBlock(loop, backEdge);
384 } else {
385 backEdge = SplitBackEdge(unrollData, loop, backEdge);
386 }
387 // Save replaceable phi inputs
388
389 unrollData->phiUpdateInputs = allocator->New<InstVector>(allocator->Adapter());
390 for (auto phi : headerBlock->PhiInsts()) {
391 ASSERT(unrollData->phiUpdateInputs != nullptr);
392 unrollData->phiUpdateInputs->push_back(phi->CastToPhi()->GetPhiInput(backEdge));
393 }
394 unrollData->header = headerBlock;
395 unrollData->backedge = backEdge;
396 return unrollData;
397 }
398
399 /// Update data-flow after unrolling without side-exits
UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData * unrollData)400 void GraphCloner::UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData *unrollData)
401 {
402 auto loop = unrollData->header->GetLoop();
403 // Update outloop users: replace inputs located in the original loop by theirs clones
404 auto compare = unrollData->backedge->GetLastInst()->GetPrev();
405 ASSERT(compare->GetOpcode() == Opcode::Compare);
406 for (size_t i = 0; i < compare->GetInputsCount(); i++) {
407 auto input = compare->GetInput(i).GetInst();
408 if (HasClone(input)) {
409 compare->SetInput(i, GetClone(input));
410 }
411 }
412 // update outloop users
413 for (auto block : *unrollData->blocks) {
414 for (auto inst : block->AllInsts()) {
415 UpdateOutloopUsers(loop, inst);
416 }
417 }
418
419 // All header-phi's outloop users are placed in the outer_bb after cloning the original loop
420 // So it's enough to to iterate outer_bb's phis and repalce header-phi by its backedge input
421 auto outerIdx = 1U - unrollData->backedge->GetSuccBlockIndex(unrollData->header);
422 auto outerBb = unrollData->backedge->GetSuccessor(outerIdx);
423 for (auto outerPhi : outerBb->PhiInsts()) {
424 auto headerPhi = outerPhi->CastToPhi()->GetPhiInput(unrollData->backedge);
425 if (headerPhi->IsPhi() && headerPhi->GetBasicBlock() == unrollData->header) {
426 outerPhi->ReplaceInput(headerPhi, headerPhi->CastToPhi()->GetPhiInput(unrollData->backedge));
427 }
428 }
429 }
430
UpdateOutloopUsers(Loop * loop,Inst * inst)431 void GraphCloner::UpdateOutloopUsers(Loop *loop, Inst *inst)
432 {
433 auto userIt = inst->GetUsers().begin();
434 while (userIt != inst->GetUsers().end()) {
435 auto user = userIt->GetInst();
436 auto inputIdx = userIt->GetIndex();
437 ++userIt;
438 ASSERT(user->GetBasicBlock() != nullptr);
439 if (user->GetBasicBlock()->GetLoop() != loop) {
440 user->SetInput(inputIdx, GetClone(inst));
441 }
442 }
443 }
444
445 /**
446 * Link cloned blocks with each other and insert them to the graph between the last-block and the output-block
447 *
448 * - No-side-exits case:
449 * /---->[header]
450 * | |
451 * | v
452 * | [loop-body] << last-block << exit-block
453 * | |
454 * | v
455 * \-----[backedge]----> ...
456 *
457 * New control-flow:
458 * /---->[header]
459 * | |
460 * | v
461 * | [loop-body] << exit-block
462 * | |
463 * | v
464 * | [loop-body-clone] << last-block
465 * | |
466 * | v
467 * \-----[backedge]----> ...
468 *
469 *
470 * Side-exits case:
471 * /---->[header]
472 * | |
473 * | v
474 * | [loop-body]
475 * | |
476 * | v
477 * \-----[backedge] << last-block << exit-block
478 * |
479 * v
480 * [outer]-----> ...
481 *
482 * New control-flow:
483 * /---->[header]
484 * | |
485 * | v
486 * | [loop-body]
487 * | |
488 * | v
489 * | [backedge]------------\ << exit-block
490 * | | |
491 * | v |
492 * | [loop-body-clone] |
493 * | | |
494 * | v |
495 * \-----[backedge-clone]----->| << last-block
496 * |
497 * v
498 * [outer]-----> ...
499 */
BuildLoopUnrollControlFlow(LoopUnrollData * unrollData)500 void GraphCloner::BuildLoopUnrollControlFlow(LoopUnrollData *unrollData)
501 {
502 auto frontBlock = unrollData->blocks->front();
503 auto loop = frontBlock->GetLoop();
504
505 // Copy 'blocks' control-flow to the 'clones'
506 for (auto block : *unrollData->blocks) {
507 ASSERT(block->GetLoop() == loop);
508 loop->AppendBlock(GetClone(block));
509
510 if (block != frontBlock) {
511 CloneEdges<CloneEdgeType::EDGE_PRED>(block);
512 }
513 if (block != unrollData->exitBlock) {
514 CloneEdges<CloneEdgeType::EDGE_SUCC>(block);
515 }
516 }
517
518 auto frontClone = GetClone(frontBlock);
519 auto exitClone = GetClone(unrollData->exitBlock);
520 if (unrollData->outer == nullptr) {
521 ASSERT(unrollData->backedge->GetPredsBlocks().size() == 1);
522 auto lastBlock = unrollData->backedge->GetPredsBlocks()[0];
523 lastBlock->ReplaceSucc(unrollData->backedge, frontClone);
524 unrollData->backedge->ReplacePred(lastBlock, exitClone);
525 } else {
526 ASSERT(!unrollData->outer->GetPredsBlocks().empty());
527 auto lastBlock = unrollData->outer->GetPredsBlocks().back();
528 lastBlock->ReplaceSucc(unrollData->header, frontClone);
529 unrollData->header->ReplacePred(lastBlock, exitClone);
530
531 exitClone->AddSucc(unrollData->outer);
532 if (exitClone->GetSuccBlockIndex(unrollData->outer) != lastBlock->GetSuccBlockIndex(unrollData->outer)) {
533 exitClone->SwapTrueFalseSuccessors();
534 }
535 auto newBackedge = GetClone(unrollData->exitBlock);
536 loop->ReplaceBackEdge(unrollData->backedge, newBackedge);
537 unrollData->backedge = newBackedge;
538 }
539 }
540
541 /**
542 * Construct dataflow for the cloned instructions
543 * if input of the original instruction is front-block phi - insert replaceable input of this phi
544 */
BuildLoopUnrollDataFlow(LoopUnrollData * unrollData)545 void GraphCloner::BuildLoopUnrollDataFlow(LoopUnrollData *unrollData)
546 {
547 for (auto block : *unrollData->blocks) {
548 for (auto inst : block->AllInsts()) {
549 if (inst->IsMarked(cloneMarker_)) {
550 SetCloneInputs<true>(inst, unrollData->backedge);
551 UpdateCaller(inst);
552 }
553 }
554 }
555
556 // Append input to the phi-resolver from outer block, it holds all instructions which have outside loop users
557 auto loop = unrollData->blocks->front()->GetLoop();
558 if (unrollData->outer != nullptr) {
559 for (auto phi : unrollData->outer->PhiInsts()) {
560 auto inst = phi->GetInput(0).GetInst();
561 if (IsInstLoopHeaderPhi(inst, loop)) {
562 auto update = inst->CastToPhi()->GetPhiInput(unrollData->backedge);
563 phi->AppendInput(update);
564 } else {
565 phi->AppendInput(GetClone(inst));
566 }
567 }
568 }
569
570 // NOTE (a.popov) use temp container after if would be possible to reset local allocator
571 if (unrollData->phiReplacedInputs == nullptr) {
572 unrollData->phiReplacedInputs = allocator_->New<PhiInputsMap>(allocator_->Adapter());
573 } else {
574 unrollData->phiReplacedInputs->clear();
575 }
576
577 // Set new update inputs for header phis
578 size_t phiCount = 0;
579 for (auto phi : loop->GetHeader()->PhiInsts()) {
580 auto input = unrollData->phiUpdateInputs->at(phiCount);
581 if (HasClone(input)) {
582 input = GetClone(input);
583 } else if (input->IsPhi() && input->GetBasicBlock()->GetLoop() == loop) {
584 if (phi->IsDominate(input)) {
585 input = input->CastToPhi()->GetPhiInput(unrollData->backedge);
586 } else {
587 // phi should be visited and its input should be added to the map
588 ASSERT(unrollData->phiReplacedInputs->count(input) == 1);
589 ASSERT(unrollData->phiReplacedInputs != nullptr);
590 input = unrollData->phiReplacedInputs->at(input);
591 }
592 }
593
594 auto phiUpdateInputIdx = phi->CastToPhi()->GetPredBlockIndex(unrollData->backedge);
595 unrollData->phiReplacedInputs->emplace(phi, phi->GetInput(phiUpdateInputIdx).GetInst());
596 phi->SetInput(phiUpdateInputIdx, input);
597 phiCount++;
598 }
599 }
600
RemoveLoopBackEdge(const LoopUnrollData * unrollData)601 void GraphCloner::RemoveLoopBackEdge(const LoopUnrollData *unrollData)
602 {
603 auto lastBlock = unrollData->backedge;
604 ASSERT(unrollData->outer == nullptr || lastBlock == unrollData->outer->GetPredsBlocks().back());
605
606 // Erase control-flow instruction
607 auto ifimm = lastBlock->GetLastInst();
608 ASSERT(ifimm->GetOpcode() == Opcode::IfImm);
609 lastBlock->RemoveInst(ifimm);
610 // Remove back-edge
611 auto header = unrollData->header;
612 // Clear header block, it should only contain phi
613 ASSERT(header->GetFirstInst() == nullptr);
614 for (auto phi : header->PhiInstsSafe()) {
615 auto remainingInst = phi->CastToPhi()->GetPhiInput(header->GetLoop()->GetPreHeader());
616 phi->ReplaceUsers(remainingInst);
617 header->RemoveInst(phi);
618 }
619
620 lastBlock->RemoveSucc(header);
621 header->RemovePred(lastBlock);
622
623 // Clear outer phis if it has single predecessor
624 if (unrollData->outer != nullptr && unrollData->outer->GetPredsBlocks().size() == 1U) {
625 for (auto phi : unrollData->outer->PhiInstsSafe()) {
626 auto remainingInst = phi->GetInput(0).GetInst();
627 phi->ReplaceUsers(remainingInst);
628 unrollData->outer->RemoveInst(phi);
629 }
630 }
631 }
632
RemoveLoopPreHeader(const LoopUnrollData * unrollData)633 void GraphCloner::RemoveLoopPreHeader(const LoopUnrollData *unrollData)
634 {
635 ASSERT(unrollData->header->GetPredsBlocks().size() == 1);
636 // Loop::GetPreHeader may be invalid here
637 auto preHeader = unrollData->header->GetPredBlockByIndex(0);
638 auto ifimm = preHeader->GetLastInst();
639 ASSERT(ifimm->GetOpcode() == Opcode::IfImm);
640 preHeader->RemoveInst(ifimm);
641
642 // Loop header should be removed from backedge successors before call to this function
643 ASSERT(unrollData->backedge->GetSuccsBlocks().size() == 1);
644 auto removedSucc = unrollData->backedge->GetSuccessor(0);
645
646 for (auto phi : removedSucc->PhiInstsSafe()) {
647 if (removedSucc->GetPredsBlocks().size() == 2U) {
648 auto remainingInst = phi->CastToPhi()->GetPhiInput(unrollData->backedge);
649 phi->ReplaceUsers(remainingInst);
650 removedSucc->RemoveInst(phi);
651 } else {
652 phi->RemoveInput(phi->CastToPhi()->GetPredBlockIndex(preHeader));
653 }
654 }
655
656 preHeader->RemoveSucc(removedSucc);
657 removedSucc->RemovePred(preHeader);
658 }
659
BuildClonedLoopHeaderDataFlow(const BasicBlock & block,BasicBlock * resolver,BasicBlock * clone)660 void GraphCloner::BuildClonedLoopHeaderDataFlow(const BasicBlock &block, BasicBlock *resolver, BasicBlock *clone)
661 {
662 for (auto inst : block.Insts()) {
663 if (inst->IsMarked(cloneMarker_)) {
664 SetCloneInputs<true>(inst, clone);
665 UpdateUsersForClonedLoopHeader(inst, resolver);
666 UpdateCaller(inst);
667 }
668 }
669 for (auto phi : block.PhiInsts()) {
670 ASSERT(phi->GetInputsCount() == 2U);
671 auto preloopInput = phi->CastToPhi()->GetPhiInput(clone);
672 // Create phi instruction in the `resolver` block with two inputs: current phi and phi's preloop_input
673 auto resolverPhi = GetGraph()->CreateInstPhi(phi->GetType(), phi->GetPc());
674 for (auto userIt = phi->GetUsers().begin(); userIt != phi->GetUsers().end();) {
675 auto user = userIt->GetInst();
676 auto inputIndex = userIt->GetIndex();
677 ++userIt;
678 ASSERT(user->GetBasicBlock() != nullptr);
679 auto userLoop = user->GetBasicBlock()->GetLoop();
680 if (userLoop != block.GetLoop() && !userLoop->IsInside(block.GetLoop())) {
681 user->SetInput(inputIndex, resolverPhi);
682 }
683 }
684 if (resolverPhi->HasUsers()) {
685 resolverPhi->AppendInput(phi);
686 resolverPhi->AppendInput(preloopInput);
687 resolver->AppendPhi(resolverPhi);
688 }
689 }
690 }
691
692 /**
693 * Used by Loop peeling to clone loop-header and insert this clone before loop-header.
694 * Created block-resolved - common succesor for loop-header and its clone
695 *
696 * [replaceable_pred]
697 * |
698 * v
699 * ... --->[block]--------\
700 * | |
701 * v v
702 * ... [outer]
703 *
704 *
705 * [replaceable_pred]
706 * |
707 * v
708 * [clone_block]---------\
709 * | |
710 * v |
711 * ... --->[block]--------\ |
712 * | | |
713 * v v v
714 * ... [resolver]
715 * |
716 * v
717 * [outer]
718 */
CloneLoopHeader(BasicBlock * block,BasicBlock * outer,BasicBlock * replaceablePred)719 BasicBlock *GraphCloner::CloneLoopHeader(BasicBlock *block, BasicBlock *outer, BasicBlock *replaceablePred)
720 {
721 ASSERT(GetGraph()->IsAnalysisValid<DominatorsTree>());
722 ASSERT(cloneMarker_ == UNDEF_MARKER);
723 auto markerHolder = MarkerHolder(GetGraph());
724 cloneMarker_ = markerHolder.GetMarker();
725 cloneInstructions_.clear();
726 size_t instCount = 0;
727 // Build control-flow
728 auto resolver = block->InsertNewBlockToSuccEdge(outer);
729 outer->GetLoop()->AppendBlock(resolver);
730 if (outer->GetLoop()->HasBackEdge(block)) {
731 outer->GetLoop()->ReplaceBackEdge(block, resolver);
732 }
733 auto cloneBlock = replaceablePred->InsertNewBlockToSuccEdge(block);
734 ASSERT(block->GetLoop()->GetPreHeader() == replaceablePred);
735 block->GetLoop()->SetPreHeader(cloneBlock);
736 ASSERT(replaceablePred->GetNextLoop() == nullptr);
737 replaceablePred->GetLoop()->AppendBlock(cloneBlock);
738 cloneBlock->AddSucc(resolver);
739 // Check the order of true-false successors
740 if (cloneBlock->GetSuccBlockIndex(resolver) != block->GetSuccBlockIndex(resolver)) {
741 cloneBlock->SwapTrueFalseSuccessors();
742 }
743 // Fix Dominators info
744 auto &domTree = GetGraph()->GetAnalysis<DominatorsTree>();
745 domTree.SetValid(true);
746 ASSERT(block->GetDominator() == replaceablePred);
747 replaceablePred->RemoveDominatedBlock(block);
748 domTree.SetDomPair(cloneBlock, block);
749 domTree.SetDomPair(replaceablePred, cloneBlock);
750 domTree.SetDomPair(cloneBlock, resolver);
751 if (outer->GetDominator() == block) {
752 block->RemoveDominatedBlock(outer);
753 domTree.SetDomPair(resolver, outer);
754 }
755 CloneInstructions<InstCloneType::CLONE_INSTS, true>(block, cloneBlock, &instCount);
756 BuildClonedLoopHeaderDataFlow(*block, resolver, cloneBlock);
757 cloneBlock->SetAllFields(block->GetAllFields());
758 return cloneBlock;
759 }
760
761 /**
762 * Use the following logic cloning the users:
763 * - replace inputs of all users, placed OUTSIDE cloneable loop, by the new `phi_out` instruction
764 * - `phi_out` is appended to the outer-block
765 * - replace inputs of all users, placed INSIDE cloneable loop, but not cloned, by the new `phi_in` instruction
766 * - `phi_in` is appended to the `inst` basic block
767 * - `phi_in\phi_out` have `inst` and its clone as inputs
768 */
UpdateUsersForClonedLoopHeader(Inst * inst,BasicBlock * outerBlock)769 void GraphCloner::UpdateUsersForClonedLoopHeader(Inst *inst, BasicBlock *outerBlock)
770 {
771 if (!inst->HasUsers()) {
772 return;
773 }
774 auto instBlock = inst->GetBasicBlock();
775 auto clone = GetClone(inst);
776 auto cloneBlock = clone->GetBasicBlock();
777 ASSERT(cloneBlock != nullptr);
778 // phi for inside users
779 auto phiIn = GetGraph()->CreateInstPhi(inst->GetType(), inst->GetPc());
780 // phi for outside users
781 auto phiOut = GetGraph()->CreateInstPhi(inst->GetType(), inst->GetPc());
782 auto userIt = inst->GetUsers().begin();
783 while (userIt != inst->GetUsers().end()) {
784 auto user = userIt->GetInst();
785 auto inputIdx = userIt->GetIndex();
786 ++userIt;
787 ASSERT(user->GetBasicBlock() != nullptr);
788 auto userLoop = user->GetBasicBlock()->GetLoop();
789 if (userLoop == instBlock->GetLoop() || userLoop->IsInside(instBlock->GetLoop())) {
790 // user inside loop
791 // skip users that will be moved to the loop-exit block
792 if (user->GetBasicBlock() == instBlock && !user->IsPhi()) {
793 continue;
794 }
795 user->SetInput(inputIdx, phiIn);
796 } else {
797 // user outside loop
798 user->SetInput(inputIdx, phiOut);
799 }
800 }
801
802 if (phiIn->HasUsers()) {
803 auto cloneIndex {instBlock->GetPredBlockIndex(cloneBlock)};
804 phiIn->AppendInput(clone);
805 phiIn->AppendInput(inst);
806 phiIn->CastToPhi()->SetPhiInputBbNum(0, cloneIndex);
807 phiIn->CastToPhi()->SetPhiInputBbNum(1, 1 - cloneIndex);
808
809 auto firstPhi = instBlock->GetFirstPhi();
810 if (firstPhi == nullptr) {
811 instBlock->AppendPhi(phiIn);
812 } else {
813 instBlock->InsertBefore(phiIn, firstPhi);
814 }
815 }
816
817 if (phiOut->HasUsers()) {
818 phiOut->AppendInput(inst);
819 phiOut->AppendInput(clone);
820 outerBlock->AppendPhi(phiOut);
821 }
822 }
823
IsInstLoopHeaderPhi(Inst * inst,Loop * loop)824 bool GraphCloner::IsInstLoopHeaderPhi(Inst *inst, Loop *loop)
825 {
826 return inst->IsPhi() && inst->GetBasicBlock() == loop->GetHeader();
827 }
828
829 /**
830 * Create clone of loop and insert it after original loop:
831 *
832 * /----[pre-loop]
833 * | |
834 * | v
835 * | [loop-body]<----\
836 * | | | |
837 * | | \-------/
838 * | |
839 * | v
840 * \--->[outside-block]
841 * |
842 * v
843 * /----[pre-loop']
844 * | |
845 * | v
846 * | [loop-body']<----\
847 * | | | |
848 * | | \-------/
849 * | |
850 * | v
851 * \--->[outside-block']
852 */
CloneLoop(Loop * loop)853 Loop *GraphCloner::CloneLoop(Loop *loop)
854 {
855 ASSERT(loop != nullptr && !loop->IsRoot());
856 ASSERT_PRINT(IsLoopSingleBackEdgeExitPoint(loop), "Cloning blocks doesn't have single entry/exit point");
857 ASSERT(loop->GetPreHeader() != nullptr);
858 ASSERT(cloneMarker_ == UNDEF_MARKER);
859
860 auto markerHolder = MarkerHolder(GetGraph());
861 cloneMarker_ = markerHolder.GetMarker();
862 SplitPreHeader(loop);
863 auto unrollData = PrepareLoopToClone(loop);
864
865 ASSERT(unrollData != nullptr);
866 CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, true>(*unrollData->blocks, GetGraph());
867 BuildLoopCloneControlFlow(unrollData);
868 BuildLoopCloneDataFlow(unrollData);
869 MakeLoopCloneInfo(unrollData);
870 GetGraph()->RunPass<DominatorsTree>();
871
872 auto cloneLoop = GetClone(loop->GetHeader())->GetLoop();
873 ASSERT(cloneLoop != loop && cloneLoop->GetOuterLoop() == loop->GetOuterLoop());
874 COMPILER_LOG(DEBUG, GRAPH_CLONER) << "Loop " << loop->GetId() << " is copied";
875 COMPILER_LOG(DEBUG, GRAPH_CLONER) << "Created new loop, id = " << cloneLoop->GetId();
876 return cloneLoop;
877 }
878
CreateNewOutsideSucc(BasicBlock * outsideSucc,BasicBlock * backEdge,BasicBlock * preHeader)879 BasicBlock *GraphCloner::CreateNewOutsideSucc(BasicBlock *outsideSucc, BasicBlock *backEdge, BasicBlock *preHeader)
880 {
881 auto backEdgeIdx = outsideSucc->GetPredBlockIndex(backEdge);
882 auto preHeaderIdx = outsideSucc->GetPredBlockIndex(preHeader);
883 auto rmIdxMax = std::max(backEdgeIdx, preHeaderIdx);
884 auto rmIdxMin = std::min(backEdgeIdx, preHeaderIdx);
885 auto newOutsideSucc = GetGraph()->CreateEmptyBlock();
886 outsideSucc->GetLoop()->AppendBlock(newOutsideSucc);
887 backEdge->ReplaceSucc(outsideSucc, newOutsideSucc);
888 preHeader->ReplaceSucc(outsideSucc, newOutsideSucc);
889 newOutsideSucc->AddSucc(outsideSucc);
890 for (auto phi : outsideSucc->PhiInsts()) {
891 auto newPhi = GetGraph()->CreateInstPhi(phi->GetType(), phi->GetPc());
892 newPhi->AppendInput(phi->CastToPhi()->GetPhiInput(backEdge));
893 newPhi->AppendInput(phi->CastToPhi()->GetPhiInput(preHeader));
894 phi->AppendInput(newPhi);
895 auto phiBackEdgeIdx {phi->CastToPhi()->GetPredBlockIndex(backEdge)};
896 auto phiPreHeaderIdx {phi->CastToPhi()->GetPredBlockIndex(preHeader)};
897 phi->RemoveInput(rmIdxMax == backEdgeIdx ? phiBackEdgeIdx : phiPreHeaderIdx);
898 phi->RemoveInput(rmIdxMin == preHeaderIdx ? phiPreHeaderIdx : phiBackEdgeIdx);
899 newOutsideSucc->AppendPhi(newPhi);
900 }
901 outsideSucc->RemovePred(rmIdxMax);
902 outsideSucc->RemovePred(rmIdxMin);
903
904 COMPILER_LOG(DEBUG, GRAPH_CLONER) << "New loop outside block created: " << newOutsideSucc->GetId();
905 return newOutsideSucc;
906 }
907
PopulateLoopClonerData(Loop * loop,BasicBlock * preHeader,BasicBlock * outsideSucc)908 GraphCloner::LoopClonerData *GraphCloner::PopulateLoopClonerData(Loop *loop, BasicBlock *preHeader,
909 BasicBlock *outsideSucc)
910 {
911 auto allocator = GetGraph()->GetLocalAllocator();
912 auto clonerData = allocator->New<LoopClonerData>();
913 ASSERT(clonerData != nullptr);
914 clonerData->blocks = allocator->New<ArenaVector<BasicBlock *>>(allocator->Adapter());
915 ASSERT(clonerData->blocks != nullptr);
916 clonerData->blocks->resize(loop->GetBlocks().size() + 1);
917 clonerData->blocks->at(0) = preHeader;
918 std::copy(loop->GetBlocks().begin(), loop->GetBlocks().end(), clonerData->blocks->begin() + 1);
919 clonerData->blocks->push_back(outsideSucc);
920 clonerData->outer = outsideSucc;
921 clonerData->header = loop->GetHeader();
922 clonerData->preHeader = loop->GetPreHeader();
923 return clonerData;
924 }
925
926 // Split pre-header to contain `Compare` and `IfImm` instructions only;
SplitPreHeader(Loop * loop)927 void GraphCloner::SplitPreHeader(Loop *loop)
928 {
929 ASSERT(loop != nullptr);
930 auto preHeader = loop->GetPreHeader();
931 auto *ifimm = preHeader->GetLastInst();
932 ASSERT(ifimm != nullptr && ifimm->GetOpcode() == Opcode::IfImm);
933 auto *compare = GetCompareInst(ifimm);
934 if (compare->GetPrev() != nullptr) {
935 auto newPreHeader = preHeader->SplitBlockAfterInstruction(compare->GetPrev(), true);
936 loop->SetPreHeader(newPreHeader);
937 // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
938 preHeader = newPreHeader;
939 }
940 [[maybe_unused]] static constexpr auto PRE_HEADER_INST_COUNT = 2;
941 ASSERT(std::distance(preHeader->AllInsts().begin(), preHeader->AllInsts().end()) == PRE_HEADER_INST_COUNT);
942 }
943
944 /**
945 * - Make sure `outsideSucc` has 2 predecessors only: loop header and back-edge;
946 * - Split `outsideSucc` to contain phi-instructions only;
947 */
PrepareLoopToClone(Loop * loop)948 GraphCloner::LoopClonerData *GraphCloner::PrepareLoopToClone(Loop *loop)
949 {
950 // If `outsideSucc` has more than 2 predecessors, create a new one
951 // with loop header and back-edge predecessors only and insert it before `outsideSucc`
952 auto preHeader = loop->GetPreHeader();
953 auto outsideSucc = GetLoopOutsideSuccessor(loop);
954 constexpr auto PREDS_NUM = 2;
955 if (outsideSucc->GetPredsBlocks().size() > PREDS_NUM) {
956 auto backEdge = loop->GetBackEdges()[0];
957 outsideSucc = CreateNewOutsideSucc(outsideSucc, backEdge, preHeader);
958 }
959 // Split outside succ after last phi
960 // create empty block before outside succ if outside succ don't contain phi insts
961 ASSERT(outsideSucc != nullptr);
962 if (outsideSucc->HasPhi() && outsideSucc->GetFirstInst() != nullptr) {
963 auto lastPhi = outsideSucc->GetFirstInst()->GetPrev();
964 auto block = outsideSucc->SplitBlockAfterInstruction(lastPhi, true);
965 // if `outsideSucc` is pre-header replace it by `block`
966 for (auto inLoop : loop->GetOuterLoop()->GetInnerLoops()) {
967 if (inLoop->GetPreHeader() == outsideSucc) {
968 inLoop->SetPreHeader(block);
969 }
970 }
971 } else if (outsideSucc->GetFirstInst() != nullptr) {
972 auto block = outsideSucc->InsertEmptyBlockBefore();
973 outsideSucc->GetLoop()->AppendBlock(block);
974 outsideSucc = block;
975 }
976 // Populate `LoopClonerData`
977 return PopulateLoopClonerData(loop, preHeader, outsideSucc);
978 }
979
980 /// Create new loop, populate it with cloned blocks and build conrlow-flow
BuildLoopCloneControlFlow(LoopClonerData * unrollData)981 void GraphCloner::BuildLoopCloneControlFlow(LoopClonerData *unrollData)
982 {
983 ASSERT(unrollData != nullptr);
984 auto outerClone = GetClone(unrollData->outer);
985 auto preHeaderClone = GetClone(unrollData->preHeader);
986
987 while (!unrollData->outer->GetSuccsBlocks().empty()) {
988 auto succ = unrollData->outer->GetSuccsBlocks().front();
989 succ->ReplacePred(unrollData->outer, outerClone);
990 unrollData->outer->RemoveSucc(succ);
991 }
992 unrollData->outer->AddSucc(preHeaderClone);
993
994 for (auto &block : *unrollData->blocks) {
995 if (block != unrollData->preHeader) {
996 CloneEdges<CloneEdgeType::EDGE_PRED>(block);
997 }
998 if (block != unrollData->outer) {
999 CloneEdges<CloneEdgeType::EDGE_SUCC>(block);
1000 }
1001 }
1002 ASSERT(unrollData->outer->GetPredBlockIndex(unrollData->preHeader) ==
1003 outerClone->GetPredBlockIndex(preHeaderClone));
1004 ASSERT(unrollData->header->GetPredBlockIndex(unrollData->preHeader) ==
1005 GetClone(unrollData->header)->GetPredBlockIndex(preHeaderClone));
1006 }
1007
1008 /// Insert cloned loop into loop-tree and populated with cloned blocks
MakeLoopCloneInfo(LoopClonerData * unrollData)1009 void GraphCloner::MakeLoopCloneInfo(LoopClonerData *unrollData)
1010 {
1011 ASSERT(unrollData != nullptr);
1012 // Update loop tree
1013 auto loop = unrollData->header->GetLoop();
1014 auto headerClone = GetClone(loop->GetHeader());
1015 auto cloneLoop = GetGraph()->GetAnalysis<LoopAnalyzer>().CreateNewLoop(headerClone);
1016 auto outerLoop = loop->GetOuterLoop();
1017 outerLoop->AppendInnerLoop(cloneLoop);
1018 ASSERT(cloneLoop != nullptr);
1019 cloneLoop->SetOuterLoop(outerLoop);
1020
1021 // Populate cloned loop
1022 auto preLoopClone = GetClone(unrollData->preHeader);
1023 auto outsideSuccClone = GetClone(unrollData->outer);
1024 cloneLoop->SetPreHeader(preLoopClone);
1025 outerLoop->AppendBlock(preLoopClone);
1026 outerLoop->AppendBlock(outsideSuccClone);
1027 for (auto &block : loop->GetBlocks()) {
1028 if (!block->IsLoopHeader()) {
1029 cloneLoop->AppendBlock(GetClone(block));
1030 }
1031 }
1032 for (auto backEdge : loop->GetBackEdges()) {
1033 cloneLoop->AppendBackEdge(GetClone(backEdge));
1034 }
1035 }
1036
1037 /// Find or create phi in the outside_succ block with the same inputs as `check_phi`
GetPhiResolver(Inst * checkPhi,BasicBlock * outsideSucc,BasicBlock * preHeader)1038 Inst *GetPhiResolver(Inst *checkPhi, BasicBlock *outsideSucc, BasicBlock *preHeader)
1039 {
1040 [[maybe_unused]] constexpr auto MAX_PREDS_NUM = 2;
1041 ASSERT(outsideSucc->GetPredsBlocks().size() == MAX_PREDS_NUM);
1042 ASSERT(checkPhi->GetBasicBlock()->IsLoopHeader());
1043 auto initIdx = checkPhi->CastToPhi()->GetPredBlockIndex(preHeader);
1044 auto initInput = checkPhi->GetInput(initIdx).GetInst();
1045 auto updateInput = checkPhi->GetInput(1 - initIdx).GetInst();
1046
1047 for (auto phi : outsideSucc->PhiInsts()) {
1048 auto idx {phi->CastToPhi()->GetPredBlockIndex(preHeader)};
1049 if (phi->GetInput(idx).GetInst() == initInput && phi->GetInput(1 - idx).GetInst() == updateInput) {
1050 return phi;
1051 }
1052 }
1053
1054 auto phiResolver = outsideSucc->GetGraph()->CreateInstPhi(checkPhi->GetType(), checkPhi->GetPc());
1055 auto outInitIdx = outsideSucc->GetPredBlockIndex(preHeader);
1056 phiResolver->AppendInput(initInput);
1057 phiResolver->AppendInput(updateInput);
1058 phiResolver->SetPhiInputBbNum(0, outInitIdx);
1059 phiResolver->SetPhiInputBbNum(1, 1 - outInitIdx);
1060
1061 outsideSucc->AppendPhi(phiResolver);
1062 return phiResolver;
1063 }
1064
ProcessMarkedInsts(LoopClonerData * data)1065 void GraphCloner::ProcessMarkedInsts(LoopClonerData *data)
1066 {
1067 for (const auto &block : *data->blocks) {
1068 for (const auto &inst : block->AllInsts()) {
1069 if (inst->GetOpcode() == Opcode::NOP) {
1070 continue;
1071 }
1072 if (inst->IsMarked(cloneMarker_)) {
1073 SetCloneInputs<false>(inst);
1074 UpdateCaller(inst);
1075 }
1076 }
1077 }
1078 }
1079
1080 /// Build data-flow for cloned instructions
BuildLoopCloneDataFlow(LoopClonerData * unrollData)1081 void GraphCloner::BuildLoopCloneDataFlow(LoopClonerData *unrollData)
1082 {
1083 ASSERT(unrollData != nullptr);
1084 ProcessMarkedInsts(unrollData);
1085
1086 auto preLoopClone = GetClone(unrollData->preHeader);
1087 for (auto phi : unrollData->outer->PhiInsts()) {
1088 auto phiClone = GetClone(phi);
1089 phi->ReplaceUsers(phiClone);
1090 auto idx = phiClone->CastToPhi()->GetPredBlockIndex(preLoopClone);
1091 phiClone->SetInput(idx, phi);
1092 }
1093
1094 auto compare = preLoopClone->GetFirstInst();
1095 auto exitBlock = unrollData->outer->GetPredBlockByIndex(0);
1096 if (exitBlock == unrollData->preHeader) {
1097 exitBlock = unrollData->outer->GetPredBlockByIndex(1);
1098 } else {
1099 ASSERT(unrollData->outer->GetPredBlockByIndex(1) == unrollData->preHeader);
1100 }
1101 auto backEdgeCompare = exitBlock->GetLastInst()->GetInput(0).GetInst();
1102 ASSERT(compare->GetOpcode() == Opcode::Compare);
1103 ASSERT(backEdgeCompare->GetOpcode() == Opcode::Compare);
1104 for (auto phi : unrollData->header->PhiInsts()) {
1105 auto initIdx = phi->CastToPhi()->GetPredBlockIndex(unrollData->preHeader);
1106 ASSERT(GetClone(phi)->CastToPhi()->GetPredBlockIndex(preLoopClone) == initIdx);
1107
1108 auto init = phi->GetInput(initIdx).GetInst();
1109 auto update = phi->GetInput(1 - initIdx).GetInst();
1110 auto resolverPhi = GetPhiResolver(phi, unrollData->outer, unrollData->preHeader);
1111 auto clonePhi = GetClone(phi);
1112 ASSERT(clonePhi->GetInput(initIdx).GetInst() == init);
1113 clonePhi->SetInput(initIdx, resolverPhi);
1114 for (size_t i = 0; i < compare->GetInputsCount(); i++) {
1115 if (compare->GetInput(i).GetInst() == init && backEdgeCompare->GetInput(i).GetInst() == update) {
1116 compare->SetInput(i, resolverPhi);
1117 break;
1118 }
1119 }
1120 }
1121 }
1122
UpdateCaller(Inst * inst)1123 void GraphCloner::UpdateCaller(Inst *inst)
1124 {
1125 if (inst->IsSaveState()) {
1126 auto caller = static_cast<SaveStateInst *>(inst)->GetCallerInst();
1127 if (caller != nullptr && caller->IsInlined() && HasClone(caller)) {
1128 auto ssClone = GetClone(inst);
1129 auto callerClone = GetClone(caller);
1130 static_cast<SaveStateInst *>(ssClone)->SetCallerInst(static_cast<CallInst *>(callerClone));
1131 }
1132 }
1133 }
1134 } // namespace ark::compiler
1135