//===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the CfgNode class, including the complexities of
/// instruction insertion and in-edge calculation.
///
//===----------------------------------------------------------------------===//

#include "IceCfgNode.h"

#include "IceAssembler.h"
#include "IceCfg.h"
#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"

namespace Ice {

// Adds an instruction to either the Phi list or the regular instruction list.
// Validates that all Phis are added before all regular instructions.
void CfgNode::appendInst(Inst *Instr) {
  ++InstCountEstimate;

  if (BuildDefs::wasm()) {
    if (llvm::isa<InstSwitch>(Instr) || llvm::isa<InstBr>(Instr)) {
      for (auto *N : Instr->getTerminatorEdges()) {
        N->addInEdge(this);
        addOutEdge(N);
      }
    }
  }

  if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
    if (!Insts.empty()) {
      Func->setError("Phi instruction added to the middle of a block");
      return;
    }
    Phis.push_back(Phi);
  } else {
    Insts.push_back(Instr);
  }
}

void CfgNode::replaceInEdge(CfgNode *Old, CfgNode *New) {
  for (SizeT i = 0; i < InEdges.size(); ++i) {
    if (InEdges[i] == Old) {
      InEdges[i] = New;
    }
  }
  for (auto &Inst : getPhis()) {
    auto &Phi = llvm::cast<InstPhi>(Inst);
    for (SizeT i = 0; i < Phi.getSrcSize(); ++i) {
      if (Phi.getLabel(i) == Old) {
        Phi.setLabel(i, New);
      }
    }
  }
}

namespace {
template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
  const bool DoDelete =
      BuildDefs::minimal() || !getFlags().getKeepDeletedInsts();
  auto I = L->begin(), E = L->end(), Next = I;
  for (++Next; I != E; I = Next++) {
    if (DoDelete && I->isDeleted()) {
      L->remove(I);
    } else {
      I->renumber(Func);
    }
  }
}
} // end of anonymous namespace

void CfgNode::renumberInstructions() {
  InstNumberT FirstNumber = Func->getNextInstNumber();
  removeDeletedAndRenumber(&Phis, Func);
  removeDeletedAndRenumber(&Insts, Func);
  InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
}

// When a node is created, the OutEdges are immediately known, but the InEdges
// have to be built up incrementally. After the CFG has been constructed, the
// computePredecessors() pass finalizes it by creating the InEdges list.
void CfgNode::computePredecessors() {
  for (CfgNode *Succ : OutEdges)
    Succ->InEdges.push_back(this);
}

void CfgNode::computeSuccessors() {
  OutEdges.clear();
  InEdges.clear();
  assert(!Insts.empty());
  OutEdges = Insts.rbegin()->getTerminatorEdges();
}

// Ensure each Phi instruction in the node is consistent with respect to
// control flow. For each predecessor, there must be a phi argument with that
// label. If a phi argument's label doesn't appear in the predecessor list
// (which can happen as a result of e.g. unreachable node elimination), its
// value is modified to be zero, to maintain consistency in liveness analysis.
// This allows us to remove some dead control flow without a major rework of
// the phi instructions. We don't check that phi arguments with the same label
// have the same value.
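// For example (an illustrative sketch): if predecessor %bb1 was removed as
// unreachable but a phi still carries the argument "[ %x, %bb1 ]", that
// argument's value is cleared so that liveness analysis treats it as zero.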
void CfgNode::enforcePhiConsistency() {
  for (Inst &Instr : Phis) {
    auto *Phi = llvm::cast<InstPhi>(&Instr);
    // We do a simple O(N^2) algorithm to check for consistency. Even so, it
    // shows up as only about 0.2% of the total translation time. But if
    // necessary, we could improve the complexity by using a hash table to
    // count how many times each node is referenced in the Phi instruction,
    // and how many times each node is referenced in the incoming edge list,
    // and compare the two for equality.
    for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
      CfgNode *Label = Phi->getLabel(i);
      bool Found = false;
      for (CfgNode *InNode : getInEdges()) {
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found) {
        // Predecessor was unreachable, so if (impossibly) the control flow
        // enters from that predecessor, the value should be zero.
        Phi->clearOperandForTarget(Label);
      }
    }
    for (CfgNode *InNode : getInEdges()) {
      bool Found = false;
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        CfgNode *Label = Phi->getLabel(i);
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found)
        llvm::report_fatal_error("Phi error: missing label for incoming edge");
    }
  }
}

// This does part 1 of Phi lowering, by creating a new dest variable for each
// Phi instruction, replacing the Phi instruction's dest with that variable,
// and adding an explicit assignment of the old dest to the new dest. For
// example,
//   a=phi(...)
// changes to
//   "a_phi=phi(...); a=a_phi".
//
// This is in preparation for part 2 which deletes the Phi instructions and
// appends assignment instructions to predecessor blocks. Note that this
// transformation preserves SSA form.
void CfgNode::placePhiLoads() {
  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    Insts.insert(Insts.begin(), Phi->lower(Func));
  }
}
// This does part 2 of Phi lowering. For each Phi instruction at each
// out-edge, create a corresponding assignment instruction, and add all the
// assignments near the end of this block. They need to be added before any
// branch instruction; in addition, if the block ends with a compare
// instruction followed by a branch instruction that we may want to fuse,
// it's better to insert the new assignments before the compare instruction.
// The tryOptimizedCmpxchgCmpBr() method assumes this ordering of
// instructions.
//
// Note that this transformation takes the Phi dest variables out of SSA
// form, as there may be assignments to the dest variable in multiple blocks.
void CfgNode::placePhiStores() {
  // Find the insertion point.
  InstList::iterator InsertionPoint = Insts.end();
  // Every block must end in a terminator instruction, and therefore must
  // have at least one instruction, so it's valid to decrement InsertionPoint
  // (but assert just in case).
  assert(InsertionPoint != Insts.begin());
  --InsertionPoint;
  // Confirm that InsertionPoint is a terminator instruction. Calling
  // getTerminatorEdges() on a non-terminator instruction will cause an
  // llvm_unreachable().
  (void)InsertionPoint->getTerminatorEdges();
  // SafeInsertionPoint is always immediately before the terminator
  // instruction. If the block ends in a compare and conditional branch, it's
  // better to place the Phi store before the compare so as not to interfere
  // with compare/branch fusing. However, if the compare instruction's dest
  // operand is the same as the new assignment statement's source operand,
  // this can't be done due to data dependences, so we need to fall back to
  // the SafeInsertionPoint. To illustrate:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %98 = icmp ne i8 %97, 0
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
  //   %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
  // would be Phi-lowered as:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %100_phi = %97 ; can be at InsertionPoint
  //   %98 = icmp ne i8 %97, 0
  //   %101_phi = %98 ; must be at SafeInsertionPoint
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = %100_phi
  //   %101 = %101_phi
  //
  // TODO(stichnot): It may be possible to bypass this whole
  // SafeInsertionPoint mechanism. If a source basic block ends in a
  // conditional branch:
  //   labelSource:
  //   ...
  //   br i1 %foo, label %labelTrue, label %labelFalse
  // and a branch target has a Phi involving the branch operand:
  //   labelTrue:
  //   %bar = phi i1 [ %foo, %labelSource ], ...
  // then we actually know the constant i1 value of the Phi operand:
  //   labelTrue:
  //   %bar = phi i1 [ true, %labelSource ], ...
  // It seems that this optimization should be done by clang or opt, but we
  // could also do it here.
  InstList::iterator SafeInsertionPoint = InsertionPoint;
  // Keep track of the dest variable of a compare instruction, so that we
  // insert the new instruction at the SafeInsertionPoint if the compare's
  // dest matches the Phi-lowered assignment's source.
  Variable *CmpInstDest = nullptr;
  // If the current insertion point is at a conditional branch instruction,
  // and the previous instruction is a compare instruction, then we move the
  // insertion point before the compare instruction so as not to interfere
  // with compare/branch fusing.
  if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
    if (!Branch->isUnconditional()) {
      if (InsertionPoint != Insts.begin()) {
        --InsertionPoint;
        if (llvm::isa<InstIcmp>(InsertionPoint) ||
            llvm::isa<InstFcmp>(InsertionPoint)) {
          CmpInstDest = InsertionPoint->getDest();
        } else {
          ++InsertionPoint;
        }
      }
    }
  }

  // Consider every out-edge.
  for (CfgNode *Succ : OutEdges) {
    // Consider every Phi instruction at the out-edge.
    for (Inst &I : Succ->Phis) {
      auto *Phi = llvm::dyn_cast<InstPhi>(&I);
      Operand *Operand = Phi->getOperandForTarget(this);
      assert(Operand);
      Variable *Dest = I.getDest();
      assert(Dest);
      auto *NewInst = InstAssign::create(Func, Dest, Operand);
      if (CmpInstDest == Operand)
        Insts.insert(SafeInsertionPoint, NewInst);
      else
        Insts.insert(InsertionPoint, NewInst);
    }
  }
}

// Deletes the phi instructions after the loads and stores are placed.
void CfgNode::deletePhis() {
  for (Inst &I : Phis)
    I.setDeleted();
}

// Splits the edge from Pred to this node by creating a new node and hooking
// up the in and out edges appropriately. (The EdgeIndex parameter is only
// used to make the new node's name unique when there are multiple edges
// between the same pair of nodes.) The new node's instruction list is
// initialized to the empty list, with no terminator instruction. There must
// not be multiple edges from Pred to this node, so no
// Inst::getTerminatorEdges implementation may return duplicates.
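// For example, when dumping is enabled, splitting edge 0 from a node named
// "foo" to a node named "bar" yields a new node named "split_foo_bar_0" (see
// the setName() call below).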
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
  CfgNode *NewNode = Func->makeNode();
  // Depth is the minimum as it works if both are the same, but if one is
  // outside the loop and the other is inside, the new node should be placed
  // outside and not be executed multiple times within the loop.
  NewNode->setLoopNestDepth(
      std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
  if (BuildDefs::dump())
    NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
                     std::to_string(EdgeIndex));
  // The new node is added to the end of the node list, and will later need
  // to be sorted into a reasonable topological order.
  NewNode->setNeedsPlacement(true);
  // Repoint Pred's out-edge.
  bool Found = false;
  for (CfgNode *&I : Pred->OutEdges) {
    if (I == this) {
      I = NewNode;
      NewNode->InEdges.push_back(Pred);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint this node's in-edge.
  Found = false;
  for (CfgNode *&I : InEdges) {
    if (I == Pred) {
      I = NewNode;
      NewNode->OutEdges.push_back(this);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint all suitable branch instructions' target and return.
  Found = false;
  for (Inst &I : Pred->getInsts())
    if (!I.isDeleted() && I.repointEdges(this, NewNode))
      Found = true;
  assert(Found);
  (void)Found;
  return NewNode;
}

namespace {

// Helpers for advancedPhiLowering().

class PhiDesc {
  PhiDesc() = delete;
  PhiDesc(const PhiDesc &) = delete;
  PhiDesc &operator=(const PhiDesc &) = delete;

public:
  PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
  PhiDesc(PhiDesc &&) = default;
  InstPhi *Phi = nullptr;
  Variable *Dest = nullptr;
  Operand *Src = nullptr;
  bool Processed = false;
  size_t NumPred = 0; // number of entries whose Src is this Dest
  int32_t Weight = 0; // preference for topological order
};
using PhiDescList = llvm::SmallVector<PhiDesc, 32>;

// Always pick NumPred=0 over NumPred>0.
constexpr int32_t WeightNoPreds = 8;
// Prefer Src as a register because the register might free up.
constexpr int32_t WeightSrcIsReg = 4;
// Prefer Dest not as a register because the register stays free longer.
constexpr int32_t WeightDestNotReg = 2;
// Prefer NumPred=1 over NumPred>1. This is used as a tiebreaker when a
// dependency cycle must be broken so that hopefully only one temporary
// assignment has to be added to break the cycle.
constexpr int32_t WeightOnePred = 1;
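// To give a concrete (illustrative) example of how these weights combine in
// advancedPhiLowering() below: an assignment whose Dest is read by no other
// unprocessed assignment (NumPred == 0), whose Src is in a register, and
// whose Dest is on the stack would get
//   Weight = WeightNoPreds + WeightSrcIsReg + WeightDestNotReg
//          = 8 + 4 + 2 = 14,
// and so would be scheduled ahead of lower-weight items.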
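// Returns whether Var1 and Opnd refer to the same variable or to registers
// that alias each other. As an illustrative x86 example, ax occupies the low
// 16 bits of eax, so variables assigned those two registers would be treated
// as the same here.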
bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
                  const Operand *Opnd) {
  if (Var1 == Opnd)
    return true;
  const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
  if (Var2 == nullptr)
    return false;

  // If either operand lacks a register, they cannot be the same.
  if (!Var1->hasReg())
    return false;
  if (!Var2->hasReg())
    return false;

  const auto RegNum1 = Var1->getRegNum();
  const auto RegNum2 = Var2->getRegNum();
  // Quick common-case check.
  if (RegNum1 == RegNum2)
    return true;

  assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
         Target->getAliasesForRegister(RegNum2)[RegNum1]);
  return Target->getAliasesForRegister(RegNum1)[RegNum2];
}

// Update NumPred for all Phi assignments using Var as their Dest variable.
// Also update Weight if NumPred dropped from 2 to 1, or 1 to 0.
void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) {
  for (PhiDesc &Item : Desc) {
    if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) {
      --Item.NumPred;
      if (Item.NumPred == 1) {
        // If NumPred changed from 2 to 1, add in WeightOnePred.
        Item.Weight += WeightOnePred;
      } else if (Item.NumPred == 0) {
        // If NumPred changed from 1 to 0, subtract WeightOnePred and add in
        // WeightNoPreds.
        Item.Weight += (WeightNoPreds - WeightOnePred);
      }
    }
  }
}

} // end of anonymous namespace
// This is the "advanced" version of Phi lowering for a basic block, in
// contrast to the simple version that lowers through assignments involving
// temporaries.
//
// All Phi instructions in a basic block are conceptually executed in
// parallel. However, if we lower Phis early and commit to a sequential
// ordering, we may end up creating unnecessary interferences which lead to
// worse register allocation. Delaying Phi scheduling until after register
// allocation can help unless there are no free registers for shuffling
// registers or stack slots and spilling becomes necessary.
//
// The advanced Phi lowering starts by finding a topological sort of the Phi
// instructions, where "A=B" comes before "B=C" due to the anti-dependence on
// B. Preexisting register assignments are considered in the topological
// sort. If a topological sort is not possible due to a cycle, the cycle is
// broken by introducing a non-parallel temporary. For example, a cycle
// arising from a permutation like "A=B;B=C;C=A" can become
// "T=A;A=B;B=C;C=T". All else being equal, prefer to schedule assignments
// with register-allocated Src operands earlier, in case that register
// becomes free afterwards, and prefer to schedule assignments with
// register-allocated Dest variables later, to keep that register free for
// longer.
//
// Once the ordering is determined, the Cfg edge is split and the assignment
// list is lowered by the target lowering layer. Since the assignment
// lowering may create new infinite-weight temporaries, a follow-on register
// allocation pass will be needed. To prepare for this, liveness (including
// live range calculation) of the split nodes needs to be calculated, and
// liveness of the
// original node needs to be updated to "undo" the effects of the phi
// assignments.
//
// The specific placement of the new node within the Cfg node list is
// deferred until later, including after empty node contraction.
//
// After phi assignments are lowered across all blocks, another register
// allocation pass is run, focusing only on pre-colored and infinite-weight
// variables, similar to Om1 register allocation (except without the need to
// specially compute these variables' live ranges, since they have already
// been precisely calculated). The register allocator in this mode needs the
// ability to forcibly spill and reload registers in case none are naturally
// available.
void CfgNode::advancedPhiLowering() {
  if (getPhis().empty())
    return;

  PhiDescList Desc;

  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    if (!Phi->isDeleted()) {
      Variable *Dest = Phi->getDest();
      Desc.emplace_back(Phi, Dest);
      // Undo the effect of the phi instruction on this node's live-in set by
      // marking the phi dest variable as live on entry.
      SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
      auto &LiveIn = Func->getLiveness()->getLiveIn(this);
      if (VarNum < LiveIn.size()) {
        assert(!LiveIn[VarNum]);
        LiveIn[VarNum] = true;
      }
      Phi->setDeleted();
    }
  }
  if (Desc.empty())
    return;

  TargetLowering *Target = Func->getTarget();
  SizeT InEdgeIndex = 0;
  for (CfgNode *Pred : InEdges) {
    CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
    SizeT Remaining = Desc.size();

    // First pass computes Src and initializes NumPred.
    for (PhiDesc &Item : Desc) {
      Variable *Dest = Item.Dest;
      Operand *Src = Item.Phi->getOperandForTarget(Pred);
      Item.Src = Src;
      Item.Processed = false;
      Item.NumPred = 0;
      // Cherry-pick any trivial assignments, so that they don't contribute
      // to the running complexity of the topological sort.
      if (sameVarOrReg(Target, Dest, Src)) {
        Item.Processed = true;
        --Remaining;
        if (Dest != Src)
          // If Dest and Src are syntactically the same, don't bother adding
          // the assignment, because in all respects it would be redundant,
          // and if Dest/Src are on the stack, the target lowering may
          // naively decide to lower it using a temporary register.
          Split->appendInst(InstAssign::create(Func, Dest, Src));
      }
    }
    // Second pass computes NumPred by comparing every pair of Phi
    // instructions.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      const Variable *Dest = Item.Dest;
      for (PhiDesc &Item2 : Desc) {
        if (Item2.Processed)
          continue;
        // There shouldn't be two different Phis with the same Dest variable
        // or register.
        assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
        if (sameVarOrReg(Target, Dest, Item2.Src))
          ++Item.NumPred;
      }
    }

    // Another pass to compute initial Weight values.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      int32_t Weight = 0;
      if (Item.NumPred == 0)
        Weight += WeightNoPreds;
      if (Item.NumPred == 1)
        Weight += WeightOnePred;
      if (auto *Var = llvm::dyn_cast<Variable>(Item.Src))
        if (Var->hasReg())
          Weight += WeightSrcIsReg;
      if (!Item.Dest->hasReg())
        Weight += WeightDestNotReg;
      Item.Weight = Weight;
    }
    // Repeatedly choose and process the best candidate in the topological
    // sort, until no candidates remain. This implementation is O(N^2) where
    // N is the number of Phi instructions, but it has a small constant
    // factor compared to what a general O(N) topological sort implementation
    // would likely require.
    for (; Remaining; --Remaining) {
      int32_t BestWeight = -1;
      PhiDesc *BestItem = nullptr;
      // Find the best candidate.
      for (PhiDesc &Item : Desc) {
        if (Item.Processed)
          continue;
        const int32_t Weight = Item.Weight;
        if (Weight > BestWeight) {
          BestItem = &Item;
          BestWeight = Weight;
        }
      }
      assert(BestWeight >= 0);
      Variable *Dest = BestItem->Dest;
      Operand *Src = BestItem->Src;
      assert(!sameVarOrReg(Target, Dest, Src));
      // Break a cycle by introducing a temporary.
      while (BestItem->NumPred > 0) {
        bool Found = false;
        // If the target instruction "A=B" is part of a cycle, find the "X=A"
        // assignment in the cycle because it will have to be rewritten as
        // "X=tmp".
        for (PhiDesc &Item : Desc) {
          if (Item.Processed)
            continue;
          Operand *OtherSrc = Item.Src;
          if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
            SizeT VarNum = Func->getNumVariables();
            Variable *Tmp = Func->makeVariable(OtherSrc->getType());
            if (BuildDefs::dump())
              Tmp->setName(Func, "__split_" + std::to_string(VarNum));
            Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
            Item.Src = Tmp;
            updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc));
            Found = true;
            break;
          }
        }
        assert(Found);
        (void)Found;
      }
      // Now that a cycle (if any) has been broken, create the actual
      // assignment.
      Split->appendInst(InstAssign::create(Func, Dest, Src));
      if (auto *Var = llvm::dyn_cast<Variable>(Src))
        updatePreds(Desc, Target, Var);
      BestItem->Processed = true;
    }
    Split->appendInst(InstBr::create(Func, this));

    Split->genCode();
    Func->getVMetadata()->addNode(Split);
    // Validate to be safe. All items should be marked as processed, and
    // have no predecessors.
    if (BuildDefs::asserts()) {
      for (PhiDesc &Item : Desc) {
        (void)Item;
        assert(Item.Processed);
        assert(Item.NumPred == 0);
      }
    }
  }
}

// Does address mode optimization. Pass each instruction to the
// TargetLowering object. If it returns a new instruction (representing the
// optimized address mode), then insert the new instruction and delete the
// old.
void CfgNode::doAddressOpt() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  while (!Context.atEnd()) {
    Target->doAddressOpt();
  }
}

// Drives the target lowering. Passes the current instruction and the next
// non-deleted instruction for target lowering.
void CfgNode::genCode() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  // Lower the regular instructions.
  Context.init(this);
  Target->initNodeForLowering(this);
  while (!Context.atEnd()) {
    InstList::iterator Orig = Context.getCur();
    if (llvm::isa<InstRet>(*Orig))
      setHasReturn();
    Target->lower();
    // Ensure target lowering actually moved the cursor.
    assert(Context.getCur() != Orig);
  }
  Context.availabilityReset();
  // Do preliminary lowering of the Phi instructions.
  Target->prelowerPhis();
}

void CfgNode::livenessLightweight() {
  SizeT NumVars = Func->getNumVariables();
  LivenessBV Live(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
}

// Performs liveness analysis on the block. Returns true if the incoming
// liveness changed from before, false if it stayed the same. (If it changes,
// the node's predecessors need to be processed again.)
bool CfgNode::liveness(Liveness *Liveness) {
  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const SizeT NumGlobalVars = Liveness->getNumGlobalVars();
  LivenessBV &Live = Liveness->getScratchBV();
  Live.clear();

  LiveBeginEndMap *LiveBegin = nullptr;
  LiveBeginEndMap *LiveEnd = nullptr;
  // Mark the beginning and ending of each variable's live range with the
  // sentinel instruction number 0.
  if (Liveness->getMode() == Liveness_Intervals) {
    LiveBegin = Liveness->getLiveBegin(this);
    LiveEnd = Liveness->getLiveEnd(this);
    LiveBegin->clear();
    LiveEnd->clear();
    // Guess that the number of live ranges beginning is roughly the number
    // of instructions, and same for live ranges ending.
    LiveBegin->reserve(getInstCountEstimate());
    LiveEnd->reserve(getInstCountEstimate());
  }

  // Initialize Live to be the union of all successors' LiveIn.
  for (CfgNode *Succ : OutEdges) {
    const LivenessBV &LiveIn = Liveness->getLiveIn(Succ);
    assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
    Live |= LiveIn;
    // Mark corresponding argument of phis in successor as live.
    for (Inst &I : Succ->Phis) {
      if (I.isDeleted())
        continue;
      auto *Phi = llvm::cast<InstPhi>(&I);
      Phi->livenessPhiOperand(Live, this, Liveness);
    }
  }
  assert(Live.empty() || Live.size() == NumGlobalVars);
  Liveness->getLiveOut(this) = Live;

  // Expand Live so it can hold locals in addition to globals.
  Live.resize(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
  }
  // Process phis in forward order so that we can override the instruction
  // number to be that of the earliest phi instruction in the block.
  SizeT NumNonDeadPhis = 0;
  InstNumberT FirstPhiNumber = Inst::NumberSentinel;
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    if (FirstPhiNumber == Inst::NumberSentinel)
      FirstPhiNumber = I.getNumber();
    if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
      ++NumNonDeadPhis;
  }

  // When using the sparse representation, after traversing the instructions
  // in the block, the Live bitvector should only contain set bits for global
  // variables upon block entry. We validate this by testing the upper bits
  // of the Live bitvector.
  if (Live.find_next(NumGlobalVars) != -1) {
    if (BuildDefs::dump()) {
      // This is a fatal liveness consistency error. Print some diagnostics
      // and abort.
      Ostream &Str = Func->getContext()->getStrDump();
      Func->resetCurrentNode();
      Str << "Invalid Live =";
      for (SizeT i = NumGlobalVars; i < Live.size(); ++i) {
        if (Live.test(i)) {
          Str << " ";
          Liveness->getVariable(i, this)->dump(Func);
        }
      }
      Str << "\n";
    }
    llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
  }
  // Now truncate Live to prevent LiveIn from growing.
  Live.resize(NumGlobalVars);

  bool Changed = false;
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
  // Add in current LiveIn.
  Live |= LiveIn;
  // Check result, set LiveIn=Live.
  SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
  bool LiveInChanged = (Live != LiveIn);
  Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
  if (LiveInChanged)
    LiveIn = Live;
  PrevNumNonDeadPhis = NumNonDeadPhis;
  return Changed;
}

// Validate the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false. On success, it returns true.
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
  if (!BuildDefs::asserts())
    return true;

  // Verify there are no duplicates.
  auto ComparePair = [](const LiveBeginEndMapEntry &A,
                        const LiveBeginEndMapEntry &B) {
    return A.first == B.first;
  };
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
          MapBegin.end() &&
      std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
          MapEnd.end())
    return true;

  // There is definitely a liveness error. All paths from here return false.
  if (!BuildDefs::dump())
    return false;

  // Print all the errors.
  if (BuildDefs::dump()) {
    GlobalContext *Ctx = Func->getContext();
    OstreamLocker L(Ctx);
    Ostream &Str = Ctx->getStrDump();
    if (Func->isVerbose()) {
      Str << "Live range errors in the following block:\n";
      dump(Func);
    }
    for (auto Start = MapBegin.begin();
         (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
         MapBegin.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR begin, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
    for (auto Start = MapEnd.begin();
         (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
         MapEnd.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR end, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
  }

  return false;
}

// Once basic liveness is complete, compute actual live ranges. It is assumed
// that within a single basic block, a live range begins at most once and
// ends at most once. This is certainly true for pure SSA form. It is also
// true once phis are lowered, since each assignment to the phi-based
// temporary is in a different basic block, and there is a single read that
// ends the live range in the basic block that contained the actual phi
// instruction.
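// As a small illustrative example of the merge below: if MapBegin records a
// variable's range beginning at instruction 12 but MapEnd has no entry for
// it, the range is taken to extend from 12 through LastInstNum + 1; if only
// MapEnd has an entry (say at 17), the range covers FirstInstNum through 17.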
void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
                                   InstNumberT LastInstNum) {
  TimerMarker T1(TimerStack::TT_liveRange, Func);

  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const LivenessBV &LiveIn = Liveness->getLiveIn(this);
  const LivenessBV &LiveOut = Liveness->getLiveOut(this);
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  std::sort(MapBegin.begin(), MapBegin.end());
  std::sort(MapEnd.begin(), MapEnd.end());

  if (!livenessValidateIntervals(Liveness)) {
    llvm::report_fatal_error("livenessAddIntervals: Liveness error");
    return;
  }

  LivenessBV &LiveInAndOut = Liveness->getScratchBV();
  LiveInAndOut = LiveIn;
  LiveInAndOut &= LiveOut;

  // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
  auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
  auto IBE = MapBegin.end(), IEE = MapEnd.end();
  while (IBB != IBE || IEB != IEE) {
    SizeT i1 = IBB == IBE ? NumVars : IBB->first;
    SizeT i2 = IEB == IEE ? NumVars : IEB->first;
    SizeT i = std::min(i1, i2);
    // i1 is the Variable number of the next MapBegin entry, and i2 is the
    // Variable number of the next MapEnd entry. If i1==i2, then the
    // Variable's live range begins and ends in this block. If i1<i2, then
    // i1's live range begins at instruction IBB->second and extends through
    // the end of the block. If i1>i2, then i2's live range begins at the
    // first instruction of the block and ends at IEB->second. In any case,
    // we choose the lesser of i1 and i2 and proceed accordingly.
    InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
    InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;

    Variable *Var = Liveness->getVariable(i, this);
    if (LB > LE) {
      Var->addLiveRange(FirstInstNum, LE, this);
      Var->addLiveRange(LB, LastInstNum + 1, this);
      // Assert that Var is a global variable by checking that its liveness
      // index is less than the number of globals. This ensures that the
      // LiveInAndOut[] access is valid.
      assert(i < Liveness->getNumGlobalVars());
      LiveInAndOut[i] = false;
    } else {
      Var->addLiveRange(LB, LE, this);
    }
    if (i == i1)
      ++IBB;
    if (i == i2)
      ++IEB;
  }
  // Process the variables that are live across the entire block.
  for (int i = LiveInAndOut.find_first(); i != -1;
       i = LiveInAndOut.find_next(i)) {
    Variable *Var = Liveness->getVariable(i, this);
    if (Liveness->getRangeMask(Var->getIndex()))
      Var->addLiveRange(FirstInstNum, LastInstNum + 1, this);
  }
}

// If this node contains only deleted instructions, and ends in an
// unconditional branch, contract the node by repointing all its in-edges to
// its successor.
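// For example, a node left with nothing but deleted instructions, redundant
// assignments, and a final unconditional "br %succ" gets contracted: every
// predecessor that branched to this node is repointed directly at %succ.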
void CfgNode::contractIfEmpty() {
  if (InEdges.empty())
    return;
  Inst *Branch = nullptr;
  for (Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isUnconditionalBranch())
      Branch = &I;
    else if (!I.isRedundantAssign())
      return;
  }
  // Make sure there is actually a successor to repoint in-edges to.
  if (OutEdges.empty())
    return;
  assert(hasSingleOutEdge());
  // Don't try to delete a self-loop.
  if (OutEdges[0] == this)
    return;
  // Make sure the node actually contains (ends with) an unconditional
  // branch.
  if (Branch == nullptr)
    return;

  Branch->setDeleted();
  CfgNode *Successor = OutEdges.front();
  // Repoint all this node's in-edges to this node's successor, unless this
  // node's successor is actually itself (in which case the statement
  // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the
  // iterator over this->InEdges).
  if (Successor != this) {
    for (CfgNode *Pred : InEdges) {
      for (CfgNode *&I : Pred->OutEdges) {
        if (I == this) {
          I = Successor;
          Successor->InEdges.push_back(Pred);
        }
      }
      for (Inst &I : Pred->getInsts()) {
        if (!I.isDeleted())
          I.repointEdges(this, Successor);
      }
    }

    // Remove the in-edge to the successor to allow node reordering to make
    // better decisions. For example it's more helpful to place a node after
    // a reachable predecessor than an unreachable one (like the one we just
    // contracted).
    Successor->InEdges.erase(
        std::find(Successor->InEdges.begin(), Successor->InEdges.end(),
                  this));
  }
  InEdges.clear();
}

void CfgNode::doBranchOpt(const CfgNode *NextNode) {
  TargetLowering *Target = Func->getTarget();
  // Find the first opportunity for branch optimization (which will be the
  // last instruction in the block) and stop. This is sufficient unless there
  // is some target lowering where we have the possibility of multiple
  // optimizations per block. Take care with switch lowering as there are
  // multiple unconditional branches and only the last can be deleted.
  for (Inst &I : reverse_range(Insts)) {
    if (!I.isDeleted()) {
      Target->doBranchOpt(&I, NextNode);
      return;
    }
  }
}

// ======================== Dump routines ======================== //

namespace {

// Helper functions for emit().

void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
                       bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  Liveness *Liveness = Func->getLiveness();
  const LivenessBV *Live;
  const auto StackReg = Func->getTarget()->getStackReg();
  const auto FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
  if (IsLiveIn) {
    Live = &Liveness->getLiveIn(Node);
    Str << "\t\t\t\t/* LiveIn=";
  } else {
    Live = &Liveness->getLiveOut(Node);
    Str << "\t\t\t\t/* LiveOut=";
  }
  if (!Live->empty()) {
    CfgVector<Variable *> LiveRegs;
    for (SizeT i = 0; i < Live->size(); ++i) {
      if (!(*Live)[i])
        continue;
      Variable *Var = Liveness->getVariable(i, Node);
      if (!Var->hasReg())
        continue;
      const auto RegNum = Var->getRegNum();
      if (RegNum == StackReg || RegNum == FrameOrStackReg)
        continue;
      if (IsLiveIn)
        ++LiveRegCount[RegNum];
      LiveRegs.push_back(Var);
    }
    // Sort the variables by regnum so they are always printed in a familiar
    // order.
    std::sort(LiveRegs.begin(), LiveRegs.end(),
              [](const Variable *V1, const Variable *V2) {
                return unsigned(V1->getRegNum()) < unsigned(V2->getRegNum());
              });
    bool First = true;
    for (Variable *Var : LiveRegs) {
      if (!First)
        Str << ",";
      First = false;
      Var->emit(Func);
    }
  }
  Str << " */\n";
}

/// Returns true if some text was emitted - in which case the caller
/// definitely needs to emit a newline character.
bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
                         CfgVector<SizeT> &LiveRegCount) {
  bool Printed = false;
  if (!BuildDefs::dump())
    return Printed;
  Variable *Dest = Instr->getDest();
  // Normally we increment the live count for the dest register. But we
  // shouldn't if the instruction's IsDestRedefined flag is set, because this
  // means that the target lowering created this instruction as a non-SSA
  // assignment; i.e., a different, previous instruction started the dest
  // variable's live range.
  if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
    ++LiveRegCount[Dest->getRegNum()];
  FOREACH_VAR_IN_INST(Var, *Instr) {
    bool ShouldReport = Instr->isLastUse(Var);
    if (ShouldReport && Var->hasReg()) {
      // Don't report end of live range until the live count reaches 0.
      SizeT NewCount = --LiveRegCount[Var->getRegNum()];
      if (NewCount)
        ShouldReport = false;
    }
    if (ShouldReport) {
      if (Printed)
        Str << ",";
      else
        Str << " \t/* END=";
      Var->emit(Func);
      Printed = true;
    }
  }
  if (Printed)
    Str << " */";
  return Printed;
}

void updateStats(Cfg *Func, const Inst *I) {
  if (!BuildDefs::dump())
    return;
  // Update emitted instruction count, plus fill/spill count for Variable
  // operands without a physical register.
  if (uint32_t Count = I->getEmitInstCount()) {
    Func->getContext()->statsUpdateEmitted(Count);
    if (Variable *Dest = I->getDest()) {
      if (!Dest->hasReg())
        Func->getContext()->statsUpdateFills();
    }
    for (SizeT S = 0; S < I->getSrcSize(); ++S) {
      if (auto *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
        if (!Src->hasReg())
          Func->getContext()->statsUpdateSpills();
      }
    }
  }
}

} // end of anonymous namespace

void CfgNode::emit(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrEmit();
  Liveness *Liveness = Func->getLiveness();
  const bool DecorateAsm = Liveness && getFlags().getDecorateAsm();
  Str << getAsmName() << ":\n";
  // LiveRegCount keeps track of the number of currently live variables that
  // each register is assigned to. Normally that would be only 0 or 1, but
  // the register allocator's AllowOverlap inference allows it to be greater
  // than 1 for short periods.
  CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
  if (DecorateAsm) {
    constexpr bool IsLiveIn = true;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
    if (getInEdges().size()) {
      Str << "\t\t\t\t/* preds=";
      bool First = true;
      for (CfgNode *I : getInEdges()) {
        if (!First)
          Str << ",";
        First = false;
        Str << "$" << I->getName();
      }
      Str << " */\n";
    }
    if (getLoopNestDepth()) {
      Str << "\t\t\t\t/* loop depth=" << getLoopNestDepth() << " */\n";
    }
  }

  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emit(Func);
  }
  for (const Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isRedundantAssign()) {
      // Usually, redundant assignments end the live range of the src
      // variable and begin the live range of the dest variable, with no net
      // effect on the liveness of their register. However, if the register
      // allocator infers the AllowOverlap condition, then this may be a
      // redundant assignment that does not end the src variable's live
      // range, in which case the active variable count for that register
      // needs to be bumped. That normally would have happened as part of
      // emitLiveRangesEnded(), but that isn't called for redundant
      // assignments.
      Variable *Dest = I.getDest();
      if (DecorateAsm && Dest->hasReg()) {
        ++LiveRegCount[Dest->getRegNum()];
        if (I.isLastUse(I.getSrc(0)))
          --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
      }
      continue;
    }
    I.emit(Func);
    bool Printed = false;
    if (DecorateAsm)
      Printed = emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
    if (Printed || llvm::isa<InstTarget>(&I))
      Str << "\n";
    updateStats(Func, &I);
  }
  if (DecorateAsm) {
    constexpr bool IsLiveIn = false;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
  }
}

// Helper class for emitIAS().
namespace {
class BundleEmitHelper {
  BundleEmitHelper() = delete;
  BundleEmitHelper(const BundleEmitHelper &) = delete;
  BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;

public:
  BundleEmitHelper(Assembler *Asm, const InstList &Insts)
      : Asm(Asm), End(Insts.end()), BundleLockStart(End),
        BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
        BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
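  // As a concrete (illustrative) example of the mask arithmetic: with
  // 32-byte bundles (getBundleAlignLog2Bytes() == 5), BundleSize == 32,
  // BundleMaskLo == 0x1f, and BundleMaskHi == ~0x1f, so "Addr & BundleMaskLo"
  // gives the offset within a bundle and "Addr & BundleMaskHi" identifies
  // the bundle itself.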
  // Check whether we're currently within a bundle_lock region.
  bool isInBundleLockRegion() const { return BundleLockStart != End; }
  // Check whether the current bundle_lock region has the align_to_end
  // option.
  bool isAlignToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_AlignToEnd;
  }
  bool isPadToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_PadToEnd;
  }
  // Check whether the entire bundle_lock region falls within the same
  // bundle.
  bool isSameBundle() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre == SizeSnapshotPost ||
           (SizeSnapshotPre & BundleMaskHi) ==
               ((SizeSnapshotPost - 1) & BundleMaskHi);
  }
  // Get the bundle alignment of the first instruction of the bundle_lock
  // region.
  intptr_t getPreAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre & BundleMaskLo;
  }
  // Get the bundle alignment of the first instruction past the bundle_lock
  // region.
  intptr_t getPostAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPost & BundleMaskLo;
  }
  // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
  // back the instruction iteration to that point.
  InstList::const_iterator getBundleLockStart() const {
    assert(isInBundleLockRegion());
    return BundleLockStart;
  }
  // Set up bookkeeping when the bundle_lock instruction is first processed.
  void enterBundleLock(InstList::const_iterator I) {
    assert(!isInBundleLockRegion());
    BundleLockStart = I;
    SizeSnapshotPre = Asm->getBufferSize();
    Asm->setPreliminary(true);
    assert(isInBundleLockRegion());
  }
  // Update bookkeeping when the bundle_unlock instruction is processed.
  void enterBundleUnlock() {
    assert(isInBundleLockRegion());
    SizeSnapshotPost = Asm->getBufferSize();
  }
  // Update bookkeeping when we are completely finished with the bundle_lock
  // region.
  void leaveBundleLockRegion() { BundleLockStart = End; }
  // Check whether the instruction sequence fits within the current bundle,
  // and if not, add nop padding to the end of the current bundle.
  void padToNextBundle() {
    assert(isInBundleLockRegion());
    if (!isSameBundle()) {
      intptr_t PadToNextBundle = BundleSize - getPreAlignment();
      Asm->padWithNop(PadToNextBundle);
      SizeSnapshotPre += PadToNextBundle;
      SizeSnapshotPost += PadToNextBundle;
      assert((Asm->getBufferSize() & BundleMaskLo) == 0);
      assert(Asm->getBufferSize() == SizeSnapshotPre);
    }
  }
  // If align_to_end is specified, add padding such that the instruction
  // sequence ends precisely at a bundle boundary.
  void padForAlignToEnd() {
    assert(isInBundleLockRegion());
    if (isAlignToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // If pad_to_end is specified, add padding such that the first instruction
  // after the instruction sequence starts at a bundle boundary.
  void padForPadToEnd() {
    assert(isInBundleLockRegion());
    if (isPadToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // Update bookkeeping when rolling back for the second pass.
  void rollback() {
    assert(isInBundleLockRegion());
    Asm->setBufferSize(SizeSnapshotPre);
    Asm->setPreliminary(false);
  }

private:
  Assembler *const Asm;
  // End is a sentinel value such that BundleLockStart==End implies that we
  // are not in a bundle_lock region.
  const InstList::const_iterator End;
  InstList::const_iterator BundleLockStart;
  const intptr_t BundleSize;
  // Masking with BundleMaskLo identifies an address's bundle offset.
  const intptr_t BundleMaskLo;
  // Masking with BundleMaskHi identifies an address's bundle.
  const intptr_t BundleMaskHi;
  intptr_t SizeSnapshotPre = 0;
  intptr_t SizeSnapshotPost = 0;
};

} // end of anonymous namespace

void CfgNode::emitIAS(Cfg *Func) const {
  Func->setCurrentNode(this);
  Assembler *Asm = Func->getAssembler<>();
  // TODO(stichnot): When sandboxing, defer binding the node label until just
  // before the first instruction is emitted, to reduce the chance that a
  // padding nop is a branch target.
  Asm->bindCfgNodeLabel(this);
  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emitIAS(Func);
  }

  // Do the simple emission if not sandboxed.
  if (!getFlags().getUseSandboxing()) {
    for (const Inst &I : Insts) {
      if (!I.isDeleted() && !I.isRedundantAssign()) {
        I.emitIAS(Func);
        updateStats(Func, &I);
      }
    }
    return;
  }

  // The remainder of the function handles emission with sandboxing. There
  // are explicit bundle_lock regions delimited by bundle_lock and
  // bundle_unlock instructions. All other instructions are treated as an
  // implicit one-instruction bundle_lock region. Emission is done twice for
  // each bundle_lock region. The first pass is a preliminary pass, after
  // which we can figure out what nop padding is needed, then roll back, and
  // make the final pass.
  //
  // Ideally, the first pass would be speculative and the second pass would
  // only be done if nop padding were needed, but the structure of the
  // integrated assembler makes it hard to roll back the state of label
  // bindings, label links, and relocation fixups. Instead, the first pass
  // just disables all mutation of that state.

  BundleEmitHelper Helper(Asm, Insts);
  InstList::const_iterator End = Insts.end();
  // Retrying indicates that we had to roll back to the bundle_lock
  // instruction to apply padding before the bundle_lock sequence.
  bool Retrying = false;
  for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
    if (I->isDeleted() || I->isRedundantAssign())
      continue;

    if (llvm::isa<InstBundleLock>(I)) {
      // Set up the initial bundle_lock state. This should not happen while
      // retrying, because the retry rolls back to the instruction following
      // the bundle_lock instruction.
      assert(!Retrying);
      Helper.enterBundleLock(I);
      continue;
    }

    if (llvm::isa<InstBundleUnlock>(I)) {
      Helper.enterBundleUnlock();
      if (Retrying) {
        // Make sure all instructions are in the same bundle.
        assert(Helper.isSameBundle());
        // If align_to_end is specified, make sure the next instruction
        // begins the bundle.
        assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
        Helper.padForPadToEnd();
        Helper.leaveBundleLockRegion();
        Retrying = false;
      } else {
        // This is the first pass, so roll back for the retry pass.
        Helper.rollback();
        // Pad to the next bundle if the instruction sequence crossed a
        // bundle boundary.
        Helper.padToNextBundle();
        // Insert additional padding to make AlignToEnd work.
        Helper.padForAlignToEnd();
        // Prepare for the retry pass after padding is done.
        Retrying = true;
        I = Helper.getBundleLockStart();
      }
      continue;
    }

    // I points to a non bundle_lock/bundle_unlock instruction.
    if (Helper.isInBundleLockRegion()) {
      I->emitIAS(Func);
      // Only update stats during the final pass.
      if (Retrying)
        updateStats(Func, iteratorToInst(I));
    } else {
      // Treat it as though there were an implicit bundle_lock and
      // bundle_unlock wrapping the instruction.
      Helper.enterBundleLock(I);
      I->emitIAS(Func);
      Helper.enterBundleUnlock();
      Helper.rollback();
      Helper.padToNextBundle();
      I->emitIAS(Func);
      updateStats(Func, iteratorToInst(I));
      Helper.leaveBundleLockRegion();
    }
  }

  // Don't allow bundle locking across basic blocks, to keep the backtracking
  // mechanism simple.
  assert(!Helper.isInBundleLockRegion());
  assert(!Retrying);
}

void CfgNode::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrDump();
  Liveness *Liveness = Func->getLiveness();
  if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
    Str << getName() << ":\n";
  // Dump the loop nest depth.
  if (Func->isVerbose(IceV_Loop))
    Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
  // Dump list of predecessor nodes.
  if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
    Str << " // preds = ";
    bool First = true;
    for (CfgNode *I : InEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
  // Dump the live-in variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveIn(this).empty()) {
      const LivenessBV &LiveIn = Liveness->getLiveIn(this);
      Str << " // LiveIn:";
      for (SizeT i = 0; i < LiveIn.size(); ++i) {
        if (LiveIn[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump each instruction.
  if (Func->isVerbose(IceV_Instructions)) {
    for (const Inst &I : Phis)
      I.dumpDecorated(Func);
    for (const Inst &I : Insts)
      I.dumpDecorated(Func);
  }
  // Dump the live-out variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveOut(this).empty()) {
      const LivenessBV &LiveOut = Liveness->getLiveOut(this);
      Str << " // LiveOut:";
      for (SizeT i = 0; i < LiveOut.size(); ++i) {
        if (LiveOut[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump list of successor nodes.
  if (Func->isVerbose(IceV_Succs)) {
    Str << " // succs = ";
    bool First = true;
    for (CfgNode *I : OutEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
}

void CfgNode::removeInEdge(CfgNode *In) {
  InEdges.erase(std::find(InEdges.begin(), InEdges.end(), In));
}

CfgNode *CfgNode::shortCircuit() {
  auto *Func = getCfg();
  auto *Last = &getInsts().back();
  Variable *Condition = nullptr;
  InstBr *Br = nullptr;
  if ((Br = llvm::dyn_cast<InstBr>(Last))) {
    if (!Br->isUnconditional()) {
      Condition = llvm::dyn_cast<Variable>(Br->getCondition());
    }
  }
  if (Condition == nullptr)
    return nullptr;

  auto *JumpOnTrue = Br->getTargetTrue();
  auto *JumpOnFalse = Br->getTargetFalse();

  bool FoundOr = false;
  bool FoundAnd = false;

  InstArithmetic *TopLevelBoolOp = nullptr;

  for (auto &Inst : reverse_range(getInsts())) {
    if (Inst.isDeleted())
      continue;
    if (Inst.getDest() == Condition) {
      if (auto *Arith = llvm::dyn_cast<InstArithmetic>(&Inst)) {
        FoundOr = (Arith->getOp() == InstArithmetic::OpKind::Or);
        FoundAnd = (Arith->getOp() == InstArithmetic::OpKind::And);

        if (FoundOr || FoundAnd) {
          TopLevelBoolOp = Arith;
          break;
        }
      }
    }
  }

  if (!TopLevelBoolOp)
    return nullptr;

  auto IsOperand = [](Inst *Instr, Operand *Opr) -> bool {
    for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
      if (Instr->getSrc(i) == Opr)
        return true;
    }
    return false;
  };
  Inst *FirstOperandDef = nullptr;
  for (auto &Inst : getInsts()) {
    if (IsOperand(TopLevelBoolOp, Inst.getDest())) {
      FirstOperandDef = &Inst;
      break;
    }
  }

  if (FirstOperandDef == nullptr) {
    return nullptr;
  }

  // Check for side effects.
  auto It = Ice::instToIterator(FirstOperandDef);
  while (It != getInsts().end()) {
    if (It->isDeleted()) {
      ++It;
      continue;
    }
    if (llvm::isa<InstBr>(It) || llvm::isa<InstRet>(It)) {
      break;
    }
    auto *Dest = It->getDest();
    if (It->getDest() == nullptr || It->hasSideEffects() ||
        !Func->getVMetadata()->isSingleBlock(Dest)) {
      // Relying on short-circuit evaluation here:
      // getVMetadata()->isSingleBlock(Dest) would segfault if
      // It->getDest() == nullptr.
      return nullptr;
    }
    ++It;
  }

  auto *NewNode = Func->makeNode();
  NewNode->setLoopNestDepth(getLoopNestDepth());
  It = Ice::instToIterator(FirstOperandDef);
  ++It; // Have to split after the def.

  NewNode->getInsts().splice(NewNode->getInsts().begin(), getInsts(), It,
                             getInsts().end());

  if (BuildDefs::dump()) {
    NewNode->setName(getName().append("_2"));
    setName(getName().append("_1"));
  }

  // Point edges properly.
  NewNode->addInEdge(this);
  for (auto *Out : getOutEdges()) {
    NewNode->addOutEdge(Out);
    Out->addInEdge(NewNode);
  }
  removeAllOutEdges();
  addOutEdge(NewNode);

  // Manage Phi instructions of successors.
  for (auto *Succ : NewNode->getOutEdges()) {
    for (auto &Inst : Succ->getPhis()) {
      auto *Phi = llvm::cast<InstPhi>(&Inst);
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        if (Phi->getLabel(i) == this) {
          Phi->addArgument(Phi->getSrc(i), NewNode);
        }
      }
    }
  }

  // Create the new Br instruction.
  InstBr *NewInst = nullptr;
  if (FoundOr) {
    addOutEdge(JumpOnTrue);
    JumpOnFalse->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), JumpOnTrue, NewNode);
  } else if (FoundAnd) {
    addOutEdge(JumpOnFalse);
    JumpOnTrue->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), NewNode, JumpOnFalse);
  } else {
    return nullptr;
  }

  assert(NewInst != nullptr);
  appendInst(NewInst);

  Operand *UnusedOperand = nullptr;
  assert(TopLevelBoolOp->getSrcSize() == 2);
  if (TopLevelBoolOp->getSrc(0) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(1);
  else if (TopLevelBoolOp->getSrc(1) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(0);
  assert(UnusedOperand);

  Br->replaceSource(0, UnusedOperand); // Index 0 has the condition of the Br.

  TopLevelBoolOp->setDeleted();
  return NewNode;
}

} // end of namespace Ice