• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the CfgNode class, including the complexities of
12 /// instruction insertion and in-edge calculation.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "IceCfgNode.h"
17 
18 #include "IceAssembler.h"
19 #include "IceCfg.h"
20 #include "IceGlobalInits.h"
21 #include "IceInst.h"
22 #include "IceInstVarIter.h"
23 #include "IceLiveness.h"
24 #include "IceOperand.h"
25 #include "IceTargetLowering.h"
26 
27 namespace Ice {
28 
29 // Adds an instruction to either the Phi list or the regular instruction list.
30 // Validates that all Phis are added before all regular instructions.
appendInst(Inst * Instr)31 void CfgNode::appendInst(Inst *Instr) {
32   ++InstCountEstimate;
33 
34   if (BuildDefs::wasm()) {
35     if (llvm::isa<InstSwitch>(Instr) || llvm::isa<InstBr>(Instr)) {
36       for (auto *N : Instr->getTerminatorEdges()) {
37         N->addInEdge(this);
38         addOutEdge(N);
39       }
40     }
41   }
42 
43   if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
44     if (!Insts.empty()) {
45       Func->setError("Phi instruction added to the middle of a block");
46       return;
47     }
48     Phis.push_back(Phi);
49   } else {
50     Insts.push_back(Instr);
51   }
52 }
53 
replaceInEdge(CfgNode * Old,CfgNode * New)54 void CfgNode::replaceInEdge(CfgNode *Old, CfgNode *New) {
55   for (SizeT i = 0; i < InEdges.size(); ++i) {
56     if (InEdges[i] == Old) {
57       InEdges[i] = New;
58     }
59   }
60   for (auto &Inst : getPhis()) {
61     auto &Phi = llvm::cast<InstPhi>(Inst);
62     for (SizeT i = 0; i < Phi.getSrcSize(); ++i) {
63       if (Phi.getLabel(i) == Old) {
64         Phi.setLabel(i, New);
65       }
66     }
67   }
68 }
69 
70 namespace {
removeDeletedAndRenumber(List * L,Cfg * Func)71 template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
72   const bool DoDelete =
73       BuildDefs::minimal() || !getFlags().getKeepDeletedInsts();
74   auto I = L->begin(), E = L->end(), Next = I;
75   for (++Next; I != E; I = Next++) {
76     if (DoDelete && I->isDeleted()) {
77       L->remove(I);
78     } else {
79       I->renumber(Func);
80     }
81   }
82 }
83 } // end of anonymous namespace
84 
renumberInstructions()85 void CfgNode::renumberInstructions() {
86   InstNumberT FirstNumber = Func->getNextInstNumber();
87   removeDeletedAndRenumber(&Phis, Func);
88   removeDeletedAndRenumber(&Insts, Func);
89   InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
90 }
91 
92 // When a node is created, the OutEdges are immediately known, but the InEdges
93 // have to be built up incrementally. After the CFG has been constructed, the
94 // computePredecessors() pass finalizes it by creating the InEdges list.
computePredecessors()95 void CfgNode::computePredecessors() {
96   for (CfgNode *Succ : OutEdges)
97     Succ->InEdges.push_back(this);
98 }
99 
computeSuccessors()100 void CfgNode::computeSuccessors() {
101   OutEdges.clear();
102   InEdges.clear();
103   assert(!Insts.empty());
104   OutEdges = Insts.rbegin()->getTerminatorEdges();
105 }
106 
107 // Ensure each Phi instruction in the node is consistent with respect to control
108 // flow.  For each predecessor, there must be a phi argument with that label.
109 // If a phi argument's label doesn't appear in the predecessor list (which can
110 // happen as a result of e.g. unreachable node elimination), its value is
111 // modified to be zero, to maintain consistency in liveness analysis.  This
112 // allows us to remove some dead control flow without a major rework of the phi
113 // instructions.  We don't check that phi arguments with the same label have the
114 // same value.
enforcePhiConsistency()115 void CfgNode::enforcePhiConsistency() {
116   for (Inst &Instr : Phis) {
117     auto *Phi = llvm::cast<InstPhi>(&Instr);
118     // We do a simple O(N^2) algorithm to check for consistency. Even so, it
119     // shows up as only about 0.2% of the total translation time. But if
120     // necessary, we could improve the complexity by using a hash table to
121     // count how many times each node is referenced in the Phi instruction, and
122     // how many times each node is referenced in the incoming edge list, and
123     // compare the two for equality.
124     for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
125       CfgNode *Label = Phi->getLabel(i);
126       bool Found = false;
127       for (CfgNode *InNode : getInEdges()) {
128         if (InNode == Label) {
129           Found = true;
130           break;
131         }
132       }
133       if (!Found) {
134         // Predecessor was unreachable, so if (impossibly) the control flow
135         // enters from that predecessor, the value should be zero.
136         Phi->clearOperandForTarget(Label);
137       }
138     }
139     for (CfgNode *InNode : getInEdges()) {
140       bool Found = false;
141       for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
142         CfgNode *Label = Phi->getLabel(i);
143         if (InNode == Label) {
144           Found = true;
145           break;
146         }
147       }
148       if (!Found)
149         llvm::report_fatal_error("Phi error: missing label for incoming edge");
150     }
151   }
152 }
153 
154 // This does part 1 of Phi lowering, by creating a new dest variable for each
155 // Phi instruction, replacing the Phi instruction's dest with that variable,
156 // and adding an explicit assignment of the old dest to the new dest. For
157 // example,
158 //   a=phi(...)
159 // changes to
160 //   "a_phi=phi(...); a=a_phi".
161 //
162 // This is in preparation for part 2 which deletes the Phi instructions and
163 // appends assignment instructions to predecessor blocks. Note that this
164 // transformation preserves SSA form.
placePhiLoads()165 void CfgNode::placePhiLoads() {
166   for (Inst &I : Phis) {
167     auto *Phi = llvm::dyn_cast<InstPhi>(&I);
168     Insts.insert(Insts.begin(), Phi->lower(Func));
169   }
170 }
171 
172 // This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
173 // create a corresponding assignment instruction, and add all the assignments
174 // near the end of this block. They need to be added before any branch
175 // instruction, and also if the block ends with a compare instruction followed
176 // by a branch instruction that we may want to fuse, it's better to insert the
177 // new assignments before the compare instruction. The
178 // tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
179 //
180 // Note that this transformation takes the Phi dest variables out of SSA form,
181 // as there may be assignments to the dest variable in multiple blocks.
placePhiStores()182 void CfgNode::placePhiStores() {
183   // Find the insertion point.
184   InstList::iterator InsertionPoint = Insts.end();
185   // Every block must end in a terminator instruction, and therefore must have
186   // at least one instruction, so it's valid to decrement InsertionPoint (but
187   // assert just in case).
188   assert(InsertionPoint != Insts.begin());
189   --InsertionPoint;
190   // Confirm that InsertionPoint is a terminator instruction. Calling
191   // getTerminatorEdges() on a non-terminator instruction will cause an
192   // llvm_unreachable().
193   (void)InsertionPoint->getTerminatorEdges();
194   // SafeInsertionPoint is always immediately before the terminator
195   // instruction. If the block ends in a compare and conditional branch, it's
196   // better to place the Phi store before the compare so as not to interfere
197   // with compare/branch fusing. However, if the compare instruction's dest
198   // operand is the same as the new assignment statement's source operand, this
199   // can't be done due to data dependences, so we need to fall back to the
200   // SafeInsertionPoint. To illustrate:
201   //   ; <label>:95
202   //   %97 = load i8* %96, align 1
203   //   %98 = icmp ne i8 %97, 0
204   //   br i1 %98, label %99, label %2132
205   //   ; <label>:99
206   //   %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
207   //   %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
208   // would be Phi-lowered as:
209   //   ; <label>:95
210   //   %97 = load i8* %96, align 1
211   //   %100_phi = %97 ; can be at InsertionPoint
212   //   %98 = icmp ne i8 %97, 0
213   //   %101_phi = %98 ; must be at SafeInsertionPoint
214   //   br i1 %98, label %99, label %2132
215   //   ; <label>:99
216   //   %100 = %100_phi
217   //   %101 = %101_phi
218   //
219   // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
220   // mechanism. If a source basic block ends in a conditional branch:
221   //   labelSource:
222   //   ...
223   //   br i1 %foo, label %labelTrue, label %labelFalse
224   // and a branch target has a Phi involving the branch operand:
225   //   labelTrue:
226   //   %bar = phi i1 [ %foo, %labelSource ], ...
227   // then we actually know the constant i1 value of the Phi operand:
228   //   labelTrue:
229   //   %bar = phi i1 [ true, %labelSource ], ...
230   // It seems that this optimization should be done by clang or opt, but we
231   // could also do it here.
232   InstList::iterator SafeInsertionPoint = InsertionPoint;
233   // Keep track of the dest variable of a compare instruction, so that we
234   // insert the new instruction at the SafeInsertionPoint if the compare's dest
235   // matches the Phi-lowered assignment's source.
236   Variable *CmpInstDest = nullptr;
237   // If the current insertion point is at a conditional branch instruction, and
238   // the previous instruction is a compare instruction, then we move the
239   // insertion point before the compare instruction so as not to interfere with
240   // compare/branch fusing.
241   if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
242     if (!Branch->isUnconditional()) {
243       if (InsertionPoint != Insts.begin()) {
244         --InsertionPoint;
245         if (llvm::isa<InstIcmp>(InsertionPoint) ||
246             llvm::isa<InstFcmp>(InsertionPoint)) {
247           CmpInstDest = InsertionPoint->getDest();
248         } else {
249           ++InsertionPoint;
250         }
251       }
252     }
253   }
254 
255   // Consider every out-edge.
256   for (CfgNode *Succ : OutEdges) {
257     // Consider every Phi instruction at the out-edge.
258     for (Inst &I : Succ->Phis) {
259       auto *Phi = llvm::dyn_cast<InstPhi>(&I);
260       Operand *Operand = Phi->getOperandForTarget(this);
261       assert(Operand);
262       Variable *Dest = I.getDest();
263       assert(Dest);
264       auto *NewInst = InstAssign::create(Func, Dest, Operand);
265       if (CmpInstDest == Operand)
266         Insts.insert(SafeInsertionPoint, NewInst);
267       else
268         Insts.insert(InsertionPoint, NewInst);
269     }
270   }
271 }
272 
273 // Deletes the phi instructions after the loads and stores are placed.
deletePhis()274 void CfgNode::deletePhis() {
275   for (Inst &I : Phis)
276     I.setDeleted();
277 }
278 
279 // Splits the edge from Pred to this node by creating a new node and hooking up
280 // the in and out edges appropriately. (The EdgeIndex parameter is only used to
281 // make the new node's name unique when there are multiple edges between the
282 // same pair of nodes.) The new node's instruction list is initialized to the
283 // empty list, with no terminator instruction. There must not be multiple edges
284 // from Pred to this node so all Inst::getTerminatorEdges implementations must
285 // not contain duplicates.
splitIncomingEdge(CfgNode * Pred,SizeT EdgeIndex)286 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
287   CfgNode *NewNode = Func->makeNode();
288   // Depth is the minimum as it works if both are the same, but if one is
289   // outside the loop and the other is inside, the new node should be placed
290   // outside and not be executed multiple times within the loop.
291   NewNode->setLoopNestDepth(
292       std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
293   if (BuildDefs::dump())
294     NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
295                      std::to_string(EdgeIndex));
296   // The new node is added to the end of the node list, and will later need to
297   // be sorted into a reasonable topological order.
298   NewNode->setNeedsPlacement(true);
299   // Repoint Pred's out-edge.
300   bool Found = false;
301   for (CfgNode *&I : Pred->OutEdges) {
302     if (I == this) {
303       I = NewNode;
304       NewNode->InEdges.push_back(Pred);
305       Found = true;
306       break;
307     }
308   }
309   assert(Found);
310   (void)Found;
311   // Repoint this node's in-edge.
312   Found = false;
313   for (CfgNode *&I : InEdges) {
314     if (I == Pred) {
315       I = NewNode;
316       NewNode->OutEdges.push_back(this);
317       Found = true;
318       break;
319     }
320   }
321   assert(Found);
322   (void)Found;
323   // Repoint all suitable branch instructions' target and return.
324   Found = false;
325   for (Inst &I : Pred->getInsts())
326     if (!I.isDeleted() && I.repointEdges(this, NewNode))
327       Found = true;
328   assert(Found);
329   (void)Found;
330   return NewNode;
331 }
332 
333 namespace {
334 
335 // Helpers for advancedPhiLowering().
336 
337 class PhiDesc {
338   PhiDesc() = delete;
339   PhiDesc(const PhiDesc &) = delete;
340   PhiDesc &operator=(const PhiDesc &) = delete;
341 
342 public:
PhiDesc(InstPhi * Phi,Variable * Dest)343   PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
344   PhiDesc(PhiDesc &&) = default;
345   InstPhi *Phi = nullptr;
346   Variable *Dest = nullptr;
347   Operand *Src = nullptr;
348   bool Processed = false;
349   size_t NumPred = 0; // number of entries whose Src is this Dest
350   int32_t Weight = 0; // preference for topological order
351 };
352 using PhiDescList = llvm::SmallVector<PhiDesc, 32>;
353 
// Scheduling weights for phi assignments; a larger total is scheduled earlier.
//
// An assignment with NumPred=0 always wins over one with NumPred>0.
constexpr int32_t WeightNoPreds = 8;
// A register Src is preferred, because that register might free up.
constexpr int32_t WeightSrcIsReg = 4;
// A non-register Dest is preferred, because the register stays free longer.
constexpr int32_t WeightDestNotReg = 2;
// NumPred=1 is preferred over NumPred>1. This acts as a tiebreaker when a
// dependency cycle must be broken, so that hopefully only one temporary
// assignment has to be added to break the cycle.
constexpr int32_t WeightOnePred = 1;

// WeightNoPreds must outweigh all other preferences combined, otherwise an
// assignment with pending predecessors could be chosen over one without.
static_assert(WeightNoPreds >
                  WeightSrcIsReg + WeightDestNotReg + WeightOnePred,
              "WeightNoPreds must dominate the sum of the other weights");
364 
sameVarOrReg(TargetLowering * Target,const Variable * Var1,const Operand * Opnd)365 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
366                   const Operand *Opnd) {
367   if (Var1 == Opnd)
368     return true;
369   const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
370   if (Var2 == nullptr)
371     return false;
372 
373   // If either operand lacks a register, they cannot be the same.
374   if (!Var1->hasReg())
375     return false;
376   if (!Var2->hasReg())
377     return false;
378 
379   const auto RegNum1 = Var1->getRegNum();
380   const auto RegNum2 = Var2->getRegNum();
381   // Quick common-case check.
382   if (RegNum1 == RegNum2)
383     return true;
384 
385   assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
386          Target->getAliasesForRegister(RegNum2)[RegNum1]);
387   return Target->getAliasesForRegister(RegNum1)[RegNum2];
388 }
389 
390 // Update NumPred for all Phi assignments using Var as their Dest variable.
391 // Also update Weight if NumPred dropped from 2 to 1, or 1 to 0.
updatePreds(PhiDescList & Desc,TargetLowering * Target,Variable * Var)392 void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) {
393   for (PhiDesc &Item : Desc) {
394     if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) {
395       --Item.NumPred;
396       if (Item.NumPred == 1) {
397         // If NumPred changed from 2 to 1, add in WeightOnePred.
398         Item.Weight += WeightOnePred;
399       } else if (Item.NumPred == 0) {
400         // If NumPred changed from 1 to 0, subtract WeightOnePred and add in
401         // WeightNoPreds.
402         Item.Weight += (WeightNoPreds - WeightOnePred);
403       }
404     }
405   }
406 }
407 
408 } // end of anonymous namespace
409 
410 // This the "advanced" version of Phi lowering for a basic block, in contrast
411 // to the simple version that lowers through assignments involving temporaries.
412 //
413 // All Phi instructions in a basic block are conceptually executed in parallel.
414 // However, if we lower Phis early and commit to a sequential ordering, we may
415 // end up creating unnecessary interferences which lead to worse register
416 // allocation. Delaying Phi scheduling until after register allocation can help
417 // unless there are no free registers for shuffling registers or stack slots
418 // and spilling becomes necessary.
419 //
420 // The advanced Phi lowering starts by finding a topological sort of the Phi
421 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on
422 // B. Preexisting register assignments are considered in the topological sort.
423 // If a topological sort is not possible due to a cycle, the cycle is broken by
424 // introducing a non-parallel temporary. For example, a cycle arising from a
425 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
426 // equal, prefer to schedule assignments with register-allocated Src operands
427 // earlier, in case that register becomes free afterwards, and prefer to
428 // schedule assignments with register-allocated Dest variables later, to keep
429 // that register free for longer.
430 //
431 // Once the ordering is determined, the Cfg edge is split and the assignment
432 // list is lowered by the target lowering layer. Since the assignment lowering
433 // may create new infinite-weight temporaries, a follow-on register allocation
434 // pass will be needed. To prepare for this, liveness (including live range
435 // calculation) of the split nodes needs to be calculated, and liveness of the
436 // original node need to be updated to "undo" the effects of the phi
437 // assignments.
438 
439 // The specific placement of the new node within the Cfg node list is deferred
440 // until later, including after empty node contraction.
441 //
442 // After phi assignments are lowered across all blocks, another register
443 // allocation pass is run, focusing only on pre-colored and infinite-weight
444 // variables, similar to Om1 register allocation (except without the need to
445 // specially compute these variables' live ranges, since they have already been
446 // precisely calculated). The register allocator in this mode needs the ability
447 // to forcibly spill and reload registers in case none are naturally available.
advancedPhiLowering()448 void CfgNode::advancedPhiLowering() {
449   if (getPhis().empty())
450     return;
451 
452   PhiDescList Desc;
453 
454   for (Inst &I : Phis) {
455     auto *Phi = llvm::dyn_cast<InstPhi>(&I);
456     if (!Phi->isDeleted()) {
457       Variable *Dest = Phi->getDest();
458       Desc.emplace_back(Phi, Dest);
459       // Undo the effect of the phi instruction on this node's live-in set by
460       // marking the phi dest variable as live on entry.
461       SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
462       auto &LiveIn = Func->getLiveness()->getLiveIn(this);
463       if (VarNum < LiveIn.size()) {
464         assert(!LiveIn[VarNum]);
465         LiveIn[VarNum] = true;
466       }
467       Phi->setDeleted();
468     }
469   }
470   if (Desc.empty())
471     return;
472 
473   TargetLowering *Target = Func->getTarget();
474   SizeT InEdgeIndex = 0;
475   for (CfgNode *Pred : InEdges) {
476     CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
477     SizeT Remaining = Desc.size();
478 
479     // First pass computes Src and initializes NumPred.
480     for (PhiDesc &Item : Desc) {
481       Variable *Dest = Item.Dest;
482       Operand *Src = Item.Phi->getOperandForTarget(Pred);
483       Item.Src = Src;
484       Item.Processed = false;
485       Item.NumPred = 0;
486       // Cherry-pick any trivial assignments, so that they don't contribute to
487       // the running complexity of the topological sort.
488       if (sameVarOrReg(Target, Dest, Src)) {
489         Item.Processed = true;
490         --Remaining;
491         if (Dest != Src)
492           // If Dest and Src are syntactically the same, don't bother adding
493           // the assignment, because in all respects it would be redundant, and
494           // if Dest/Src are on the stack, the target lowering may naively
495           // decide to lower it using a temporary register.
496           Split->appendInst(InstAssign::create(Func, Dest, Src));
497       }
498     }
499     // Second pass computes NumPred by comparing every pair of Phi instructions.
500     for (PhiDesc &Item : Desc) {
501       if (Item.Processed)
502         continue;
503       const Variable *Dest = Item.Dest;
504       for (PhiDesc &Item2 : Desc) {
505         if (Item2.Processed)
506           continue;
507         // There shouldn't be two different Phis with the same Dest variable or
508         // register.
509         assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
510         if (sameVarOrReg(Target, Dest, Item2.Src))
511           ++Item.NumPred;
512       }
513     }
514 
515     // Another pass to compute initial Weight values.
516     for (PhiDesc &Item : Desc) {
517       if (Item.Processed)
518         continue;
519       int32_t Weight = 0;
520       if (Item.NumPred == 0)
521         Weight += WeightNoPreds;
522       if (Item.NumPred == 1)
523         Weight += WeightOnePred;
524       if (auto *Var = llvm::dyn_cast<Variable>(Item.Src))
525         if (Var->hasReg())
526           Weight += WeightSrcIsReg;
527       if (!Item.Dest->hasReg())
528         Weight += WeightDestNotReg;
529       Item.Weight = Weight;
530     }
531 
532     // Repeatedly choose and process the best candidate in the topological sort,
533     // until no candidates remain. This implementation is O(N^2) where N is the
534     // number of Phi instructions, but with a small constant factor compared to
535     // a likely implementation of O(N) topological sort.
536     for (; Remaining; --Remaining) {
537       int32_t BestWeight = -1;
538       PhiDesc *BestItem = nullptr;
539       // Find the best candidate.
540       for (PhiDesc &Item : Desc) {
541         if (Item.Processed)
542           continue;
543         const int32_t Weight = Item.Weight;
544         if (Weight > BestWeight) {
545           BestItem = &Item;
546           BestWeight = Weight;
547         }
548       }
549       assert(BestWeight >= 0);
550       Variable *Dest = BestItem->Dest;
551       Operand *Src = BestItem->Src;
552       assert(!sameVarOrReg(Target, Dest, Src));
553       // Break a cycle by introducing a temporary.
554       while (BestItem->NumPred > 0) {
555         bool Found = false;
556         // If the target instruction "A=B" is part of a cycle, find the "X=A"
557         // assignment in the cycle because it will have to be rewritten as
558         // "X=tmp".
559         for (PhiDesc &Item : Desc) {
560           if (Item.Processed)
561             continue;
562           Operand *OtherSrc = Item.Src;
563           if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
564             SizeT VarNum = Func->getNumVariables();
565             Variable *Tmp = Func->makeVariable(OtherSrc->getType());
566             if (BuildDefs::dump())
567               Tmp->setName(Func, "__split_" + std::to_string(VarNum));
568             Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
569             Item.Src = Tmp;
570             updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc));
571             Found = true;
572             break;
573           }
574         }
575         assert(Found);
576         (void)Found;
577       }
578       // Now that a cycle (if any) has been broken, create the actual
579       // assignment.
580       Split->appendInst(InstAssign::create(Func, Dest, Src));
581       if (auto *Var = llvm::dyn_cast<Variable>(Src))
582         updatePreds(Desc, Target, Var);
583       BestItem->Processed = true;
584     }
585     Split->appendInst(InstBr::create(Func, this));
586 
587     Split->genCode();
588     Func->getVMetadata()->addNode(Split);
589     // Validate to be safe.  All items should be marked as processed, and have
590     // no predecessors.
591     if (BuildDefs::asserts()) {
592       for (PhiDesc &Item : Desc) {
593         (void)Item;
594         assert(Item.Processed);
595         assert(Item.NumPred == 0);
596       }
597     }
598   }
599 }
600 
601 // Does address mode optimization. Pass each instruction to the TargetLowering
602 // object. If it returns a new instruction (representing the optimized address
603 // mode), then insert the new instruction and delete the old.
doAddressOpt()604 void CfgNode::doAddressOpt() {
605   TargetLowering *Target = Func->getTarget();
606   LoweringContext &Context = Target->getContext();
607   Context.init(this);
608   while (!Context.atEnd()) {
609     Target->doAddressOpt();
610   }
611 }
612 
613 // Drives the target lowering. Passes the current instruction and the next
614 // non-deleted instruction for target lowering.
genCode()615 void CfgNode::genCode() {
616   TargetLowering *Target = Func->getTarget();
617   LoweringContext &Context = Target->getContext();
618   // Lower the regular instructions.
619   Context.init(this);
620   Target->initNodeForLowering(this);
621   while (!Context.atEnd()) {
622     InstList::iterator Orig = Context.getCur();
623     if (llvm::isa<InstRet>(*Orig))
624       setHasReturn();
625     Target->lower();
626     // Ensure target lowering actually moved the cursor.
627     assert(Context.getCur() != Orig);
628   }
629   Context.availabilityReset();
630   // Do preliminary lowering of the Phi instructions.
631   Target->prelowerPhis();
632 }
633 
livenessLightweight()634 void CfgNode::livenessLightweight() {
635   SizeT NumVars = Func->getNumVariables();
636   LivenessBV Live(NumVars);
637   // Process regular instructions in reverse order.
638   for (Inst &I : reverse_range(Insts)) {
639     if (I.isDeleted())
640       continue;
641     I.livenessLightweight(Func, Live);
642   }
643   for (Inst &I : Phis) {
644     if (I.isDeleted())
645       continue;
646     I.livenessLightweight(Func, Live);
647   }
648 }
649 
650 // Performs liveness analysis on the block. Returns true if the incoming
651 // liveness changed from before, false if it stayed the same. (If it changes,
652 // the node's predecessors need to be processed again.)
liveness(Liveness * Liveness)653 bool CfgNode::liveness(Liveness *Liveness) {
      // Overview of the steps below:
      //   1. Live-out := union of the successors' live-in sets, plus whatever
      //      each successor's non-deleted phis contribute for this edge.
      //   2. Walk the regular instructions backwards, then the phis forwards,
      //      updating Live (and the begin/end maps in Liveness_Intervals mode).
      //   3. Check that no bit above the global-variable range is still set.
      //   4. Merge the result into this node's live-in set and report whether
      //      live-in or the number of non-dead phis changed.
654   const SizeT NumVars = Liveness->getNumVarsInNode(this);
655   const SizeT NumGlobalVars = Liveness->getNumGlobalVars();
      // Live is a scratch bit vector owned by the Liveness object; it is
      // cleared here before use.
656   LivenessBV &Live = Liveness->getScratchBV();
657   Live.clear();
658 
659   LiveBeginEndMap *LiveBegin = nullptr;
660   LiveBeginEndMap *LiveEnd = nullptr;
661   // Mark the beginning and ending of each variable's live range with the
662   // sentinel instruction number 0.
      // NOTE(review): the code in this branch only clears the two maps and
      // reserves capacity; no sentinel value is written here. Confirm whether
      // the comment above is stale.
663   if (Liveness->getMode() == Liveness_Intervals) {
664     LiveBegin = Liveness->getLiveBegin(this);
665     LiveEnd = Liveness->getLiveEnd(this);
666     LiveBegin->clear();
667     LiveEnd->clear();
668     // Guess that the number of live ranges beginning is roughly the number of
669     // instructions, and same for live ranges ending.
670     LiveBegin->reserve(getInstCountEstimate());
671     LiveEnd->reserve(getInstCountEstimate());
672   }
673 
674   // Initialize Live to be the union of all successors' LiveIn.
675   for (CfgNode *Succ : OutEdges) {
676     const LivenessBV &LiveIn = Liveness->getLiveIn(Succ);
677     assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
678     Live |= LiveIn;
679     // Mark corresponding argument of phis in successor as live.
680     for (Inst &I : Succ->Phis) {
681       if (I.isDeleted())
682         continue;
683       auto *Phi = llvm::cast<InstPhi>(&I);
684       Phi->livenessPhiOperand(Live, this, Liveness);
685     }
686   }
687   assert(Live.empty() || Live.size() == NumGlobalVars);
      // At this point Live is the node's live-out set; record it before the
      // instruction walk modifies Live.
688   Liveness->getLiveOut(this) = Live;
689 
690   // Expand Live so it can hold locals in addition to globals.
691   Live.resize(NumVars);
692   // Process regular instructions in reverse order.
693   for (Inst &I : reverse_range(Insts)) {
694     if (I.isDeleted())
695       continue;
696     I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
697   }
698   // Process phis in forward order so that we can override the instruction
699   // number to be that of the earliest phi instruction in the block.
700   SizeT NumNonDeadPhis = 0;
701   InstNumberT FirstPhiNumber = Inst::NumberSentinel;
702   for (Inst &I : Phis) {
703     if (I.isDeleted())
704       continue;
      // The first non-deleted phi supplies the instruction number that is
      // passed for every phi in the block.
705     if (FirstPhiNumber == Inst::NumberSentinel)
706       FirstPhiNumber = I.getNumber();
      // A true result from Inst::liveness() is counted as a non-dead phi.
707     if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
708       ++NumNonDeadPhis;
709   }
710 
711   // When using the sparse representation, after traversing the instructions in
712   // the block, the Live bitvector should only contain set bits for global
713   // variables upon block entry.  We validate this by testing the upper bits of
714   // the Live bitvector.
      // NOTE(review): if LivenessBV::find_next(Prev) follows the LLVM
      // BitVector convention of searching strictly after Prev, the bit at
      // index NumGlobalVars itself is not covered by this check, although the
      // diagnostic loop below does start at NumGlobalVars. Confirm against
      // the LivenessBV definition.
715   if (Live.find_next(NumGlobalVars) != -1) {
716     if (BuildDefs::dump()) {
717       // This is a fatal liveness consistency error. Print some diagnostics and
718       // abort.
719       Ostream &Str = Func->getContext()->getStrDump();
720       Func->resetCurrentNode();
721       Str << "Invalid Live =";
722       for (SizeT i = NumGlobalVars; i < Live.size(); ++i) {
723         if (Live.test(i)) {
724           Str << " ";
725           Liveness->getVariable(i, this)->dump(Func);
726         }
727       }
728       Str << "\n";
729     }
730     llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
731   }
732   // Now truncate Live to prevent LiveIn from growing.
733   Live.resize(NumGlobalVars);
734 
735   bool Changed = false;
736   LivenessBV &LiveIn = Liveness->getLiveIn(this);
737   assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
738   // Add in current LiveIn
      // Because the previous live-in is OR-ed in, bits already set in LiveIn
      // are never dropped by this update.
739   Live |= LiveIn;
740   // Check result, set LiveIn=Live
741   SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
742   bool LiveInChanged = (Live != LiveIn);
      // A change in either the live-in set or the non-dead phi count is
      // reported to the caller as a change.
743   Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
744   if (LiveInChanged)
745     LiveIn = Live;
746   PrevNumNonDeadPhis = NumNonDeadPhis;
747   return Changed;
748 }
749 
750 // Validate the integrity of the live ranges in this block.  If there are any
751 // errors, it prints details and returns false.  On success, it returns true.
livenessValidateIntervals(Liveness * Liveness) const752 bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
753   if (!BuildDefs::asserts())
754     return true;
755 
756   // Verify there are no duplicates.
757   auto ComparePair = [](const LiveBeginEndMapEntry &A,
758                         const LiveBeginEndMapEntry &B) {
759     return A.first == B.first;
760   };
761   LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
762   LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
763   if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
764           MapBegin.end() &&
765       std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
766           MapEnd.end())
767     return true;
768 
769   // There is definitely a liveness error.  All paths from here return false.
770   if (!BuildDefs::dump())
771     return false;
772 
773   // Print all the errors.
774   if (BuildDefs::dump()) {
775     GlobalContext *Ctx = Func->getContext();
776     OstreamLocker L(Ctx);
777     Ostream &Str = Ctx->getStrDump();
778     if (Func->isVerbose()) {
779       Str << "Live range errors in the following block:\n";
780       dump(Func);
781     }
782     for (auto Start = MapBegin.begin();
783          (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
784          MapBegin.end();
785          ++Start) {
786       auto Next = Start + 1;
787       Str << "Duplicate LR begin, block " << getName() << ", instructions "
788           << Start->second << " & " << Next->second << ", variable "
789           << Liveness->getVariable(Start->first, this)->getName() << "\n";
790     }
791     for (auto Start = MapEnd.begin();
792          (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
793          MapEnd.end();
794          ++Start) {
795       auto Next = Start + 1;
796       Str << "Duplicate LR end,   block " << getName() << ", instructions "
797           << Start->second << " & " << Next->second << ", variable "
798           << Liveness->getVariable(Start->first, this)->getName() << "\n";
799     }
800   }
801 
802   return false;
803 }
804 
805 // Once basic liveness is complete, compute actual live ranges. It is assumed
806 // that within a single basic block, a live range begins at most once and ends
807 // at most once. This is certainly true for pure SSA form. It is also true once
808 // phis are lowered, since each assignment to the phi-based temporary is in a
809 // different basic block, and there is a single read that ends the live in the
810 // basic block that contained the actual phi instruction.
livenessAddIntervals(Liveness * Liveness,InstNumberT FirstInstNum,InstNumberT LastInstNum)811 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
812                                    InstNumberT LastInstNum) {
813   TimerMarker T1(TimerStack::TT_liveRange, Func);
814 
815   const SizeT NumVars = Liveness->getNumVarsInNode(this);
816   const LivenessBV &LiveIn = Liveness->getLiveIn(this);
817   const LivenessBV &LiveOut = Liveness->getLiveOut(this);
818   LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
819   LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
820   std::sort(MapBegin.begin(), MapBegin.end());
821   std::sort(MapEnd.begin(), MapEnd.end());
822 
823   if (!livenessValidateIntervals(Liveness)) {
824     llvm::report_fatal_error("livenessAddIntervals: Liveness error");
825     return;
826   }
827 
828   LivenessBV &LiveInAndOut = Liveness->getScratchBV();
829   LiveInAndOut = LiveIn;
830   LiveInAndOut &= LiveOut;
831 
832   // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
833   auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
834   auto IBE = MapBegin.end(), IEE = MapEnd.end();
835   while (IBB != IBE || IEB != IEE) {
836     SizeT i1 = IBB == IBE ? NumVars : IBB->first;
837     SizeT i2 = IEB == IEE ? NumVars : IEB->first;
838     SizeT i = std::min(i1, i2);
839     // i1 is the Variable number of the next MapBegin entry, and i2 is the
840     // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
841     // live range begins and ends in this block. If i1<i2, then i1's live range
842     // begins at instruction IBB->second and extends through the end of the
843     // block. If i1>i2, then i2's live range begins at the first instruction of
844     // the block and ends at IEB->second. In any case, we choose the lesser of
845     // i1 and i2 and proceed accordingly.
846     InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
847     InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;
848 
849     Variable *Var = Liveness->getVariable(i, this);
850     if (LB > LE) {
851       Var->addLiveRange(FirstInstNum, LE, this);
852       Var->addLiveRange(LB, LastInstNum + 1, this);
853       // Assert that Var is a global variable by checking that its liveness
854       // index is less than the number of globals. This ensures that the
855       // LiveInAndOut[] access is valid.
856       assert(i < Liveness->getNumGlobalVars());
857       LiveInAndOut[i] = false;
858     } else {
859       Var->addLiveRange(LB, LE, this);
860     }
861     if (i == i1)
862       ++IBB;
863     if (i == i2)
864       ++IEB;
865   }
866   // Process the variables that are live across the entire block.
867   for (int i = LiveInAndOut.find_first(); i != -1;
868        i = LiveInAndOut.find_next(i)) {
869     Variable *Var = Liveness->getVariable(i, this);
870     if (Liveness->getRangeMask(Var->getIndex()))
871       Var->addLiveRange(FirstInstNum, LastInstNum + 1, this);
872   }
873 }
874 
875 // If this node contains only deleted instructions, and ends in an
876 // unconditional branch, contract the node by repointing all its in-edges to
877 // its successor.
contractIfEmpty()878 void CfgNode::contractIfEmpty() {
879   if (InEdges.empty())
880     return;
881   Inst *Branch = nullptr;
882   for (Inst &I : Insts) {
883     if (I.isDeleted())
884       continue;
885     if (I.isUnconditionalBranch())
886       Branch = &I;
887     else if (!I.isRedundantAssign())
888       return;
889   }
890   // Make sure there is actually a successor to repoint in-edges to.
891   if (OutEdges.empty())
892     return;
893   assert(hasSingleOutEdge());
894   // Don't try to delete a self-loop.
895   if (OutEdges[0] == this)
896     return;
897   // Make sure the node actually contains (ends with) an unconditional branch.
898   if (Branch == nullptr)
899     return;
900 
901   Branch->setDeleted();
902   CfgNode *Successor = OutEdges.front();
903   // Repoint all this node's in-edges to this node's successor, unless this
904   // node's successor is actually itself (in which case the statement
905   // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
906   // over this->InEdges).
907   if (Successor != this) {
908     for (CfgNode *Pred : InEdges) {
909       for (CfgNode *&I : Pred->OutEdges) {
910         if (I == this) {
911           I = Successor;
912           Successor->InEdges.push_back(Pred);
913         }
914       }
915       for (Inst &I : Pred->getInsts()) {
916         if (!I.isDeleted())
917           I.repointEdges(this, Successor);
918       }
919     }
920 
921     // Remove the in-edge to the successor to allow node reordering to make
922     // better decisions. For example it's more helpful to place a node after a
923     // reachable predecessor than an unreachable one (like the one we just
924     // contracted).
925     Successor->InEdges.erase(
926         std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
927   }
928   InEdges.clear();
929 }
930 
doBranchOpt(const CfgNode * NextNode)931 void CfgNode::doBranchOpt(const CfgNode *NextNode) {
932   TargetLowering *Target = Func->getTarget();
933   // Find the first opportunity for branch optimization (which will be the last
934   // instruction in the block) and stop. This is sufficient unless there is
935   // some target lowering where we have the possibility of multiple
936   // optimizations per block. Take care with switch lowering as there are
937   // multiple unconditional branches and only the last can be deleted.
938   for (Inst &I : reverse_range(Insts)) {
939     if (!I.isDeleted()) {
940       Target->doBranchOpt(&I, NextNode);
941       return;
942     }
943   }
944 }
945 
946 // ======================== Dump routines ======================== //
947 
948 namespace {
949 
950 // Helper functions for emit().
951 
emitRegisterUsage(Ostream & Str,const Cfg * Func,const CfgNode * Node,bool IsLiveIn,CfgVector<SizeT> & LiveRegCount)952 void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
953                        bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
954   if (!BuildDefs::dump())
955     return;
956   Liveness *Liveness = Func->getLiveness();
957   const LivenessBV *Live;
958   const auto StackReg = Func->getTarget()->getStackReg();
959   const auto FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
960   if (IsLiveIn) {
961     Live = &Liveness->getLiveIn(Node);
962     Str << "\t\t\t\t/* LiveIn=";
963   } else {
964     Live = &Liveness->getLiveOut(Node);
965     Str << "\t\t\t\t/* LiveOut=";
966   }
967   if (!Live->empty()) {
968     CfgVector<Variable *> LiveRegs;
969     for (SizeT i = 0; i < Live->size(); ++i) {
970       if (!(*Live)[i])
971         continue;
972       Variable *Var = Liveness->getVariable(i, Node);
973       if (!Var->hasReg())
974         continue;
975       const auto RegNum = Var->getRegNum();
976       if (RegNum == StackReg || RegNum == FrameOrStackReg)
977         continue;
978       if (IsLiveIn)
979         ++LiveRegCount[RegNum];
980       LiveRegs.push_back(Var);
981     }
982     // Sort the variables by regnum so they are always printed in a familiar
983     // order.
984     std::sort(LiveRegs.begin(), LiveRegs.end(),
985               [](const Variable *V1, const Variable *V2) {
986                 return unsigned(V1->getRegNum()) < unsigned(V2->getRegNum());
987               });
988     bool First = true;
989     for (Variable *Var : LiveRegs) {
990       if (!First)
991         Str << ",";
992       First = false;
993       Var->emit(Func);
994     }
995   }
996   Str << " */\n";
997 }
998 
999 /// Returns true if some text was emitted - in which case the caller definitely
1000 /// needs to emit a newline character.
emitLiveRangesEnded(Ostream & Str,const Cfg * Func,const Inst * Instr,CfgVector<SizeT> & LiveRegCount)1001 bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
1002                          CfgVector<SizeT> &LiveRegCount) {
1003   bool Printed = false;
1004   if (!BuildDefs::dump())
1005     return Printed;
1006   Variable *Dest = Instr->getDest();
1007   // Normally we increment the live count for the dest register. But we
1008   // shouldn't if the instruction's IsDestRedefined flag is set, because this
1009   // means that the target lowering created this instruction as a non-SSA
1010   // assignment; i.e., a different, previous instruction started the dest
1011   // variable's live range.
1012   if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
1013     ++LiveRegCount[Dest->getRegNum()];
1014   FOREACH_VAR_IN_INST(Var, *Instr) {
1015     bool ShouldReport = Instr->isLastUse(Var);
1016     if (ShouldReport && Var->hasReg()) {
1017       // Don't report end of live range until the live count reaches 0.
1018       SizeT NewCount = --LiveRegCount[Var->getRegNum()];
1019       if (NewCount)
1020         ShouldReport = false;
1021     }
1022     if (ShouldReport) {
1023       if (Printed)
1024         Str << ",";
1025       else
1026         Str << " \t/* END=";
1027       Var->emit(Func);
1028       Printed = true;
1029     }
1030   }
1031   if (Printed)
1032     Str << " */";
1033   return Printed;
1034 }
1035 
updateStats(Cfg * Func,const Inst * I)1036 void updateStats(Cfg *Func, const Inst *I) {
1037   if (!BuildDefs::dump())
1038     return;
1039   // Update emitted instruction count, plus fill/spill count for Variable
1040   // operands without a physical register.
1041   if (uint32_t Count = I->getEmitInstCount()) {
1042     Func->getContext()->statsUpdateEmitted(Count);
1043     if (Variable *Dest = I->getDest()) {
1044       if (!Dest->hasReg())
1045         Func->getContext()->statsUpdateFills();
1046     }
1047     for (SizeT S = 0; S < I->getSrcSize(); ++S) {
1048       if (auto *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
1049         if (!Src->hasReg())
1050           Func->getContext()->statsUpdateSpills();
1051       }
1052     }
1053   }
1054 }
1055 
1056 } // end of anonymous namespace
1057 
emit(Cfg * Func) const1058 void CfgNode::emit(Cfg *Func) const {
1059   if (!BuildDefs::dump())
1060     return;
1061   Func->setCurrentNode(this);
1062   Ostream &Str = Func->getContext()->getStrEmit();
1063   Liveness *Liveness = Func->getLiveness();
1064   const bool DecorateAsm = Liveness && getFlags().getDecorateAsm();
1065   Str << getAsmName() << ":\n";
1066   // LiveRegCount keeps track of the number of currently live variables that
1067   // each register is assigned to. Normally that would be only 0 or 1, but the
1068   // register allocator's AllowOverlap inference allows it to be greater than 1
1069   // for short periods.
1070   CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
1071   if (DecorateAsm) {
1072     constexpr bool IsLiveIn = true;
1073     emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
1074     if (getInEdges().size()) {
1075       Str << "\t\t\t\t/* preds=";
1076       bool First = true;
1077       for (CfgNode *I : getInEdges()) {
1078         if (!First)
1079           Str << ",";
1080         First = false;
1081         Str << "$" << I->getName();
1082       }
1083       Str << " */\n";
1084     }
1085     if (getLoopNestDepth()) {
1086       Str << "\t\t\t\t/* loop depth=" << getLoopNestDepth() << " */\n";
1087     }
1088   }
1089 
1090   for (const Inst &I : Phis) {
1091     if (I.isDeleted())
1092       continue;
1093     // Emitting a Phi instruction should cause an error.
1094     I.emit(Func);
1095   }
1096   for (const Inst &I : Insts) {
1097     if (I.isDeleted())
1098       continue;
1099     if (I.isRedundantAssign()) {
1100       // Usually, redundant assignments end the live range of the src variable
1101       // and begin the live range of the dest variable, with no net effect on
1102       // the liveness of their register. However, if the register allocator
1103       // infers the AllowOverlap condition, then this may be a redundant
1104       // assignment that does not end the src variable's live range, in which
1105       // case the active variable count for that register needs to be bumped.
1106       // That normally would have happened as part of emitLiveRangesEnded(),
1107       // but that isn't called for redundant assignments.
1108       Variable *Dest = I.getDest();
1109       if (DecorateAsm && Dest->hasReg()) {
1110         ++LiveRegCount[Dest->getRegNum()];
1111         if (I.isLastUse(I.getSrc(0)))
1112           --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
1113       }
1114       continue;
1115     }
1116     I.emit(Func);
1117     bool Printed = false;
1118     if (DecorateAsm)
1119       Printed = emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
1120     if (Printed || llvm::isa<InstTarget>(&I))
1121       Str << "\n";
1122     updateStats(Func, &I);
1123   }
1124   if (DecorateAsm) {
1125     constexpr bool IsLiveIn = false;
1126     emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
1127   }
1128 }
1129 
1130 // Helper class for emitIAS().
1131 namespace {
1132 class BundleEmitHelper {
1133   BundleEmitHelper() = delete;
1134   BundleEmitHelper(const BundleEmitHelper &) = delete;
1135   BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;
1136 
1137 public:
BundleEmitHelper(Assembler * Asm,const InstList & Insts)1138   BundleEmitHelper(Assembler *Asm, const InstList &Insts)
1139       : Asm(Asm), End(Insts.end()), BundleLockStart(End),
1140         BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
1141         BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
1142   // Check whether we're currently within a bundle_lock region.
isInBundleLockRegion() const1143   bool isInBundleLockRegion() const { return BundleLockStart != End; }
1144   // Check whether the current bundle_lock region has the align_to_end option.
isAlignToEnd() const1145   bool isAlignToEnd() const {
1146     assert(isInBundleLockRegion());
1147     return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
1148            InstBundleLock::Opt_AlignToEnd;
1149   }
isPadToEnd() const1150   bool isPadToEnd() const {
1151     assert(isInBundleLockRegion());
1152     return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
1153            InstBundleLock::Opt_PadToEnd;
1154   }
1155   // Check whether the entire bundle_lock region falls within the same bundle.
isSameBundle() const1156   bool isSameBundle() const {
1157     assert(isInBundleLockRegion());
1158     return SizeSnapshotPre == SizeSnapshotPost ||
1159            (SizeSnapshotPre & BundleMaskHi) ==
1160                ((SizeSnapshotPost - 1) & BundleMaskHi);
1161   }
1162   // Get the bundle alignment of the first instruction of the bundle_lock
1163   // region.
getPreAlignment() const1164   intptr_t getPreAlignment() const {
1165     assert(isInBundleLockRegion());
1166     return SizeSnapshotPre & BundleMaskLo;
1167   }
1168   // Get the bundle alignment of the first instruction past the bundle_lock
1169   // region.
getPostAlignment() const1170   intptr_t getPostAlignment() const {
1171     assert(isInBundleLockRegion());
1172     return SizeSnapshotPost & BundleMaskLo;
1173   }
1174   // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
1175   // back the instruction iteration to that point.
getBundleLockStart() const1176   InstList::const_iterator getBundleLockStart() const {
1177     assert(isInBundleLockRegion());
1178     return BundleLockStart;
1179   }
1180   // Set up bookkeeping when the bundle_lock instruction is first processed.
enterBundleLock(InstList::const_iterator I)1181   void enterBundleLock(InstList::const_iterator I) {
1182     assert(!isInBundleLockRegion());
1183     BundleLockStart = I;
1184     SizeSnapshotPre = Asm->getBufferSize();
1185     Asm->setPreliminary(true);
1186     assert(isInBundleLockRegion());
1187   }
1188   // Update bookkeeping when the bundle_unlock instruction is processed.
enterBundleUnlock()1189   void enterBundleUnlock() {
1190     assert(isInBundleLockRegion());
1191     SizeSnapshotPost = Asm->getBufferSize();
1192   }
1193   // Update bookkeeping when we are completely finished with the bundle_lock
1194   // region.
leaveBundleLockRegion()1195   void leaveBundleLockRegion() { BundleLockStart = End; }
1196   // Check whether the instruction sequence fits within the current bundle, and
1197   // if not, add nop padding to the end of the current bundle.
padToNextBundle()1198   void padToNextBundle() {
1199     assert(isInBundleLockRegion());
1200     if (!isSameBundle()) {
1201       intptr_t PadToNextBundle = BundleSize - getPreAlignment();
1202       Asm->padWithNop(PadToNextBundle);
1203       SizeSnapshotPre += PadToNextBundle;
1204       SizeSnapshotPost += PadToNextBundle;
1205       assert((Asm->getBufferSize() & BundleMaskLo) == 0);
1206       assert(Asm->getBufferSize() == SizeSnapshotPre);
1207     }
1208   }
1209   // If align_to_end is specified, add padding such that the instruction
1210   // sequences ends precisely at a bundle boundary.
padForAlignToEnd()1211   void padForAlignToEnd() {
1212     assert(isInBundleLockRegion());
1213     if (isAlignToEnd()) {
1214       if (intptr_t Offset = getPostAlignment()) {
1215         Asm->padWithNop(BundleSize - Offset);
1216         SizeSnapshotPre = Asm->getBufferSize();
1217       }
1218     }
1219   }
1220   // If pad_to_end is specified, add padding such that the first instruction
1221   // after the instruction sequence starts at a bundle boundary.
padForPadToEnd()1222   void padForPadToEnd() {
1223     assert(isInBundleLockRegion());
1224     if (isPadToEnd()) {
1225       if (intptr_t Offset = getPostAlignment()) {
1226         Asm->padWithNop(BundleSize - Offset);
1227         SizeSnapshotPre = Asm->getBufferSize();
1228       }
1229     }
1230   } // Update bookkeeping when rolling back for the second pass.
rollback()1231   void rollback() {
1232     assert(isInBundleLockRegion());
1233     Asm->setBufferSize(SizeSnapshotPre);
1234     Asm->setPreliminary(false);
1235   }
1236 
1237 private:
1238   Assembler *const Asm;
1239   // End is a sentinel value such that BundleLockStart==End implies that we are
1240   // not in a bundle_lock region.
1241   const InstList::const_iterator End;
1242   InstList::const_iterator BundleLockStart;
1243   const intptr_t BundleSize;
1244   // Masking with BundleMaskLo identifies an address's bundle offset.
1245   const intptr_t BundleMaskLo;
1246   // Masking with BundleMaskHi identifies an address's bundle.
1247   const intptr_t BundleMaskHi;
1248   intptr_t SizeSnapshotPre = 0;
1249   intptr_t SizeSnapshotPost = 0;
1250 };
1251 
1252 } // end of anonymous namespace
1253 
emitIAS(Cfg * Func) const1254 void CfgNode::emitIAS(Cfg *Func) const {
1255   Func->setCurrentNode(this);
1256   Assembler *Asm = Func->getAssembler<>();
1257   // TODO(stichnot): When sandboxing, defer binding the node label until just
1258   // before the first instruction is emitted, to reduce the chance that a
1259   // padding nop is a branch target.
1260   Asm->bindCfgNodeLabel(this);
1261   for (const Inst &I : Phis) {
1262     if (I.isDeleted())
1263       continue;
1264     // Emitting a Phi instruction should cause an error.
1265     I.emitIAS(Func);
1266   }
1267 
1268   // Do the simple emission if not sandboxed.
1269   if (!getFlags().getUseSandboxing()) {
1270     for (const Inst &I : Insts) {
1271       if (!I.isDeleted() && !I.isRedundantAssign()) {
1272         I.emitIAS(Func);
1273         updateStats(Func, &I);
1274       }
1275     }
1276     return;
1277   }
1278 
1279   // The remainder of the function handles emission with sandboxing. There are
1280   // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
1281   // instructions. All other instructions are treated as an implicit
1282   // one-instruction bundle_lock region. Emission is done twice for each
1283   // bundle_lock region. The first pass is a preliminary pass, after which we
1284   // can figure out what nop padding is needed, then roll back, and make the
1285   // final pass.
1286   //
1287   // Ideally, the first pass would be speculative and the second pass would
1288   // only be done if nop padding were needed, but the structure of the
1289   // integrated assembler makes it hard to roll back the state of label
1290   // bindings, label links, and relocation fixups. Instead, the first pass just
1291   // disables all mutation of that state.
1292 
1293   BundleEmitHelper Helper(Asm, Insts);
1294   InstList::const_iterator End = Insts.end();
1295   // Retrying indicates that we had to roll back to the bundle_lock instruction
1296   // to apply padding before the bundle_lock sequence.
1297   bool Retrying = false;
1298   for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
1299     if (I->isDeleted() || I->isRedundantAssign())
1300       continue;
1301 
1302     if (llvm::isa<InstBundleLock>(I)) {
1303       // Set up the initial bundle_lock state. This should not happen while
1304       // retrying, because the retry rolls back to the instruction following
1305       // the bundle_lock instruction.
1306       assert(!Retrying);
1307       Helper.enterBundleLock(I);
1308       continue;
1309     }
1310 
1311     if (llvm::isa<InstBundleUnlock>(I)) {
1312       Helper.enterBundleUnlock();
1313       if (Retrying) {
1314         // Make sure all instructions are in the same bundle.
1315         assert(Helper.isSameBundle());
1316         // If align_to_end is specified, make sure the next instruction begins
1317         // the bundle.
1318         assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
1319         Helper.padForPadToEnd();
1320         Helper.leaveBundleLockRegion();
1321         Retrying = false;
1322       } else {
1323         // This is the first pass, so roll back for the retry pass.
1324         Helper.rollback();
1325         // Pad to the next bundle if the instruction sequence crossed a bundle
1326         // boundary.
1327         Helper.padToNextBundle();
1328         // Insert additional padding to make AlignToEnd work.
1329         Helper.padForAlignToEnd();
1330         // Prepare for the retry pass after padding is done.
1331         Retrying = true;
1332         I = Helper.getBundleLockStart();
1333       }
1334       continue;
1335     }
1336 
1337     // I points to a non bundle_lock/bundle_unlock instruction.
1338     if (Helper.isInBundleLockRegion()) {
1339       I->emitIAS(Func);
1340       // Only update stats during the final pass.
1341       if (Retrying)
1342         updateStats(Func, iteratorToInst(I));
1343     } else {
1344       // Treat it as though there were an implicit bundle_lock and
1345       // bundle_unlock wrapping the instruction.
1346       Helper.enterBundleLock(I);
1347       I->emitIAS(Func);
1348       Helper.enterBundleUnlock();
1349       Helper.rollback();
1350       Helper.padToNextBundle();
1351       I->emitIAS(Func);
1352       updateStats(Func, iteratorToInst(I));
1353       Helper.leaveBundleLockRegion();
1354     }
1355   }
1356 
1357   // Don't allow bundle locking across basic blocks, to keep the backtracking
1358   // mechanism simple.
1359   assert(!Helper.isInBundleLockRegion());
1360   assert(!Retrying);
1361 }
1362 
dump(Cfg * Func) const1363 void CfgNode::dump(Cfg *Func) const {
1364   if (!BuildDefs::dump())
1365     return;
1366   Func->setCurrentNode(this);
1367   Ostream &Str = Func->getContext()->getStrDump();
1368   Liveness *Liveness = Func->getLiveness();
1369   if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
1370     Str << getName() << ":\n";
1371   // Dump the loop nest depth
1372   if (Func->isVerbose(IceV_Loop))
1373     Str << "    // LoopNestDepth = " << getLoopNestDepth() << "\n";
1374   // Dump list of predecessor nodes.
1375   if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
1376     Str << "    // preds = ";
1377     bool First = true;
1378     for (CfgNode *I : InEdges) {
1379       if (!First)
1380         Str << ", ";
1381       First = false;
1382       Str << "%" << I->getName();
1383     }
1384     Str << "\n";
1385   }
1386   // Dump the live-in variables.
1387   if (Func->isVerbose(IceV_Liveness)) {
1388     if (Liveness != nullptr && !Liveness->getLiveIn(this).empty()) {
1389       const LivenessBV &LiveIn = Liveness->getLiveIn(this);
1390       Str << "    // LiveIn:";
1391       for (SizeT i = 0; i < LiveIn.size(); ++i) {
1392         if (LiveIn[i]) {
1393           Variable *Var = Liveness->getVariable(i, this);
1394           Str << " %" << Var->getName();
1395           if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
1396             Str << ":"
1397                 << Func->getTarget()->getRegName(Var->getRegNum(),
1398                                                  Var->getType());
1399           }
1400         }
1401       }
1402       Str << "\n";
1403     }
1404   }
1405   // Dump each instruction.
1406   if (Func->isVerbose(IceV_Instructions)) {
1407     for (const Inst &I : Phis)
1408       I.dumpDecorated(Func);
1409     for (const Inst &I : Insts)
1410       I.dumpDecorated(Func);
1411   }
1412   // Dump the live-out variables.
1413   if (Func->isVerbose(IceV_Liveness)) {
1414     if (Liveness != nullptr && !Liveness->getLiveOut(this).empty()) {
1415       const LivenessBV &LiveOut = Liveness->getLiveOut(this);
1416       Str << "    // LiveOut:";
1417       for (SizeT i = 0; i < LiveOut.size(); ++i) {
1418         if (LiveOut[i]) {
1419           Variable *Var = Liveness->getVariable(i, this);
1420           Str << " %" << Var->getName();
1421           if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
1422             Str << ":"
1423                 << Func->getTarget()->getRegName(Var->getRegNum(),
1424                                                  Var->getType());
1425           }
1426         }
1427       }
1428       Str << "\n";
1429     }
1430   }
1431   // Dump list of successor nodes.
1432   if (Func->isVerbose(IceV_Succs)) {
1433     Str << "    // succs = ";
1434     bool First = true;
1435     for (CfgNode *I : OutEdges) {
1436       if (!First)
1437         Str << ", ";
1438       First = false;
1439       Str << "%" << I->getName();
1440     }
1441     Str << "\n";
1442   }
1443 }
1444 
removeInEdge(CfgNode * In)1445 void CfgNode::removeInEdge(CfgNode *In) {
1446   InEdges.erase(std::find(InEdges.begin(), InEdges.end(), In));
1447 }
1448 
shortCircuit()1449 CfgNode *CfgNode::shortCircuit() {
1450   auto *Func = getCfg();
1451   auto *Last = &getInsts().back();
1452   Variable *Condition = nullptr;
1453   InstBr *Br = nullptr;
1454   if ((Br = llvm::dyn_cast<InstBr>(Last))) {
1455     if (!Br->isUnconditional()) {
1456       Condition = llvm::dyn_cast<Variable>(Br->getCondition());
1457     }
1458   }
1459   if (Condition == nullptr)
1460     return nullptr;
1461 
1462   auto *JumpOnTrue = Br->getTargetTrue();
1463   auto *JumpOnFalse = Br->getTargetFalse();
1464 
1465   bool FoundOr = false;
1466   bool FoundAnd = false;
1467 
1468   InstArithmetic *TopLevelBoolOp = nullptr;
1469 
1470   for (auto &Inst : reverse_range(getInsts())) {
1471     if (Inst.isDeleted())
1472       continue;
1473     if (Inst.getDest() == Condition) {
1474       if (auto *Arith = llvm::dyn_cast<InstArithmetic>(&Inst)) {
1475 
1476         FoundOr = (Arith->getOp() == InstArithmetic::OpKind::Or);
1477         FoundAnd = (Arith->getOp() == InstArithmetic::OpKind::And);
1478 
1479         if (FoundOr || FoundAnd) {
1480           TopLevelBoolOp = Arith;
1481           break;
1482         }
1483       }
1484     }
1485   }
1486 
1487   if (!TopLevelBoolOp)
1488     return nullptr;
1489 
1490   auto IsOperand = [](Inst *Instr, Operand *Opr) -> bool {
1491     for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
1492       if (Instr->getSrc(i) == Opr)
1493         return true;
1494     }
1495     return false;
1496   };
1497   Inst *FirstOperandDef = nullptr;
1498   for (auto &Inst : getInsts()) {
1499     if (IsOperand(TopLevelBoolOp, Inst.getDest())) {
1500       FirstOperandDef = &Inst;
1501       break;
1502     }
1503   }
1504 
1505   if (FirstOperandDef == nullptr) {
1506     return nullptr;
1507   }
1508 
1509   // Check for side effects
1510   auto It = Ice::instToIterator(FirstOperandDef);
1511   while (It != getInsts().end()) {
1512     if (It->isDeleted()) {
1513       ++It;
1514       continue;
1515     }
1516     if (llvm::isa<InstBr>(It) || llvm::isa<InstRet>(It)) {
1517       break;
1518     }
1519     auto *Dest = It->getDest();
1520     if (It->getDest() == nullptr || It->hasSideEffects() ||
1521         !Func->getVMetadata()->isSingleBlock(Dest)) {
1522       // Relying on short cicuit eval here.
1523       // getVMetadata()->isSingleBlock(Dest)
1524       // will segfault if It->getDest() == nullptr
1525       return nullptr;
1526     }
1527     It++;
1528   }
1529 
1530   auto *NewNode = Func->makeNode();
1531   NewNode->setLoopNestDepth(getLoopNestDepth());
1532   It = Ice::instToIterator(FirstOperandDef);
1533   It++; // Have to split after the def
1534 
1535   NewNode->getInsts().splice(NewNode->getInsts().begin(), getInsts(), It,
1536                              getInsts().end());
1537 
1538   if (BuildDefs::dump()) {
1539     NewNode->setName(getName().append("_2"));
1540     setName(getName().append("_1"));
1541   }
1542 
1543   // Point edges properly
1544   NewNode->addInEdge(this);
1545   for (auto *Out : getOutEdges()) {
1546     NewNode->addOutEdge(Out);
1547     Out->addInEdge(NewNode);
1548   }
1549   removeAllOutEdges();
1550   addOutEdge(NewNode);
1551 
1552   // Manage Phi instructions of successors
1553   for (auto *Succ : NewNode->getOutEdges()) {
1554     for (auto &Inst : Succ->getPhis()) {
1555       auto *Phi = llvm::cast<InstPhi>(&Inst);
1556       for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
1557         if (Phi->getLabel(i) == this) {
1558           Phi->addArgument(Phi->getSrc(i), NewNode);
1559         }
1560       }
1561     }
1562   }
1563 
1564   // Create new Br instruction
1565   InstBr *NewInst = nullptr;
1566   if (FoundOr) {
1567     addOutEdge(JumpOnTrue);
1568     JumpOnFalse->removeInEdge(this);
1569     NewInst =
1570         InstBr::create(Func, FirstOperandDef->getDest(), JumpOnTrue, NewNode);
1571   } else if (FoundAnd) {
1572     addOutEdge(JumpOnFalse);
1573     JumpOnTrue->removeInEdge(this);
1574     NewInst =
1575         InstBr::create(Func, FirstOperandDef->getDest(), NewNode, JumpOnFalse);
1576   } else {
1577     return nullptr;
1578   }
1579 
1580   assert(NewInst != nullptr);
1581   appendInst(NewInst);
1582 
1583   Operand *UnusedOperand = nullptr;
1584   assert(TopLevelBoolOp->getSrcSize() == 2);
1585   if (TopLevelBoolOp->getSrc(0) == FirstOperandDef->getDest())
1586     UnusedOperand = TopLevelBoolOp->getSrc(1);
1587   else if (TopLevelBoolOp->getSrc(1) == FirstOperandDef->getDest())
1588     UnusedOperand = TopLevelBoolOp->getSrc(0);
1589   assert(UnusedOperand);
1590 
1591   Br->replaceSource(0, UnusedOperand); // Index 0 has the condition of the Br
1592 
1593   TopLevelBoolOp->setDeleted();
1594   return NewNode;
1595 }
1596 
1597 } // end of namespace Ice
1598