//===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation ----===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the CfgNode class, including the complexities of
/// instruction insertion and in-edge calculation.
///
//===----------------------------------------------------------------------===//

#include "IceCfgNode.h"

#include "IceAssembler.h"
#include "IceCfg.h"
#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"

namespace Ice {

// Adds an instruction to either the Phi list or the regular instruction list.
// Validates that all Phis are added before all regular instructions.
void CfgNode::appendInst(Inst *Instr) {
  ++InstCountEstimate;

  if (BuildDefs::wasm()) {
    if (llvm::isa<InstSwitch>(Instr) || llvm::isa<InstBr>(Instr)) {
      for (auto *N : Instr->getTerminatorEdges()) {
        N->addInEdge(this);
        addOutEdge(N);
      }
    }
  }

  if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
    if (!Insts.empty()) {
      Func->setError("Phi instruction added to the middle of a block");
      return;
    }
    Phis.push_back(Phi);
  } else {
    Insts.push_back(Instr);
  }
}

void CfgNode::replaceInEdge(CfgNode *Old, CfgNode *New) {
  for (SizeT i = 0; i < InEdges.size(); ++i) {
    if (InEdges[i] == Old) {
      InEdges[i] = New;
    }
  }
  for (auto &Inst : getPhis()) {
    auto &Phi = llvm::cast<InstPhi>(Inst);
    for (SizeT i = 0; i < Phi.getSrcSize(); ++i) {
      if (Phi.getLabel(i) == Old) {
        Phi.setLabel(i, New);
      }
    }
  }
}

namespace {
template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
  const bool DoDelete =
      BuildDefs::minimal() || !getFlags().getKeepDeletedInsts();
  auto I = L->begin(), E = L->end(), Next = I;
  for (++Next; I != E; I = Next++) {
    if (DoDelete && I->isDeleted()) {
      L->remove(I);
    } else {
      I->renumber(Func);
    }
  }
}
} // end of anonymous namespace

void CfgNode::renumberInstructions() {
  InstNumberT FirstNumber = Func->getNextInstNumber();
  removeDeletedAndRenumber(&Phis, Func);
  removeDeletedAndRenumber(&Insts, Func);
  InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
}

// When a node is created, the OutEdges are immediately known, but the InEdges
// have to be built up incrementally. After the CFG has been constructed, the
// computePredecessors() pass finalizes it by creating the InEdges list.
void CfgNode::computePredecessors() {
  for (CfgNode *Succ : OutEdges)
    Succ->InEdges.push_back(this);
}

void CfgNode::computeSuccessors() {
  OutEdges.clear();
  InEdges.clear();
  assert(!Insts.empty());
  OutEdges = Insts.rbegin()->getTerminatorEdges();
}

// Ensure each Phi instruction in the node is consistent with respect to
// control flow. For each predecessor, there must be a phi argument with that
// label. If a phi argument's label doesn't appear in the predecessor list
// (which can happen as a result of e.g. unreachable node elimination), its
// value is modified to be zero, to maintain consistency in liveness analysis.
// This allows us to remove some dead control flow without a major rework of
// the phi instructions. We don't check that phi arguments with the same label
// have the same value.
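//
// For illustration (hypothetical labels and values), with predecessors
// {%bb1, %bb2}:
//   %x = phi i32 [ %v1, %bb1 ], [ %v2, %bb3 ]  ; %bb3 not a predecessor,
//                                              ; so its operand is zeroed
//   %x = phi i32 [ %v1, %bb1 ]                 ; no argument for %bb2:
//                                              ; fatal error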
void CfgNode::enforcePhiConsistency() {
  for (Inst &Instr : Phis) {
    auto *Phi = llvm::cast<InstPhi>(&Instr);
    // We do a simple O(N^2) algorithm to check for consistency. Even so, it
    // shows up as only about 0.2% of the total translation time. But if
    // necessary, we could improve the complexity by using a hash table to
    // count how many times each node is referenced in the Phi instruction,
    // and how many times each node is referenced in the incoming edge list,
    // and compare the two for equality.
    for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
      CfgNode *Label = Phi->getLabel(i);
      bool Found = false;
      for (CfgNode *InNode : getInEdges()) {
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found) {
        // Predecessor was unreachable, so if (impossibly) the control flow
        // enters from that predecessor, the value should be zero.
        Phi->clearOperandForTarget(Label);
      }
    }
    for (CfgNode *InNode : getInEdges()) {
      bool Found = false;
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        CfgNode *Label = Phi->getLabel(i);
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found)
        llvm::report_fatal_error("Phi error: missing label for incoming edge");
    }
  }
}

// This does part 1 of Phi lowering, by creating a new dest variable for each
// Phi instruction, replacing the Phi instruction's dest with that variable,
// and adding an explicit assignment of the old dest to the new dest. For
// example,
//   a=phi(...)
// changes to
//   "a_phi=phi(...); a=a_phi".
//
// This is in preparation for part 2 which deletes the Phi instructions and
// appends assignment instructions to predecessor blocks. Note that this
// transformation preserves SSA form.
void CfgNode::placePhiLoads() {
  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    Insts.insert(Insts.begin(), Phi->lower(Func));
  }
}

// This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
// create a corresponding assignment instruction, and add all the assignments
// near the end of this block. They need to be added before any branch
// instruction, and also if the block ends with a compare instruction followed
// by a branch instruction that we may want to fuse, it's better to insert the
// new assignments before the compare instruction. The
// tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
//
// Note that this transformation takes the Phi dest variables out of SSA form,
// as there may be assignments to the dest variable in multiple blocks.
void CfgNode::placePhiStores() {
  // Find the insertion point.
  InstList::iterator InsertionPoint = Insts.end();
  // Every block must end in a terminator instruction, and therefore must have
  // at least one instruction, so it's valid to decrement InsertionPoint (but
  // assert just in case).
  assert(InsertionPoint != Insts.begin());
  --InsertionPoint;
  // Confirm that InsertionPoint is a terminator instruction. Calling
  // getTerminatorEdges() on a non-terminator instruction will cause an
  // llvm_unreachable().
  (void)InsertionPoint->getTerminatorEdges();
  // SafeInsertionPoint is always immediately before the terminator
  // instruction. If the block ends in a compare and conditional branch, it's
  // better to place the Phi store before the compare so as not to interfere
  // with compare/branch fusing. However, if the compare instruction's dest
  // operand is the same as the new assignment statement's source operand,
  // this can't be done due to data dependences, so we need to fall back to
  // the SafeInsertionPoint. To illustrate:
  // ; <label>:95
  // %97 = load i8* %96, align 1
  // %98 = icmp ne i8 %97, 0
  // br i1 %98, label %99, label %2132
  // ; <label>:99
  // %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
  // %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
  // would be Phi-lowered as:
  // ; <label>:95
  // %97 = load i8* %96, align 1
  // %100_phi = %97 ; can be at InsertionPoint
  // %98 = icmp ne i8 %97, 0
  // %101_phi = %98 ; must be at SafeInsertionPoint
  // br i1 %98, label %99, label %2132
  // ; <label>:99
  // %100 = %100_phi
  // %101 = %101_phi
  //
  // TODO(stichnot): It may be possible to bypass this whole
  // SafeInsertionPoint mechanism. If a source basic block ends in a
  // conditional branch:
  // labelSource:
  // ...
  // br i1 %foo, label %labelTrue, label %labelFalse
  // and a branch target has a Phi involving the branch operand:
  // labelTrue:
  // %bar = phi i1 [ %foo, %labelSource ], ...
  // then we actually know the constant i1 value of the Phi operand:
  // labelTrue:
  // %bar = phi i1 [ true, %labelSource ], ...
  // It seems that this optimization should be done by clang or opt, but we
  // could also do it here.
  InstList::iterator SafeInsertionPoint = InsertionPoint;
  // Keep track of the dest variable of a compare instruction, so that we
  // insert the new instruction at the SafeInsertionPoint if the compare's
  // dest matches the Phi-lowered assignment's source.
  Variable *CmpInstDest = nullptr;
  // If the current insertion point is at a conditional branch instruction,
  // and the previous instruction is a compare instruction, then we move the
  // insertion point before the compare instruction so as not to interfere
  // with compare/branch fusing.
  if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
    if (!Branch->isUnconditional()) {
      if (InsertionPoint != Insts.begin()) {
        --InsertionPoint;
        if (llvm::isa<InstIcmp>(InsertionPoint) ||
            llvm::isa<InstFcmp>(InsertionPoint)) {
          CmpInstDest = InsertionPoint->getDest();
        } else {
          ++InsertionPoint;
        }
      }
    }
  }

  // Consider every out-edge.
  for (CfgNode *Succ : OutEdges) {
    // Consider every Phi instruction at the out-edge.
    for (Inst &I : Succ->Phis) {
      auto *Phi = llvm::dyn_cast<InstPhi>(&I);
      Operand *Operand = Phi->getOperandForTarget(this);
      assert(Operand);
      Variable *Dest = I.getDest();
      assert(Dest);
      auto *NewInst = InstAssign::create(Func, Dest, Operand);
      if (CmpInstDest == Operand)
        Insts.insert(SafeInsertionPoint, NewInst);
      else
        Insts.insert(InsertionPoint, NewInst);
    }
  }
}

// Deletes the phi instructions after the loads and stores are placed.
void CfgNode::deletePhis() {
  for (Inst &I : Phis)
    I.setDeleted();
}

// Splits the edge from Pred to this node by creating a new node and hooking up
// the in and out edges appropriately. (The EdgeIndex parameter is only used to
// make the new node's name unique when there are multiple edges between the
// same pair of nodes.) The new node's instruction list is initialized to the
// empty list, with no terminator instruction. There must not be multiple edges
// from Pred to this node so all Inst::getTerminatorEdges implementations must
// not contain duplicates.
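//
// For illustration (hypothetical node names), splitting the only edge from
// %pred to %target rewrites
//   %pred -> %target     as     %pred -> %split_pred_target_0 -> %target
// where the new middle node starts out empty; advancedPhiLowering() later
// fills it with the assignments for this edge and a terminating branch.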
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
  CfgNode *NewNode = Func->makeNode();
  // Depth is the minimum as it works if both are the same, but if one is
  // outside the loop and the other is inside, the new node should be placed
  // outside and not be executed multiple times within the loop.
  NewNode->setLoopNestDepth(
      std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
  if (BuildDefs::dump())
    NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
                     std::to_string(EdgeIndex));
  // The new node is added to the end of the node list, and will later need to
  // be sorted into a reasonable topological order.
  NewNode->setNeedsPlacement(true);
  // Repoint Pred's out-edge.
  bool Found = false;
  for (CfgNode *&I : Pred->OutEdges) {
    if (I == this) {
      I = NewNode;
      NewNode->InEdges.push_back(Pred);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint this node's in-edge.
  Found = false;
  for (CfgNode *&I : InEdges) {
    if (I == Pred) {
      I = NewNode;
      NewNode->OutEdges.push_back(this);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint all suitable branch instructions' target and return.
  Found = false;
  for (Inst &I : Pred->getInsts())
    if (!I.isDeleted() && I.repointEdges(this, NewNode))
      Found = true;
  assert(Found);
  (void)Found;
  return NewNode;
}

namespace {

// Helpers for advancedPhiLowering().

class PhiDesc {
  PhiDesc() = delete;
  PhiDesc(const PhiDesc &) = delete;
  PhiDesc &operator=(const PhiDesc &) = delete;

public:
  PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
  PhiDesc(PhiDesc &&) = default;
  InstPhi *Phi = nullptr;
  Variable *Dest = nullptr;
  Operand *Src = nullptr;
  bool Processed = false;
  size_t NumPred = 0; // number of entries whose Src is this Dest
  int32_t Weight = 0; // preference for topological order
};
using PhiDescList = llvm::SmallVector<PhiDesc, 32>;

// Always pick NumPred=0 over NumPred>0.
constexpr int32_t WeightNoPreds = 8;
// Prefer Src as a register because the register might free up.
constexpr int32_t WeightSrcIsReg = 4;
// Prefer Dest not as a register because the register stays free longer.
constexpr int32_t WeightDestNotReg = 2;
// Prefer NumPred=1 over NumPred>1. This is used as a tiebreaker when a
// dependency cycle must be broken so that hopefully only one temporary
// assignment has to be added to break the cycle.
constexpr int32_t WeightOnePred = 1;
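// As a worked example (hypothetical assignment "a = b"): if nothing else
// reads a (NumPred=0), b has a register, and a does not, the weight is
// 8 + 4 + 2 = 14. An assignment in the middle of a dependency chain with one
// reader, a stack Src, and a register Dest scores only 1, so it is scheduled
// later.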

bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
                  const Operand *Opnd) {
  if (Var1 == Opnd)
    return true;
  const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
  if (Var2 == nullptr)
    return false;

  // If either operand lacks a register, they cannot be the same.
  if (!Var1->hasReg())
    return false;
  if (!Var2->hasReg())
    return false;

  const auto RegNum1 = Var1->getRegNum();
  const auto RegNum2 = Var2->getRegNum();
  // Quick common-case check.
  if (RegNum1 == RegNum2)
    return true;

  assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
         Target->getAliasesForRegister(RegNum2)[RegNum1]);
  return Target->getAliasesForRegister(RegNum1)[RegNum2];
}

// Update NumPred for all Phi assignments using Var as their Dest variable.
// Also update Weight if NumPred dropped from 2 to 1, or 1 to 0.
void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) {
  for (PhiDesc &Item : Desc) {
    if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) {
      --Item.NumPred;
      if (Item.NumPred == 1) {
        // If NumPred changed from 2 to 1, add in WeightOnePred.
        Item.Weight += WeightOnePred;
      } else if (Item.NumPred == 0) {
        // If NumPred changed from 1 to 0, subtract WeightOnePred and add in
        // WeightNoPreds.
        Item.Weight += (WeightNoPreds - WeightOnePred);
      }
    }
  }
}

} // end of anonymous namespace

// This is the "advanced" version of Phi lowering for a basic block, in
// contrast to the simple version that lowers through assignments involving
// temporaries.
//
// All Phi instructions in a basic block are conceptually executed in parallel.
// However, if we lower Phis early and commit to a sequential ordering, we may
// end up creating unnecessary interferences which lead to worse register
// allocation. Delaying Phi scheduling until after register allocation can
// help, unless there are no free registers for shuffling registers or stack
// slots and spilling becomes necessary.
//
// The advanced Phi lowering starts by finding a topological sort of the Phi
// instructions, where "A=B" comes before "B=C" due to the anti-dependence on
// B. Preexisting register assignments are considered in the topological sort.
// If a topological sort is not possible due to a cycle, the cycle is broken by
// introducing a non-parallel temporary. For example, a cycle arising from a
// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
// equal, prefer to schedule assignments with register-allocated Src operands
// earlier, in case that register becomes free afterwards, and prefer to
// schedule assignments with register-allocated Dest variables later, to keep
// that register free for longer.
//
// Once the ordering is determined, the Cfg edge is split and the assignment
// list is lowered by the target lowering layer. Since the assignment lowering
// may create new infinite-weight temporaries, a follow-on register allocation
// pass will be needed. To prepare for this, liveness (including live range
// calculation) of the split nodes needs to be calculated, and liveness of the
// original node needs to be updated to "undo" the effects of the phi
// assignments.
//
// The specific placement of the new node within the Cfg node list is deferred
// until later, including after empty node contraction.
//
// After phi assignments are lowered across all blocks, another register
// allocation pass is run, focusing only on pre-colored and infinite-weight
// variables, similar to Om1 register allocation (except without the need to
// specially compute these variables' live ranges, since they have already been
// precisely calculated). The register allocator in this mode needs the ability
// to forcibly spill and reload registers in case none are naturally available.
void CfgNode::advancedPhiLowering() {
  if (getPhis().empty())
    return;

  PhiDescList Desc;

  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    if (!Phi->isDeleted()) {
      Variable *Dest = Phi->getDest();
      Desc.emplace_back(Phi, Dest);
      // Undo the effect of the phi instruction on this node's live-in set by
      // marking the phi dest variable as live on entry.
      SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
      auto &LiveIn = Func->getLiveness()->getLiveIn(this);
      if (VarNum < LiveIn.size()) {
        assert(!LiveIn[VarNum]);
        LiveIn[VarNum] = true;
      }
      Phi->setDeleted();
    }
  }
  if (Desc.empty())
    return;

  TargetLowering *Target = Func->getTarget();
  SizeT InEdgeIndex = 0;
  for (CfgNode *Pred : InEdges) {
    CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
    SizeT Remaining = Desc.size();

    // First pass computes Src and initializes NumPred.
    for (PhiDesc &Item : Desc) {
      Variable *Dest = Item.Dest;
      Operand *Src = Item.Phi->getOperandForTarget(Pred);
      Item.Src = Src;
      Item.Processed = false;
      Item.NumPred = 0;
      // Cherry-pick any trivial assignments, so that they don't contribute
      // to the running complexity of the topological sort.
      if (sameVarOrReg(Target, Dest, Src)) {
        Item.Processed = true;
        --Remaining;
        if (Dest != Src)
          // If Dest and Src are syntactically the same, don't bother adding
          // the assignment, because in all respects it would be redundant,
          // and if Dest/Src are on the stack, the target lowering may naively
          // decide to lower it using a temporary register.
          Split->appendInst(InstAssign::create(Func, Dest, Src));
      }
    }
    // Second pass computes NumPred by comparing every pair of Phi
    // instructions.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      const Variable *Dest = Item.Dest;
      for (PhiDesc &Item2 : Desc) {
        if (Item2.Processed)
          continue;
        // There shouldn't be two different Phis with the same Dest variable
        // or register.
        assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
        if (sameVarOrReg(Target, Dest, Item2.Src))
          ++Item.NumPred;
      }
    }

    // Another pass to compute initial Weight values.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      int32_t Weight = 0;
      if (Item.NumPred == 0)
        Weight += WeightNoPreds;
      if (Item.NumPred == 1)
        Weight += WeightOnePred;
      if (auto *Var = llvm::dyn_cast<Variable>(Item.Src))
        if (Var->hasReg())
          Weight += WeightSrcIsReg;
      if (!Item.Dest->hasReg())
        Weight += WeightDestNotReg;
      Item.Weight = Weight;
    }

    // Repeatedly choose and process the best candidate in the topological
    // sort, until no candidates remain. This implementation is O(N^2) where N
    // is the number of Phi instructions, but with a small constant factor
    // compared to a likely implementation of O(N) topological sort.
    for (; Remaining; --Remaining) {
      int32_t BestWeight = -1;
      PhiDesc *BestItem = nullptr;
      // Find the best candidate.
      for (PhiDesc &Item : Desc) {
        if (Item.Processed)
          continue;
        const int32_t Weight = Item.Weight;
        if (Weight > BestWeight) {
          BestItem = &Item;
          BestWeight = Weight;
        }
      }
      assert(BestWeight >= 0);
      Variable *Dest = BestItem->Dest;
      Operand *Src = BestItem->Src;
      assert(!sameVarOrReg(Target, Dest, Src));
      // Break a cycle by introducing a temporary.
      while (BestItem->NumPred > 0) {
        bool Found = false;
        // If the target instruction "A=B" is part of a cycle, find the "X=A"
        // assignment in the cycle because it will have to be rewritten as
        // "X=tmp".
        for (PhiDesc &Item : Desc) {
          if (Item.Processed)
            continue;
          Operand *OtherSrc = Item.Src;
          if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
            SizeT VarNum = Func->getNumVariables();
            Variable *Tmp = Func->makeVariable(OtherSrc->getType());
            if (BuildDefs::dump())
              Tmp->setName(Func, "__split_" + std::to_string(VarNum));
            Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
            Item.Src = Tmp;
            updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc));
            Found = true;
            break;
          }
        }
        assert(Found);
        (void)Found;
      }
      // Now that a cycle (if any) has been broken, create the actual
      // assignment.
      Split->appendInst(InstAssign::create(Func, Dest, Src));
      if (auto *Var = llvm::dyn_cast<Variable>(Src))
        updatePreds(Desc, Target, Var);
      BestItem->Processed = true;
    }
    Split->appendInst(InstBr::create(Func, this));

    Split->genCode();
    Func->getVMetadata()->addNode(Split);
    // Validate to be safe. All items should be marked as processed, and have
    // no predecessors.
    if (BuildDefs::asserts()) {
      for (PhiDesc &Item : Desc) {
        (void)Item;
        assert(Item.Processed);
        assert(Item.NumPred == 0);
      }
    }
  }
}

// Does address mode optimization. Pass each instruction to the TargetLowering
// object. If it returns a new instruction (representing the optimized address
// mode), then insert the new instruction and delete the old.
void CfgNode::doAddressOpt() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  while (!Context.atEnd()) {
    Target->doAddressOpt();
  }
}

void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  Context.setInsertPoint(Context.getCur());
  // Do not insert nops within bundle-locked instruction sequences.
  bool PauseNopInsertion = false;
  while (!Context.atEnd()) {
    if (llvm::isa<InstBundleLock>(Context.getCur())) {
      PauseNopInsertion = true;
    } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
      PauseNopInsertion = false;
    }
    if (!PauseNopInsertion)
      Target->doNopInsertion(RNG);
    // Ensure Cur=Next, so that the nops are inserted before the current
    // instruction rather than after.
    Context.advanceCur();
    Context.advanceNext();
  }
}

// Drives the target lowering. Passes the current instruction and the next
// non-deleted instruction for target lowering.
void CfgNode::genCode() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  // Lower the regular instructions.
  Context.init(this);
  Target->initNodeForLowering(this);
  while (!Context.atEnd()) {
    InstList::iterator Orig = Context.getCur();
    if (llvm::isa<InstRet>(*Orig))
      setHasReturn();
    Target->lower();
    // Ensure target lowering actually moved the cursor.
    assert(Context.getCur() != Orig);
  }
  Context.availabilityReset();
  // Do preliminary lowering of the Phi instructions.
  Target->prelowerPhis();
}

void CfgNode::livenessLightweight() {
  SizeT NumVars = Func->getNumVariables();
  LivenessBV Live(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
}

// Performs liveness analysis on the block. Returns true if the incoming
// liveness changed from before, false if it stayed the same. (If it changes,
// the node's predecessors need to be processed again.)
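//
// In classic backward-dataflow terms, a sketch of what one visit to block B
// computes:
//   LiveOut(B) = union of LiveIn(S) over successors S, plus the phi operands
//                in S that flow in from B
//   LiveIn(B)  = (LiveOut(B) - defs(B)) + uses(B), accumulated by walking the
//                instructions in reverse
// The caller re-runs blocks until no LiveIn set changes (a fixed point).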
bool CfgNode::liveness(Liveness *Liveness) {
  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const SizeT NumGlobalVars = Liveness->getNumGlobalVars();
  LivenessBV &Live = Liveness->getScratchBV();
  Live.clear();

  LiveBeginEndMap *LiveBegin = nullptr;
  LiveBeginEndMap *LiveEnd = nullptr;
  // Mark the beginning and ending of each variable's live range with the
  // sentinel instruction number 0.
  if (Liveness->getMode() == Liveness_Intervals) {
    LiveBegin = Liveness->getLiveBegin(this);
    LiveEnd = Liveness->getLiveEnd(this);
    LiveBegin->clear();
    LiveEnd->clear();
    // Guess that the number of live ranges beginning is roughly the number of
    // instructions, and same for live ranges ending.
    LiveBegin->reserve(getInstCountEstimate());
    LiveEnd->reserve(getInstCountEstimate());
  }

  // Initialize Live to be the union of all successors' LiveIn.
  for (CfgNode *Succ : OutEdges) {
    const LivenessBV &LiveIn = Liveness->getLiveIn(Succ);
    assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
    Live |= LiveIn;
    // Mark corresponding argument of phis in successor as live.
    for (Inst &I : Succ->Phis) {
      if (I.isDeleted())
        continue;
      auto *Phi = llvm::cast<InstPhi>(&I);
      Phi->livenessPhiOperand(Live, this, Liveness);
    }
  }
  assert(Live.empty() || Live.size() == NumGlobalVars);
  Liveness->getLiveOut(this) = Live;

  // Expand Live so it can hold locals in addition to globals.
  Live.resize(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
  }
  // Process phis in forward order so that we can override the instruction
  // number to be that of the earliest phi instruction in the block.
  SizeT NumNonDeadPhis = 0;
  InstNumberT FirstPhiNumber = Inst::NumberSentinel;
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    if (FirstPhiNumber == Inst::NumberSentinel)
      FirstPhiNumber = I.getNumber();
    if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
      ++NumNonDeadPhis;
  }

  // When using the sparse representation, after traversing the instructions
  // in the block, the Live bitvector should only contain set bits for global
  // variables upon block entry. We validate this by testing the upper bits of
  // the Live bitvector.
  if (Live.find_next(NumGlobalVars) != -1) {
    if (BuildDefs::dump()) {
      // This is a fatal liveness consistency error. Print some diagnostics
      // and abort.
      Ostream &Str = Func->getContext()->getStrDump();
      Func->resetCurrentNode();
      Str << "Invalid Live =";
      for (SizeT i = NumGlobalVars; i < Live.size(); ++i) {
        if (Live.test(i)) {
          Str << " ";
          Liveness->getVariable(i, this)->dump(Func);
        }
      }
      Str << "\n";
    }
    llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
  }
  // Now truncate Live to prevent LiveIn from growing.
  Live.resize(NumGlobalVars);

  bool Changed = false;
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
  // Add in the current LiveIn.
  Live |= LiveIn;
  // Check the result, and set LiveIn=Live.
  SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
  bool LiveInChanged = (Live != LiveIn);
  Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
  if (LiveInChanged)
    LiveIn = Live;
  PrevNumNonDeadPhis = NumNonDeadPhis;
  return Changed;
}

// Validates the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false; on success, it returns true.
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
  if (!BuildDefs::asserts())
    return true;

  // Verify there are no duplicates.
  auto ComparePair =
      [](const LiveBeginEndMapEntry &A, const LiveBeginEndMapEntry &B) {
        return A.first == B.first;
      };
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
          MapBegin.end() &&
      std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
          MapEnd.end())
    return true;

  // There is definitely a liveness error. All paths from here return false.
  if (!BuildDefs::dump())
    return false;

  // Print all the errors.
  if (BuildDefs::dump()) {
    GlobalContext *Ctx = Func->getContext();
    OstreamLocker L(Ctx);
    Ostream &Str = Ctx->getStrDump();
    if (Func->isVerbose()) {
      Str << "Live range errors in the following block:\n";
      dump(Func);
    }
    for (auto Start = MapBegin.begin();
         (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
         MapBegin.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR begin, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
    for (auto Start = MapEnd.begin();
         (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
         MapEnd.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR end, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
  }

  return false;
}

// Once basic liveness is complete, compute actual live ranges. It is assumed
// that within a single basic block, a live range begins at most once and ends
// at most once. This is certainly true for pure SSA form. It is also true once
// phis are lowered, since each assignment to the phi-based temporary is in a
// different basic block, and there is a single read that ends the live range
// in the basic block that contained the actual phi instruction.
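//
// For illustration (hypothetical numbers): with FirstInstNum=100 and
// LastInstNum=110, a variable whose range begins at instruction 103 and ends
// at 107 gets the live range [103,107); one that is live-in and dies at 105
// gets [100,105); one defined at 108 and live-out gets [108,111). If a range
// ends before it begins (LB > LE), the variable is live-in and live-out with
// a gap in the middle, so two sub-ranges [100,LE) and [LB,111) are added
// instead.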
void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
                                   InstNumberT LastInstNum) {
  TimerMarker T1(TimerStack::TT_liveRange, Func);

  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const LivenessBV &LiveIn = Liveness->getLiveIn(this);
  const LivenessBV &LiveOut = Liveness->getLiveOut(this);
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  std::sort(MapBegin.begin(), MapBegin.end());
  std::sort(MapEnd.begin(), MapEnd.end());

  if (!livenessValidateIntervals(Liveness)) {
    llvm::report_fatal_error("livenessAddIntervals: Liveness error");
    return;
  }

  LivenessBV &LiveInAndOut = Liveness->getScratchBV();
  LiveInAndOut = LiveIn;
  LiveInAndOut &= LiveOut;

  // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
  auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
  auto IBE = MapBegin.end(), IEE = MapEnd.end();
  while (IBB != IBE || IEB != IEE) {
    SizeT i1 = IBB == IBE ? NumVars : IBB->first;
    SizeT i2 = IEB == IEE ? NumVars : IEB->first;
    SizeT i = std::min(i1, i2);
    // i1 is the Variable number of the next MapBegin entry, and i2 is the
    // Variable number of the next MapEnd entry. If i1==i2, then the
    // Variable's live range begins and ends in this block. If i1<i2, then
    // i1's live range begins at instruction IBB->second and extends through
    // the end of the block. If i1>i2, then i2's live range begins at the
    // first instruction of the block and ends at IEB->second. In any case, we
    // choose the lesser of i1 and i2 and proceed accordingly.
    InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
    InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;

    Variable *Var = Liveness->getVariable(i, this);
    if (LB > LE) {
      Var->addLiveRange(FirstInstNum, LE, this);
      Var->addLiveRange(LB, LastInstNum + 1, this);
      // Assert that Var is a global variable by checking that its liveness
      // index is less than the number of globals. This ensures that the
      // LiveInAndOut[] access is valid.
      assert(i < Liveness->getNumGlobalVars());
      LiveInAndOut[i] = false;
    } else {
      Var->addLiveRange(LB, LE, this);
    }
    if (i == i1)
      ++IBB;
    if (i == i2)
      ++IEB;
  }
  // Process the variables that are live across the entire block.
  for (int i = LiveInAndOut.find_first(); i != -1;
       i = LiveInAndOut.find_next(i)) {
    Variable *Var = Liveness->getVariable(i, this);
    if (Liveness->getRangeMask(Var->getIndex()))
      Var->addLiveRange(FirstInstNum, LastInstNum + 1, this);
  }
}

// If this node contains only deleted instructions, and ends in an
// unconditional branch, contract the node by repointing all its in-edges to
// its successor.
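//
// For illustration (hypothetical labels): if %empty contains only deleted
// instructions and ends in "br label %succ", then each edge %pred -> %empty
// is repointed so that %pred branches directly to %succ, and %empty is left
// with no in-edges.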
void CfgNode::contractIfEmpty() {
  if (InEdges.empty())
    return;
  Inst *Branch = nullptr;
  for (Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isUnconditionalBranch())
      Branch = &I;
    else if (!I.isRedundantAssign())
      return;
  }
  // Make sure there is actually a successor to repoint in-edges to.
  if (OutEdges.empty())
    return;
  assert(hasSingleOutEdge());
  // Don't try to delete a self-loop.
  if (OutEdges[0] == this)
    return;
  // Make sure the node actually contains (ends with) an unconditional branch.
  if (Branch == nullptr)
    return;

  Branch->setDeleted();
  CfgNode *Successor = OutEdges.front();
  // Repoint all this node's in-edges to this node's successor, unless this
  // node's successor is actually itself (in which case the statement
  // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
  // over this->InEdges).
  if (Successor != this) {
    for (CfgNode *Pred : InEdges) {
      for (CfgNode *&I : Pred->OutEdges) {
        if (I == this) {
          I = Successor;
          Successor->InEdges.push_back(Pred);
        }
      }
      for (Inst &I : Pred->getInsts()) {
        if (!I.isDeleted())
          I.repointEdges(this, Successor);
      }
    }

    // Remove the in-edge to the successor to allow node reordering to make
    // better decisions. For example it's more helpful to place a node after a
    // reachable predecessor than an unreachable one (like the one we just
    // contracted).
    Successor->InEdges.erase(
        std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
  }
  InEdges.clear();
}

void CfgNode::doBranchOpt(const CfgNode *NextNode) {
  TargetLowering *Target = Func->getTarget();
  // Find the first opportunity for branch optimization (which will be the
  // last instruction in the block) and stop. This is sufficient unless there
  // is some target lowering where we have the possibility of multiple
  // optimizations per block. Take care with switch lowering as there are
  // multiple unconditional branches and only the last can be deleted.
  for (Inst &I : reverse_range(Insts)) {
    if (!I.isDeleted()) {
      Target->doBranchOpt(&I, NextNode);
      return;
    }
  }
}

// ======================== Dump routines ======================== //

namespace {

// Helper functions for emit().

void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
                       bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  Liveness *Liveness = Func->getLiveness();
  const LivenessBV *Live;
  const auto StackReg = Func->getTarget()->getStackReg();
  const auto FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
  if (IsLiveIn) {
    Live = &Liveness->getLiveIn(Node);
    Str << "\t\t\t\t/* LiveIn=";
  } else {
    Live = &Liveness->getLiveOut(Node);
    Str << "\t\t\t\t/* LiveOut=";
  }
  if (!Live->empty()) {
    CfgVector<Variable *> LiveRegs;
    for (SizeT i = 0; i < Live->size(); ++i) {
      if (!(*Live)[i])
        continue;
      Variable *Var = Liveness->getVariable(i, Node);
      if (!Var->hasReg())
        continue;
      const auto RegNum = Var->getRegNum();
      if (RegNum == StackReg || RegNum == FrameOrStackReg)
        continue;
      if (IsLiveIn)
        ++LiveRegCount[RegNum];
      LiveRegs.push_back(Var);
    }
    // Sort the variables by regnum so they are always printed in a familiar
    // order.
    std::sort(LiveRegs.begin(), LiveRegs.end(),
              [](const Variable *V1, const Variable *V2) {
                return unsigned(V1->getRegNum()) < unsigned(V2->getRegNum());
              });
    bool First = true;
    for (Variable *Var : LiveRegs) {
      if (!First)
        Str << ",";
      First = false;
      Var->emit(Func);
    }
  }
  Str << " */\n";
}

/// Returns true if some text was emitted - in which case the caller
/// definitely needs to emit a newline character.
bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
                         CfgVector<SizeT> &LiveRegCount) {
  bool Printed = false;
  if (!BuildDefs::dump())
    return Printed;
  Variable *Dest = Instr->getDest();
  // Normally we increment the live count for the dest register. But we
  // shouldn't if the instruction's IsDestRedefined flag is set, because this
  // means that the target lowering created this instruction as a non-SSA
  // assignment; i.e., a different, previous instruction started the dest
  // variable's live range.
  if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
    ++LiveRegCount[Dest->getRegNum()];
  FOREACH_VAR_IN_INST(Var, *Instr) {
    bool ShouldReport = Instr->isLastUse(Var);
    if (ShouldReport && Var->hasReg()) {
      // Don't report end of live range until the live count reaches 0.
      SizeT NewCount = --LiveRegCount[Var->getRegNum()];
      if (NewCount)
        ShouldReport = false;
    }
    if (ShouldReport) {
      if (Printed)
        Str << ",";
      else
        Str << " \t/* END=";
      Var->emit(Func);
      Printed = true;
    }
  }
  if (Printed)
    Str << " */";
  return Printed;
}

void updateStats(Cfg *Func, const Inst *I) {
  if (!BuildDefs::dump())
    return;
  // Update emitted instruction count, plus fill/spill count for Variable
  // operands without a physical register.
  if (uint32_t Count = I->getEmitInstCount()) {
    Func->getContext()->statsUpdateEmitted(Count);
    if (Variable *Dest = I->getDest()) {
      if (!Dest->hasReg())
        Func->getContext()->statsUpdateFills();
    }
    for (SizeT S = 0; S < I->getSrcSize(); ++S) {
      if (auto *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
        if (!Src->hasReg())
          Func->getContext()->statsUpdateSpills();
      }
    }
  }
}

} // end of anonymous namespace

void CfgNode::emit(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrEmit();
  Liveness *Liveness = Func->getLiveness();
  const bool DecorateAsm = Liveness && getFlags().getDecorateAsm();
  Str << getAsmName() << ":\n";
  // LiveRegCount keeps track of the number of currently live variables that
  // each register is assigned to. Normally that would be only 0 or 1, but the
  // register allocator's AllowOverlap inference allows it to be greater than
  // 1 for short periods.
  CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
  if (DecorateAsm) {
    constexpr bool IsLiveIn = true;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
    if (getInEdges().size()) {
      Str << "\t\t\t\t/* preds=";
      bool First = true;
      for (CfgNode *I : getInEdges()) {
        if (!First)
          Str << ",";
        First = false;
        Str << "$" << I->getName();
      }
      Str << " */\n";
    }
    if (getLoopNestDepth()) {
      Str << "\t\t\t\t/* loop depth=" << getLoopNestDepth() << " */\n";
    }
  }

  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emit(Func);
  }
  for (const Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isRedundantAssign()) {
      // Usually, redundant assignments end the live range of the src
      // variable and begin the live range of the dest variable, with no net
      // effect on the liveness of their register. However, if the register
      // allocator infers the AllowOverlap condition, then this may be a
      // redundant assignment that does not end the src variable's live
      // range, in which case the active variable count for that register
      // needs to be bumped. That normally would have happened as part of
      // emitLiveRangesEnded(), but that isn't called for redundant
      // assignments.
      Variable *Dest = I.getDest();
      if (DecorateAsm && Dest->hasReg()) {
        ++LiveRegCount[Dest->getRegNum()];
        if (I.isLastUse(I.getSrc(0)))
          --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
      }
      continue;
    }
    I.emit(Func);
    bool Printed = false;
    if (DecorateAsm)
      Printed = emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
    if (Printed || llvm::isa<InstTarget>(&I))
      Str << "\n";
    updateStats(Func, &I);
  }
  if (DecorateAsm) {
    constexpr bool IsLiveIn = false;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
  }
}

// Helper class for emitIAS().
namespace {
class BundleEmitHelper {
  BundleEmitHelper() = delete;
  BundleEmitHelper(const BundleEmitHelper &) = delete;
  BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;

public:
  BundleEmitHelper(Assembler *Asm, const InstList &Insts)
      : Asm(Asm), End(Insts.end()), BundleLockStart(End),
        BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
        BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
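  // For illustration (hypothetical numbers): with getBundleAlignLog2Bytes()
  // == 5, BundleSize is 32, BundleMaskLo is 0x1f, and BundleMaskHi is ~0x1f.
  // A buffer offset of 0x47 then has bundle offset 0x47 & 0x1f == 7 and
  // bundle base 0x47 & ~0x1f == 0x40; offsets 0x40 and 0x57 fall in the same
  // bundle because they share the base 0x40.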
  // Check whether we're currently within a bundle_lock region.
  bool isInBundleLockRegion() const { return BundleLockStart != End; }
  // Check whether the current bundle_lock region has the align_to_end option.
  bool isAlignToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_AlignToEnd;
  }
  // Check whether the current bundle_lock region has the pad_to_end option.
  bool isPadToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_PadToEnd;
  }
  // Check whether the entire bundle_lock region falls within the same bundle.
  bool isSameBundle() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre == SizeSnapshotPost ||
           (SizeSnapshotPre & BundleMaskHi) ==
               ((SizeSnapshotPost - 1) & BundleMaskHi);
  }
  // Get the bundle alignment of the first instruction of the bundle_lock
  // region.
  intptr_t getPreAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre & BundleMaskLo;
  }
  // Get the bundle alignment of the first instruction past the bundle_lock
  // region.
  intptr_t getPostAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPost & BundleMaskLo;
  }
  // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
  // back the instruction iteration to that point.
  InstList::const_iterator getBundleLockStart() const {
    assert(isInBundleLockRegion());
    return BundleLockStart;
  }
  // Set up bookkeeping when the bundle_lock instruction is first processed.
  void enterBundleLock(InstList::const_iterator I) {
    assert(!isInBundleLockRegion());
    BundleLockStart = I;
    SizeSnapshotPre = Asm->getBufferSize();
    Asm->setPreliminary(true);
    assert(isInBundleLockRegion());
  }
  // Update bookkeeping when the bundle_unlock instruction is processed.
  void enterBundleUnlock() {
    assert(isInBundleLockRegion());
    SizeSnapshotPost = Asm->getBufferSize();
  }
  // Update bookkeeping when we are completely finished with the bundle_lock
  // region.
  void leaveBundleLockRegion() { BundleLockStart = End; }
  // Check whether the instruction sequence fits within the current bundle,
  // and if not, add nop padding to the end of the current bundle.
  void padToNextBundle() {
    assert(isInBundleLockRegion());
    if (!isSameBundle()) {
      intptr_t PadToNextBundle = BundleSize - getPreAlignment();
      Asm->padWithNop(PadToNextBundle);
      SizeSnapshotPre += PadToNextBundle;
      SizeSnapshotPost += PadToNextBundle;
      assert((Asm->getBufferSize() & BundleMaskLo) == 0);
      assert(Asm->getBufferSize() == SizeSnapshotPre);
    }
  }
  // If align_to_end is specified, add padding such that the instruction
  // sequence ends precisely at a bundle boundary.
  void padForAlignToEnd() {
    assert(isInBundleLockRegion());
    if (isAlignToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // If pad_to_end is specified, add padding such that the first instruction
  // after the instruction sequence starts at a bundle boundary.
  void padForPadToEnd() {
    assert(isInBundleLockRegion());
    if (isPadToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // Update bookkeeping when rolling back for the second pass.
  void rollback() {
    assert(isInBundleLockRegion());
    Asm->setBufferSize(SizeSnapshotPre);
    Asm->setPreliminary(false);
  }

private:
  Assembler *const Asm;
  // End is a sentinel value such that BundleLockStart==End implies that we
  // are not in a bundle_lock region.
  const InstList::const_iterator End;
  InstList::const_iterator BundleLockStart;
  const intptr_t BundleSize;
  // Masking with BundleMaskLo identifies an address's bundle offset.
  const intptr_t BundleMaskLo;
  // Masking with BundleMaskHi identifies an address's bundle.
  const intptr_t BundleMaskHi;
  intptr_t SizeSnapshotPre = 0;
  intptr_t SizeSnapshotPost = 0;
};

} // end of anonymous namespace

void CfgNode::emitIAS(Cfg *Func) const {
  Func->setCurrentNode(this);
  Assembler *Asm = Func->getAssembler<>();
  // TODO(stichnot): When sandboxing, defer binding the node label until just
  // before the first instruction is emitted, to reduce the chance that a
  // padding nop is a branch target.
  Asm->bindCfgNodeLabel(this);
  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emitIAS(Func);
  }

  // Do the simple emission if not sandboxed.
  if (!getFlags().getUseSandboxing()) {
    for (const Inst &I : Insts) {
      if (!I.isDeleted() && !I.isRedundantAssign()) {
        I.emitIAS(Func);
        updateStats(Func, &I);
      }
    }
    return;
  }

  // The remainder of the function handles emission with sandboxing. There are
  // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
  // instructions. All other instructions are treated as an implicit
  // one-instruction bundle_lock region. Emission is done twice for each
  // bundle_lock region. The first pass is a preliminary pass, after which we
  // can figure out what nop padding is needed, then roll back, and make the
  // final pass.
  //
  // Ideally, the first pass would be speculative and the second pass would
  // only be done if nop padding were needed, but the structure of the
  // integrated assembler makes it hard to roll back the state of label
  // bindings, label links, and relocation fixups. Instead, the first pass
  // just disables all mutation of that state.
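  //
  // A sketch of the per-region flow as implemented below:
  //   pass 1: enterBundleLock() ... emit ... enterBundleUnlock(); rollback();
  //           padToNextBundle(); padForAlignToEnd(); then retry from the
  //           instruction after the bundle_lock
  //   pass 2: re-emit ... enterBundleUnlock(); padForPadToEnd();
  //           leaveBundleLockRegion()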

  BundleEmitHelper Helper(Asm, Insts);
  InstList::const_iterator End = Insts.end();
  // Retrying indicates that we had to roll back to the bundle_lock
  // instruction to apply padding before the bundle_lock sequence.
  bool Retrying = false;
  for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
    if (I->isDeleted() || I->isRedundantAssign())
      continue;

    if (llvm::isa<InstBundleLock>(I)) {
      // Set up the initial bundle_lock state. This should not happen while
      // retrying, because the retry rolls back to the instruction following
      // the bundle_lock instruction.
      assert(!Retrying);
      Helper.enterBundleLock(I);
      continue;
    }

    if (llvm::isa<InstBundleUnlock>(I)) {
      Helper.enterBundleUnlock();
      if (Retrying) {
        // Make sure all instructions are in the same bundle.
        assert(Helper.isSameBundle());
        // If align_to_end is specified, make sure the next instruction
        // begins the bundle.
        assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
        Helper.padForPadToEnd();
        Helper.leaveBundleLockRegion();
        Retrying = false;
      } else {
        // This is the first pass, so roll back for the retry pass.
        Helper.rollback();
        // Pad to the next bundle if the instruction sequence crossed a
        // bundle boundary.
        Helper.padToNextBundle();
        // Insert additional padding to make AlignToEnd work.
        Helper.padForAlignToEnd();
        // Prepare for the retry pass after padding is done.
        Retrying = true;
        I = Helper.getBundleLockStart();
      }
      continue;
    }

    // I points to a non bundle_lock/bundle_unlock instruction.
    if (Helper.isInBundleLockRegion()) {
      I->emitIAS(Func);
      // Only update stats during the final pass.
      if (Retrying)
        updateStats(Func, iteratorToInst(I));
    } else {
      // Treat it as though there were an implicit bundle_lock and
      // bundle_unlock wrapping the instruction.
      Helper.enterBundleLock(I);
      I->emitIAS(Func);
      Helper.enterBundleUnlock();
      Helper.rollback();
      Helper.padToNextBundle();
      I->emitIAS(Func);
      updateStats(Func, iteratorToInst(I));
      Helper.leaveBundleLockRegion();
    }
  }

  // Don't allow bundle locking across basic blocks, to keep the backtracking
  // mechanism simple.
  assert(!Helper.isInBundleLockRegion());
  assert(!Retrying);
}

void CfgNode::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrDump();
  Liveness *Liveness = Func->getLiveness();
  if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
    Str << getName() << ":\n";
  // Dump the loop nest depth.
  if (Func->isVerbose(IceV_Loop))
    Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
  // Dump list of predecessor nodes.
  if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
    Str << " // preds = ";
    bool First = true;
    for (CfgNode *I : InEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
  // Dump the live-in variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveIn(this).empty()) {
      const LivenessBV &LiveIn = Liveness->getLiveIn(this);
      Str << " // LiveIn:";
      for (SizeT i = 0; i < LiveIn.size(); ++i) {
        if (LiveIn[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump each instruction.
  if (Func->isVerbose(IceV_Instructions)) {
    for (const Inst &I : Phis)
      I.dumpDecorated(Func);
    for (const Inst &I : Insts)
      I.dumpDecorated(Func);
  }
  // Dump the live-out variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveOut(this).empty()) {
      const LivenessBV &LiveOut = Liveness->getLiveOut(this);
      Str << " // LiveOut:";
      for (SizeT i = 0; i < LiveOut.size(); ++i) {
        if (LiveOut[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump list of successor nodes.
  if (Func->isVerbose(IceV_Succs)) {
    Str << " // succs = ";
    bool First = true;
    for (CfgNode *I : OutEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
}

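// Inserts an atomic increment of the per-block execution counter at the start
// of the block. Roughly (a sketch of the generated call):
//   call i64 @llvm.nacl.atomic.rmw.i64(i32 add, i64* @Var, i64 1, i32 acq_rel)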
void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
  GlobalContext *Ctx = Func->getContext();
  GlobalString RMW_I64 = Ctx->getGlobalString("llvm.nacl.atomic.rmw.i64");

  bool BadIntrinsic = false;
  const Intrinsics::FullIntrinsicInfo *Info =
      Ctx->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
  assert(!BadIntrinsic);
  assert(Info != nullptr);

  Operand *RMWI64Name = Ctx->getConstantExternSym(RMW_I64);
  constexpr RelocOffsetT Offset = 0;
  Constant *Counter = Ctx->getConstantSym(Offset, Var->getName());
  Constant *AtomicRMWOp = Ctx->getConstantInt32(Intrinsics::AtomicAdd);
  Constant *One = Ctx->getConstantInt64(1);
  Constant *OrderAcquireRelease =
      Ctx->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);

  auto *Instr = InstIntrinsicCall::create(
      Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
  Instr->addArg(AtomicRMWOp);
  Instr->addArg(Counter);
  Instr->addArg(One);
  Instr->addArg(OrderAcquireRelease);
  Insts.push_front(Instr);
}

void CfgNode::removeInEdge(CfgNode *In) {
  InEdges.erase(std::find(InEdges.begin(), InEdges.end(), In));
}

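// Splits this node to expose short-circuit evaluation when its terminating
// conditional branch tests the result of a top-level And/Or. For illustration
// (hypothetical labels), given a node %n ending in:
//   %c = or i1 %a, %b
//   br i1 %c, label %T, label %F
// the node is split after the definition of %a into %n_1 and %n_2, so that
//   %n_1 ends in: br i1 %a, label %T, label %n_2
//   %n_2 ends in: br i1 %b, label %T, label %F
// Returns the new node, or nullptr if the transformation doesn't apply.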
CfgNode *CfgNode::shortCircuit() {
  auto *Func = getCfg();
  auto *Last = &getInsts().back();
  Variable *Condition = nullptr;
  InstBr *Br = nullptr;
  if ((Br = llvm::dyn_cast<InstBr>(Last))) {
    if (!Br->isUnconditional()) {
      Condition = llvm::dyn_cast<Variable>(Br->getCondition());
    }
  }
  if (Condition == nullptr)
    return nullptr;

  auto *JumpOnTrue = Br->getTargetTrue();
  auto *JumpOnFalse = Br->getTargetFalse();

  bool FoundOr = false;
  bool FoundAnd = false;

  InstArithmetic *TopLevelBoolOp = nullptr;

  for (auto &Inst : reverse_range(getInsts())) {
    if (Inst.isDeleted())
      continue;
    if (Inst.getDest() == Condition) {
      if (auto *Arith = llvm::dyn_cast<InstArithmetic>(&Inst)) {
        FoundOr = (Arith->getOp() == InstArithmetic::OpKind::Or);
        FoundAnd = (Arith->getOp() == InstArithmetic::OpKind::And);

        if (FoundOr || FoundAnd) {
          TopLevelBoolOp = Arith;
          break;
        }
      }
    }
  }

  if (!TopLevelBoolOp)
    return nullptr;

  auto IsOperand = [](Inst *Instr, Operand *Opr) -> bool {
    for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
      if (Instr->getSrc(i) == Opr)
        return true;
    }
    return false;
  };
  Inst *FirstOperandDef = nullptr;
  for (auto &Inst : getInsts()) {
    if (IsOperand(TopLevelBoolOp, Inst.getDest())) {
      FirstOperandDef = &Inst;
      break;
    }
  }

  if (FirstOperandDef == nullptr) {
    return nullptr;
  }

  // Check for side effects.
  auto It = Ice::instToIterator(FirstOperandDef);
  while (It != getInsts().end()) {
    if (It->isDeleted()) {
      ++It;
      continue;
    }
    if (llvm::isa<InstBr>(It) || llvm::isa<InstRet>(It)) {
      break;
    }
    auto *Dest = It->getDest();
    if (It->getDest() == nullptr || It->hasSideEffects() ||
        !Func->getVMetadata()->isSingleBlock(Dest)) {
      // Relying on short-circuit evaluation here:
      // getVMetadata()->isSingleBlock(Dest) would segfault if It->getDest()
      // were nullptr.
      return nullptr;
    }
    It++;
  }

  auto *NewNode = Func->makeNode();
  NewNode->setLoopNestDepth(getLoopNestDepth());
  It = Ice::instToIterator(FirstOperandDef);
  It++; // Have to split after the def.

  NewNode->getInsts().splice(NewNode->getInsts().begin(), getInsts(), It,
                             getInsts().end());

  if (BuildDefs::dump()) {
    NewNode->setName(getName().append("_2"));
    setName(getName().append("_1"));
  }

  // Point the edges properly.
  NewNode->addInEdge(this);
  for (auto *Out : getOutEdges()) {
    NewNode->addOutEdge(Out);
    Out->addInEdge(NewNode);
  }
  removeAllOutEdges();
  addOutEdge(NewNode);

  // Manage the Phi instructions of the successors.
  for (auto *Succ : NewNode->getOutEdges()) {
    for (auto &Inst : Succ->getPhis()) {
      auto *Phi = llvm::cast<InstPhi>(&Inst);
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        if (Phi->getLabel(i) == this) {
          Phi->addArgument(Phi->getSrc(i), NewNode);
        }
      }
    }
  }

  // Create the new Br instruction.
  InstBr *NewInst = nullptr;
  if (FoundOr) {
    addOutEdge(JumpOnTrue);
    JumpOnFalse->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), JumpOnTrue, NewNode);
  } else if (FoundAnd) {
    addOutEdge(JumpOnFalse);
    JumpOnTrue->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), NewNode, JumpOnFalse);
  } else {
    return nullptr;
  }

  assert(NewInst != nullptr);
  appendInst(NewInst);

  Operand *UnusedOperand = nullptr;
  assert(TopLevelBoolOp->getSrcSize() == 2);
  if (TopLevelBoolOp->getSrc(0) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(1);
  else if (TopLevelBoolOp->getSrc(1) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(0);
  assert(UnusedOperand);

  Br->replaceSource(0, UnusedOperand); // Index 0 has the condition of the Br.

  TopLevelBoolOp->setDeleted();
  return NewNode;
}

} // end of namespace Ice