1 //===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the Cfg class.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "IceCfg.h"
16
17 #include "IceAssembler.h"
18 #include "IceBitVector.h"
19 #include "IceCfgNode.h"
20 #include "IceClFlags.h"
21 #include "IceDefs.h"
22 #include "IceELFObjectWriter.h"
23 #include "IceGlobalInits.h"
24 #include "IceInst.h"
25 #include "IceInstVarIter.h"
26 #include "IceInstrumentation.h"
27 #include "IceLiveness.h"
28 #include "IceLoopAnalyzer.h"
29 #include "IceOperand.h"
30 #include "IceTargetLowering.h"
31
32 #include <memory>
33 #include <utility>
34
35 namespace Ice {
36
Cfg(GlobalContext * Ctx,uint32_t SequenceNumber)37 Cfg::Cfg(GlobalContext *Ctx, uint32_t SequenceNumber)
38 : Allocator(createAllocator()), Ctx(Ctx), SequenceNumber(SequenceNumber),
39 VMask(getFlags().getVerbose()), FunctionName(),
40 NextInstNumber(Inst::NumberInitial), Live(nullptr) {
41 NodeStrings.reset(new StringPool);
42 VarStrings.reset(new StringPool);
43 CfgLocalAllocatorScope _(this);
44 Target = TargetLowering::createLowering(getFlags().getTargetArch(), this);
45 VMetadata.reset(new VariablesMetadata(this));
46 TargetAssembler = Target->createAssembler();
47 }
48
~Cfg()49 Cfg::~Cfg() {
50 assert(CfgAllocatorTraits::current() == nullptr);
51 if (getFlags().getDumpStrings()) {
52 OstreamLocker _(Ctx);
53 Ostream &Str = Ctx->getStrDump();
54 getNodeStrings()->dump(Str);
55 getVarStrings()->dump(Str);
56 }
57 }
58
59 // Called in the initalizer list of Cfg's constructor to create the Allocator
60 // and set it as TLS before any other member fields are constructed, since they
61 // may depend on it.
createAllocator()62 ArenaAllocator *Cfg::createAllocator() {
63 ArenaAllocator *Allocator = new ArenaAllocator();
64 CfgAllocatorTraits::set_current(Allocator);
65 return Allocator;
66 }
67
68 /// Create a string like "foo(i=123:b=9)" indicating the function name, number
69 /// of high-level instructions, and number of basic blocks. This string is only
70 /// used for dumping and other diagnostics, and the idea is that given a set of
71 /// functions to debug a problem on, it's easy to find the smallest or simplest
72 /// function to attack. Note that the counts may change somewhat depending on
73 /// what point it is called during the translation passes.
getFunctionNameAndSize() const74 std::string Cfg::getFunctionNameAndSize() const {
75 if (!BuildDefs::dump())
76 return getFunctionName().toString();
77 SizeT NodeCount = 0;
78 SizeT InstCount = 0;
79 for (CfgNode *Node : getNodes()) {
80 ++NodeCount;
81 // Note: deleted instructions are *not* ignored.
82 InstCount += Node->getPhis().size();
83 for (Inst &I : Node->getInsts()) {
84 if (!llvm::isa<InstTarget>(&I))
85 ++InstCount;
86 }
87 }
88 return getFunctionName() + "(i=" + std::to_string(InstCount) +
89 ":b=" + std::to_string(NodeCount) + ")";
90 }
91
setError(const std::string & Message)92 void Cfg::setError(const std::string &Message) {
93 HasError = true;
94 ErrorMessage = Message;
95 }
96
makeNode()97 CfgNode *Cfg::makeNode() {
98 SizeT LabelIndex = Nodes.size();
99 auto *Node = CfgNode::create(this, LabelIndex);
100 Nodes.push_back(Node);
101 return Node;
102 }
103
swapNodes(NodeList & NewNodes)104 void Cfg::swapNodes(NodeList &NewNodes) {
105 assert(Nodes.size() == NewNodes.size());
106 Nodes.swap(NewNodes);
107 for (SizeT I = 0, NumNodes = getNumNodes(); I < NumNodes; ++I)
108 Nodes[I]->resetIndex(I);
109 }
110
makeVariable(Type Ty)111 template <> Variable *Cfg::makeVariable<Variable>(Type Ty) {
112 SizeT Index = Variables.size();
113 Variable *Var;
114 if (Target->shouldSplitToVariableVecOn32(Ty)) {
115 Var = VariableVecOn32::create(this, Ty, Index);
116 } else if (Target->shouldSplitToVariable64On32(Ty)) {
117 Var = Variable64On32::create(this, Ty, Index);
118 } else {
119 Var = Variable::create(this, Ty, Index);
120 }
121 Variables.push_back(Var);
122 return Var;
123 }
124
addArg(Variable * Arg)125 void Cfg::addArg(Variable *Arg) {
126 Arg->setIsArg();
127 Args.push_back(Arg);
128 }
129
addImplicitArg(Variable * Arg)130 void Cfg::addImplicitArg(Variable *Arg) {
131 Arg->setIsImplicitArg();
132 ImplicitArgs.push_back(Arg);
133 }
134
135 // Returns whether the stack frame layout has been computed yet. This is used
136 // for dumping the stack frame location of Variables.
hasComputedFrame() const137 bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
138
139 namespace {
140 constexpr char BlockNameGlobalPrefix[] = ".L$profiler$block_name$";
141 constexpr char BlockStatsGlobalPrefix[] = ".L$profiler$block_info$";
142 } // end of anonymous namespace
143
createNodeNameDeclaration(const std::string & NodeAsmName)144 void Cfg::createNodeNameDeclaration(const std::string &NodeAsmName) {
145 auto *Var = VariableDeclaration::create(GlobalInits.get());
146 Var->setName(Ctx, BlockNameGlobalPrefix + NodeAsmName);
147 Var->setIsConstant(true);
148 Var->addInitializer(VariableDeclaration::DataInitializer::create(
149 GlobalInits.get(), NodeAsmName.data(), NodeAsmName.size() + 1));
150 const SizeT Int64ByteSize = typeWidthInBytes(IceType_i64);
151 Var->setAlignment(Int64ByteSize); // Wasteful, 32-bit could use 4 bytes.
152 GlobalInits->push_back(Var);
153 }
154
createBlockProfilingInfoDeclaration(const std::string & NodeAsmName,VariableDeclaration * NodeNameDeclaration)155 void Cfg::createBlockProfilingInfoDeclaration(
156 const std::string &NodeAsmName, VariableDeclaration *NodeNameDeclaration) {
157 auto *Var = VariableDeclaration::create(GlobalInits.get());
158 Var->setName(Ctx, BlockStatsGlobalPrefix + NodeAsmName);
159 const SizeT Int64ByteSize = typeWidthInBytes(IceType_i64);
160 Var->addInitializer(VariableDeclaration::ZeroInitializer::create(
161 GlobalInits.get(), Int64ByteSize));
162
163 const RelocOffsetT NodeNameDeclarationOffset = 0;
164 Var->addInitializer(VariableDeclaration::RelocInitializer::create(
165 GlobalInits.get(), NodeNameDeclaration,
166 {RelocOffset::create(Ctx, NodeNameDeclarationOffset)}));
167 Var->setAlignment(Int64ByteSize);
168 GlobalInits->push_back(Var);
169 }
170
translate()171 void Cfg::translate() {
172 if (hasError())
173 return;
174 // Cache the possibly-overridden optimization level once translation begins.
175 // It would be nicer to do this in the constructor, but we need to wait until
176 // after setFunctionName() has a chance to be called.
177 OptimizationLevel =
178 getFlags().matchForceO2(getFunctionName(), getSequenceNumber())
179 ? Opt_2
180 : getFlags().getOptLevel();
181 if (BuildDefs::timers()) {
182 if (getFlags().matchTimingFocus(getFunctionName(), getSequenceNumber())) {
183 setFocusedTiming();
184 getContext()->resetTimer(GlobalContext::TSK_Default);
185 }
186 }
187 if (BuildDefs::dump()) {
188 if (isVerbose(IceV_Status) &&
189 getFlags().matchTestStatus(getFunctionName(), getSequenceNumber())) {
190 getContext()->getStrDump()
191 << ">>>Translating " << getFunctionNameAndSize()
192 << " seq=" << getSequenceNumber() << "\n";
193 }
194 }
195 TimerMarker T_func(getContext(), getFunctionName().toStringOrEmpty());
196 TimerMarker T(TimerStack::TT_translate, this);
197
198 dump("Initial CFG");
199
200 // Create the Hi and Lo variables where a split was needed
201 for (Variable *Var : Variables) {
202 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) {
203 Var64On32->initHiLo(this);
204 } else if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Var)) {
205 VarVecOn32->initVecElement(this);
206 }
207 }
208
209 // Instrument the Cfg, e.g. with AddressSanitizer
210 if (!BuildDefs::minimal() && getFlags().getSanitizeAddresses()) {
211 getContext()->instrumentFunc(this);
212 dump("Instrumented CFG");
213 }
214
215 // The set of translation passes and their order are determined by the
216 // target.
217 getTarget()->translate();
218
219 dump("Final output");
220 if (getFocusedTiming()) {
221 getContext()->dumpLocalTimers(getFunctionName().toString());
222 }
223 }
224
fixPhiNodes()225 void Cfg::fixPhiNodes() {
226 for (auto *Node : Nodes) {
227 // Fix all the phi edges since WASM can't tell how to make them correctly at
228 // the beginning.
229 assert(Node);
230 const auto &InEdges = Node->getInEdges();
231 for (auto &Instr : Node->getPhis()) {
232 auto *Phi = llvm::cast<InstPhi>(&Instr);
233 assert(Phi);
234 for (SizeT i = 0; i < InEdges.size(); ++i) {
235 Phi->setLabel(i, InEdges[i]);
236 }
237 }
238 }
239 }
240
computeInOutEdges()241 void Cfg::computeInOutEdges() {
242 // Compute the out-edges.
243 for (CfgNode *Node : Nodes) {
244 Node->computeSuccessors();
245 }
246
247 // Prune any unreachable nodes before computing in-edges.
248 SizeT NumNodes = getNumNodes();
249 BitVector Reachable(NumNodes);
250 BitVector Pending(NumNodes);
251 Pending.set(getEntryNode()->getIndex());
252 while (true) {
253 int Index = Pending.find_first();
254 if (Index == -1)
255 break;
256 Pending.reset(Index);
257 Reachable.set(Index);
258 CfgNode *Node = Nodes[Index];
259 assert(Node->getIndex() == (SizeT)Index);
260 for (CfgNode *Succ : Node->getOutEdges()) {
261 SizeT SuccIndex = Succ->getIndex();
262 if (!Reachable.test(SuccIndex))
263 Pending.set(SuccIndex);
264 }
265 }
266 SizeT Dest = 0;
267 for (SizeT Source = 0; Source < NumNodes; ++Source) {
268 if (Reachable.test(Source)) {
269 Nodes[Dest] = Nodes[Source];
270 Nodes[Dest]->resetIndex(Dest);
271 // Compute the in-edges.
272 Nodes[Dest]->computePredecessors();
273 ++Dest;
274 }
275 }
276 Nodes.resize(Dest);
277
278 TimerMarker T(TimerStack::TT_phiValidation, this);
279 for (CfgNode *Node : Nodes)
280 Node->enforcePhiConsistency();
281 }
282
renumberInstructions()283 void Cfg::renumberInstructions() {
284 TimerMarker T(TimerStack::TT_renumberInstructions, this);
285 NextInstNumber = Inst::NumberInitial;
286 for (CfgNode *Node : Nodes)
287 Node->renumberInstructions();
288 // Make sure the entry node is the first node and therefore got the lowest
289 // instruction numbers, to facilitate live range computation of function
290 // arguments. We want to model function arguments as being live on entry to
291 // the function, otherwise an argument whose only use is in the first
292 // instruction will be assigned a trivial live range and the register
293 // allocator will not recognize its live range as overlapping another
294 // variable's live range.
295 assert(Nodes.empty() || (*Nodes.begin() == getEntryNode()));
296 }
297
298 // placePhiLoads() must be called before placePhiStores().
placePhiLoads()299 void Cfg::placePhiLoads() {
300 TimerMarker T(TimerStack::TT_placePhiLoads, this);
301 for (CfgNode *Node : Nodes)
302 Node->placePhiLoads();
303 }
304
305 // placePhiStores() must be called after placePhiLoads().
placePhiStores()306 void Cfg::placePhiStores() {
307 TimerMarker T(TimerStack::TT_placePhiStores, this);
308 for (CfgNode *Node : Nodes)
309 Node->placePhiStores();
310 }
311
deletePhis()312 void Cfg::deletePhis() {
313 TimerMarker T(TimerStack::TT_deletePhis, this);
314 for (CfgNode *Node : Nodes)
315 Node->deletePhis();
316 }
317
advancedPhiLowering()318 void Cfg::advancedPhiLowering() {
319 TimerMarker T(TimerStack::TT_advancedPhiLowering, this);
320 // Clear all previously computed live ranges (but not live-in/live-out bit
321 // vectors or last-use markers), because the follow-on register allocation is
322 // only concerned with live ranges across the newly created blocks.
323 for (Variable *Var : Variables) {
324 Var->getLiveRange().reset();
325 }
326 // This splits edges and appends new nodes to the end of the node list. This
327 // can invalidate iterators, so don't use an iterator.
328 SizeT NumNodes = getNumNodes();
329 SizeT NumVars = getNumVariables();
330 for (SizeT I = 0; I < NumNodes; ++I)
331 Nodes[I]->advancedPhiLowering();
332
333 TimerMarker TT(TimerStack::TT_lowerPhiAssignments, this);
334 if (true) {
335 // The following code does an in-place update of liveness and live ranges
336 // as a result of adding the new phi edge split nodes.
337 getLiveness()->initPhiEdgeSplits(Nodes.begin() + NumNodes,
338 Variables.begin() + NumVars);
339 TimerMarker TTT(TimerStack::TT_liveness, this);
340 // Iterate over the newly added nodes to add their liveness info.
341 for (auto I = Nodes.begin() + NumNodes, E = Nodes.end(); I != E; ++I) {
342 InstNumberT FirstInstNum = getNextInstNumber();
343 (*I)->renumberInstructions();
344 InstNumberT LastInstNum = getNextInstNumber() - 1;
345 (*I)->liveness(getLiveness());
346 (*I)->livenessAddIntervals(getLiveness(), FirstInstNum, LastInstNum);
347 }
348 } else {
349 // The following code does a brute-force recalculation of live ranges as a
350 // result of adding the new phi edge split nodes. The liveness calculation
351 // is particularly expensive because the new nodes are not yet in a proper
352 // topological order and so convergence is slower.
353 //
354 // This code is kept here for reference and can be temporarily enabled in
355 // case the incremental code is under suspicion.
356 renumberInstructions();
357 liveness(Liveness_Intervals);
358 getVMetadata()->init(VMK_All);
359 }
360 Target->regAlloc(RAK_Phi);
361 }
362
363 // Find a reasonable placement for nodes that have not yet been placed, while
364 // maintaining the same relative ordering among already placed nodes.
reorderNodes()365 void Cfg::reorderNodes() {
366 // TODO(ascull): it would be nice if the switch tests were always followed by
367 // the default case to allow for fall through.
368 using PlacedList = CfgList<CfgNode *>;
369 PlacedList Placed; // Nodes with relative placement locked down
370 PlacedList Unreachable; // Unreachable nodes
371 PlacedList::iterator NoPlace = Placed.end();
372 // Keep track of where each node has been tentatively placed so that we can
373 // manage insertions into the middle.
374 CfgVector<PlacedList::iterator> PlaceIndex(Nodes.size(), NoPlace);
375 for (CfgNode *Node : Nodes) {
376 // The "do ... while(0);" construct is to factor out the --PlaceIndex and
377 // assert() statements before moving to the next node.
378 do {
379 if (Node != getEntryNode() && Node->getInEdges().empty()) {
380 // The node has essentially been deleted since it is not a successor of
381 // any other node.
382 Unreachable.push_back(Node);
383 PlaceIndex[Node->getIndex()] = Unreachable.end();
384 Node->setNeedsPlacement(false);
385 continue;
386 }
387 if (!Node->needsPlacement()) {
388 // Add to the end of the Placed list.
389 Placed.push_back(Node);
390 PlaceIndex[Node->getIndex()] = Placed.end();
391 continue;
392 }
393 Node->setNeedsPlacement(false);
394 // Assume for now that the unplaced node is from edge-splitting and
395 // therefore has 1 in-edge and 1 out-edge (actually, possibly more than 1
396 // in-edge if the predecessor node was contracted). If this changes in
397 // the future, rethink the strategy.
398 assert(Node->getInEdges().size() >= 1);
399 assert(Node->hasSingleOutEdge());
400
401 // If it's a (non-critical) edge where the successor has a single
402 // in-edge, then place it before the successor.
403 CfgNode *Succ = Node->getOutEdges().front();
404 if (Succ->getInEdges().size() == 1 &&
405 PlaceIndex[Succ->getIndex()] != NoPlace) {
406 Placed.insert(PlaceIndex[Succ->getIndex()], Node);
407 PlaceIndex[Node->getIndex()] = PlaceIndex[Succ->getIndex()];
408 continue;
409 }
410
411 // Otherwise, place it after the (first) predecessor.
412 CfgNode *Pred = Node->getInEdges().front();
413 auto PredPosition = PlaceIndex[Pred->getIndex()];
414 // It shouldn't be the case that PredPosition==NoPlace, but if that
415 // somehow turns out to be true, we just insert Node before
416 // PredPosition=NoPlace=Placed.end() .
417 if (PredPosition != NoPlace)
418 ++PredPosition;
419 Placed.insert(PredPosition, Node);
420 PlaceIndex[Node->getIndex()] = PredPosition;
421 } while (0);
422
423 --PlaceIndex[Node->getIndex()];
424 assert(*PlaceIndex[Node->getIndex()] == Node);
425 }
426
427 // Reorder Nodes according to the built-up lists.
428 NodeList Reordered;
429 Reordered.reserve(Placed.size() + Unreachable.size());
430 for (CfgNode *Node : Placed)
431 Reordered.push_back(Node);
432 for (CfgNode *Node : Unreachable)
433 Reordered.push_back(Node);
434 assert(getNumNodes() == Reordered.size());
435 swapNodes(Reordered);
436 }
437
localCSE(bool AssumeSSA)438 void Cfg::localCSE(bool AssumeSSA) {
439 // Performs basic-block local common-subexpression elimination
440 // If we have
441 // t1 = op b c
442 // t2 = op b c
443 // This pass will replace future references to t2 in a basic block by t1
444 // Points to note:
445 // 1. Assumes SSA by default. To change this, use -lcse=no-ssa
446 // This is needed if this pass is moved to a point later in the pipeline.
447 // If variables have a single definition (in the node), CSE can work just
448 // on the basis of an equality compare on instructions (sans Dest). When
449 // variables can be updated (hence, non-SSA) the result of a previous
450 // instruction which used that variable as an operand can not be reused.
451 // 2. Leaves removal of instructions to DCE.
452 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected
453 // to take care of cases not arising from GEP simplification.
454 // 4. By default, a single pass is made over each basic block. Control this
455 // with -lcse-max-iters=N
456
457 TimerMarker T(TimerStack::TT_localCse, this);
458 struct VariableHash {
459 size_t operator()(const Variable *Var) const { return Var->hashValue(); }
460 };
461
462 struct InstHash {
463 size_t operator()(const Inst *Instr) const {
464 auto Kind = Instr->getKind();
465 auto Result =
466 std::hash<typename std::underlying_type<Inst::InstKind>::type>()(
467 Kind);
468 for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
469 Result ^= Instr->getSrc(i)->hashValue();
470 }
471 return Result;
472 }
473 };
474 struct InstEq {
475 bool srcEq(const Operand *A, const Operand *B) const {
476 if (llvm::isa<Variable>(A) || llvm::isa<Constant>(A))
477 return (A == B);
478 return false;
479 }
480 bool operator()(const Inst *InstrA, const Inst *InstrB) const {
481 if ((InstrA->getKind() != InstrB->getKind()) ||
482 (InstrA->getSrcSize() != InstrB->getSrcSize()))
483 return false;
484
485 if (auto *A = llvm::dyn_cast<InstArithmetic>(InstrA)) {
486 auto *B = llvm::cast<InstArithmetic>(InstrB);
487 // A, B are guaranteed to be of the same 'kind' at this point
488 // So, dyn_cast is not needed
489 if (A->getOp() != B->getOp())
490 return false;
491 }
492 // Does not enter loop if different kind or number of operands
493 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) {
494 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i)))
495 return false;
496 }
497 return true;
498 }
499 };
500
501 for (CfgNode *Node : getNodes()) {
502 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen;
503 // Stores currently available instructions.
504
505 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements;
506 // Combining the above two into a single data structure might consume less
507 // memory but will be slower i.e map of Instruction -> Set of Variables
508
509 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency;
510 // Maps a variable to the Instructions that depend on it.
511 // a = op1 b c
512 // x = op2 c d
513 // Will result in the map : b -> {a}, c -> {a, x}, d -> {x}
514 // Not necessary for SSA as dependencies will never be invalidated, and the
515 // container will use minimal memory when left unused.
516
517 auto IterCount = getFlags().getLocalCseMaxIterations();
518
519 for (uint32_t i = 0; i < IterCount; ++i) {
520 // TODO(manasijm): Stats on IterCount -> performance
521 for (Inst &Instr : Node->getInsts()) {
522 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr))
523 continue;
524 if (!AssumeSSA) {
525 // Invalidate replacements
526 auto Iter = Replacements.find(Instr.getDest());
527 if (Iter != Replacements.end()) {
528 Replacements.erase(Iter);
529 }
530
531 // Invalidate 'seen' instructions whose operands were just updated.
532 auto DepIter = Dependency.find(Instr.getDest());
533 if (DepIter != Dependency.end()) {
534 for (auto *DepInst : DepIter->second) {
535 Seen.erase(DepInst);
536 }
537 }
538 }
539
540 // Replace - doing this before checking for repetitions might enable
541 // more optimizations
542 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) {
543 auto *Opnd = Instr.getSrc(i);
544 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
545 if (Replacements.find(Var) != Replacements.end()) {
546 Instr.replaceSource(i, Replacements[Var]);
547 }
548 }
549 }
550
551 // Check for repetitions
552 auto SeenIter = Seen.find(&Instr);
553 if (SeenIter != Seen.end()) { // seen before
554 const Inst *Found = *SeenIter;
555 Replacements[Instr.getDest()] = Found->getDest();
556 } else { // new
557 Seen.insert(&Instr);
558
559 if (!AssumeSSA) {
560 // Update dependencies
561 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) {
562 auto *Opnd = Instr.getSrc(i);
563 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
564 Dependency[Var].push_back(&Instr);
565 }
566 }
567 }
568 }
569 }
570 }
571 }
572 }
573
loopInvariantCodeMotion()574 void Cfg::loopInvariantCodeMotion() {
575 TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this);
576 // Does not introduce new nodes as of now.
577 for (auto &Loop : LoopInfo) {
578 CfgNode *Header = Loop.Header;
579 assert(Header);
580 if (Header->getLoopNestDepth() < 1)
581 return;
582 CfgNode *PreHeader = Loop.PreHeader;
583 if (PreHeader == nullptr || PreHeader->getInsts().size() == 0) {
584 return; // try next loop
585 }
586
587 auto &Insts = PreHeader->getInsts();
588 auto &LastInst = Insts.back();
589 Insts.pop_back();
590
591 for (auto *Inst : findLoopInvariantInstructions(Loop.Body)) {
592 PreHeader->appendInst(Inst);
593 }
594 PreHeader->appendInst(&LastInst);
595 }
596 }
597
598 CfgVector<Inst *>
findLoopInvariantInstructions(const CfgUnorderedSet<SizeT> & Body)599 Cfg::findLoopInvariantInstructions(const CfgUnorderedSet<SizeT> &Body) {
600 CfgUnorderedSet<Inst *> InvariantInsts;
601 CfgUnorderedSet<Variable *> InvariantVars;
602 for (auto *Var : getArgs()) {
603 InvariantVars.insert(Var);
604 }
605 bool Changed = false;
606 do {
607 Changed = false;
608 for (auto NodeIndex : Body) {
609 auto *Node = Nodes[NodeIndex];
610 CfgVector<std::reference_wrapper<Inst>> Insts(Node->getInsts().begin(),
611 Node->getInsts().end());
612
613 for (auto &InstRef : Insts) {
614 auto &Inst = InstRef.get();
615 if (Inst.isDeleted() ||
616 InvariantInsts.find(&Inst) != InvariantInsts.end())
617 continue;
618 switch (Inst.getKind()) {
619 case Inst::InstKind::Alloca:
620 case Inst::InstKind::Br:
621 case Inst::InstKind::Ret:
622 case Inst::InstKind::Phi:
623 case Inst::InstKind::Call:
624 case Inst::InstKind::Intrinsic:
625 case Inst::InstKind::Load:
626 case Inst::InstKind::Store:
627 case Inst::InstKind::Switch:
628 continue;
629 default:
630 break;
631 }
632
633 bool IsInvariant = true;
634 for (SizeT i = 0; i < Inst.getSrcSize(); ++i) {
635 if (auto *Var = llvm::dyn_cast<Variable>(Inst.getSrc(i))) {
636 if (InvariantVars.find(Var) == InvariantVars.end()) {
637 IsInvariant = false;
638 }
639 }
640 }
641 if (IsInvariant) {
642 Changed = true;
643 InvariantInsts.insert(&Inst);
644 Node->getInsts().remove(Inst);
645 if (Inst.getDest() != nullptr) {
646 InvariantVars.insert(Inst.getDest());
647 }
648 }
649 }
650 }
651 } while (Changed);
652
653 CfgVector<Inst *> InstVector(InvariantInsts.begin(), InvariantInsts.end());
654 std::sort(InstVector.begin(), InstVector.end(),
655 [](Inst *A, Inst *B) { return A->getNumber() < B->getNumber(); });
656 return InstVector;
657 }
658
shortCircuitJumps()659 void Cfg::shortCircuitJumps() {
660 // Split Nodes whenever an early jump is possible.
661 // __N :
662 // a = <something>
663 // Instruction 1 without side effect
664 // ... b = <something> ...
665 // Instruction N without side effect
666 // t1 = or a b
667 // br t1 __X __Y
668 //
669 // is transformed into:
670 // __N :
671 // a = <something>
672 // br a __X __N_ext
673 //
674 // __N_ext :
675 // Instruction 1 without side effect
676 // ... b = <something> ...
677 // Instruction N without side effect
678 // br b __X __Y
679 // (Similar logic for AND, jump to false instead of true target.)
680
681 TimerMarker T(TimerStack::TT_shortCircuit, this);
682 getVMetadata()->init(VMK_Uses);
683 auto NodeStack = this->getNodes();
684 CfgUnorderedMap<SizeT, CfgVector<CfgNode *>> Splits;
685 while (!NodeStack.empty()) {
686 auto *Node = NodeStack.back();
687 NodeStack.pop_back();
688 auto NewNode = Node->shortCircuit();
689 if (NewNode) {
690 NodeStack.push_back(NewNode);
691 NodeStack.push_back(Node);
692 Splits[Node->getIndex()].push_back(NewNode);
693 }
694 }
695
696 // Insert nodes in the right place
697 NodeList NewList;
698 NewList.reserve(Nodes.size());
699 CfgUnorderedSet<SizeT> Inserted;
700 for (auto *Node : Nodes) {
701 if (Inserted.find(Node->getIndex()) != Inserted.end())
702 continue; // already inserted
703 NodeList Stack{Node};
704 while (!Stack.empty()) {
705 auto *Current = Stack.back();
706 Stack.pop_back();
707 Inserted.insert(Current->getIndex());
708 NewList.push_back(Current);
709 for (auto *Next : Splits[Current->getIndex()]) {
710 Stack.push_back(Next);
711 }
712 }
713 }
714
715 SizeT NodeIndex = 0;
716 for (auto *Node : NewList) {
717 Node->resetIndex(NodeIndex++);
718 }
719 Nodes = NewList;
720 }
721
floatConstantCSE()722 void Cfg::floatConstantCSE() {
723 // Load multiple uses of a floating point constant (between two call
724 // instructions or block start/end) into a variable before its first use.
725 // t1 = b + 1.0
726 // t2 = c + 1.0
727 // Gets transformed to:
728 // t0 = 1.0
729 // t0_1 = t0
730 // t1 = b + t0_1
731 // t2 = c + t0_1
732 // Call instructions reset the procedure, but use the same variable, just in
733 // case it got a register. We are assuming floating point registers are not
734 // callee saved in general. Example, continuing from before:
735 // result = call <some function>
736 // t3 = d + 1.0
737 // Gets transformed to:
738 // result = call <some function>
739 // t0_2 = t0
740 // t3 = d + t0_2
741 // TODO(manasijm, stichnot): Figure out how to 'link' t0 to the stack slot of
742 // 1.0. When t0 does not get a register, introducing an extra assignment
743 // statement does not make sense. The relevant portion is marked below.
744
745 TimerMarker _(TimerStack::TT_floatConstantCse, this);
746 for (CfgNode *Node : getNodes()) {
747
748 CfgUnorderedMap<Constant *, Variable *> ConstCache;
749 auto Current = Node->getInsts().begin();
750 auto End = Node->getInsts().end();
751 while (Current != End) {
752 CfgUnorderedMap<Constant *, CfgVector<InstList::iterator>> FloatUses;
753 if (llvm::isa<InstCall>(iteratorToInst(Current))) {
754 ++Current;
755 assert(Current != End);
756 // Block should not end with a call
757 }
758 while (Current != End && !llvm::isa<InstCall>(iteratorToInst(Current))) {
759 if (!Current->isDeleted()) {
760 for (SizeT i = 0; i < Current->getSrcSize(); ++i) {
761 if (auto *Const = llvm::dyn_cast<Constant>(Current->getSrc(i))) {
762 if (Const->getType() == IceType_f32 ||
763 Const->getType() == IceType_f64) {
764 FloatUses[Const].push_back(Current);
765 }
766 }
767 }
768 }
769 ++Current;
770 }
771 for (auto &Pair : FloatUses) {
772 static constexpr SizeT MinUseThreshold = 3;
773 if (Pair.second.size() < MinUseThreshold)
774 continue;
775 // Only consider constants with at least `MinUseThreshold` uses
776 auto &Insts = Node->getInsts();
777
778 if (ConstCache.find(Pair.first) == ConstCache.end()) {
779 // Saw a constant (which is used at least twice) for the first time
780 auto *NewVar = makeVariable(Pair.first->getType());
781 // NewVar->setLinkedTo(Pair.first);
782 // TODO(manasijm): Plumbing for linking to an Operand.
783 auto *Assign = InstAssign::create(Node->getCfg(), NewVar, Pair.first);
784 Insts.insert(Pair.second[0], Assign);
785 ConstCache[Pair.first] = NewVar;
786 }
787
788 auto *NewVar = makeVariable(Pair.first->getType());
789 NewVar->setLinkedTo(ConstCache[Pair.first]);
790 auto *Assign =
791 InstAssign::create(Node->getCfg(), NewVar, ConstCache[Pair.first]);
792
793 Insts.insert(Pair.second[0], Assign);
794 for (auto InstUse : Pair.second) {
795 for (SizeT i = 0; i < InstUse->getSrcSize(); ++i) {
796 if (auto *Const = llvm::dyn_cast<Constant>(InstUse->getSrc(i))) {
797 if (Const == Pair.first) {
798 InstUse->replaceSource(i, NewVar);
799 }
800 }
801 }
802 }
803 }
804 }
805 }
806 }
807
doArgLowering()808 void Cfg::doArgLowering() {
809 TimerMarker T(TimerStack::TT_doArgLowering, this);
810 getTarget()->lowerArguments();
811 }
812
sortAndCombineAllocas(CfgVector<InstAlloca * > & Allocas,uint32_t CombinedAlignment,InstList & Insts,AllocaBaseVariableType BaseVariableType)813 void Cfg::sortAndCombineAllocas(CfgVector<InstAlloca *> &Allocas,
814 uint32_t CombinedAlignment, InstList &Insts,
815 AllocaBaseVariableType BaseVariableType) {
816 if (Allocas.empty())
817 return;
818 // Sort by decreasing alignment.
819 std::sort(Allocas.begin(), Allocas.end(), [](InstAlloca *A1, InstAlloca *A2) {
820 uint32_t Align1 = A1->getAlignInBytes();
821 uint32_t Align2 = A2->getAlignInBytes();
822 if (Align1 == Align2)
823 return A1->getNumber() < A2->getNumber();
824 else
825 return Align1 > Align2;
826 });
827 // Process the allocas in order of decreasing stack alignment. This allows
828 // us to pack less-aligned pieces after more-aligned ones, resulting in less
829 // stack growth. It also allows there to be at most one stack alignment "and"
830 // instruction for a whole list of allocas.
831 uint32_t CurrentOffset = 0;
832 CfgVector<int32_t> Offsets;
833 for (Inst *Instr : Allocas) {
834 auto *Alloca = llvm::cast<InstAlloca>(Instr);
835 // Adjust the size of the allocation up to the next multiple of the
836 // object's alignment.
837 uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u);
838 auto *ConstSize =
839 llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes());
840 uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment);
841 if (BaseVariableType == BVT_FramePointer) {
842 // Addressing is relative to the frame pointer. Subtract the offset after
843 // adding the size of the alloca, because it grows downwards from the
844 // frame pointer.
845 Offsets.push_back(Target->getFramePointerOffset(CurrentOffset, Size));
846 } else {
847 // Addressing is relative to the stack pointer or to a user pointer. Add
848 // the offset before adding the size of the object, because it grows
849 // upwards from the stack pointer. In addition, if the addressing is
850 // relative to the stack pointer, we need to add the pre-computed max out
851 // args size bytes.
852 const uint32_t OutArgsOffsetOrZero =
853 (BaseVariableType == BVT_StackPointer)
854 ? getTarget()->maxOutArgsSizeBytes()
855 : 0;
856 Offsets.push_back(CurrentOffset + OutArgsOffsetOrZero);
857 }
858 // Update the running offset of the fused alloca region.
859 CurrentOffset += Size;
860 }
861 // Round the offset up to the alignment granularity to use as the size.
862 uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
863 // Ensure every alloca was assigned an offset.
864 assert(Allocas.size() == Offsets.size());
865
866 switch (BaseVariableType) {
867 case BVT_UserPointer: {
868 Variable *BaseVariable = makeVariable(IceType_i32);
869 for (SizeT i = 0; i < Allocas.size(); ++i) {
870 auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
871 // Emit a new addition operation to replace the alloca.
872 Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
873 InstArithmetic *Add =
874 InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
875 BaseVariable, AllocaOffset);
876 Insts.push_front(Add);
877 Alloca->setDeleted();
878 }
879 Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
880 InstAlloca *CombinedAlloca =
881 InstAlloca::create(this, BaseVariable, AllocaSize, CombinedAlignment);
882 CombinedAlloca->setKnownFrameOffset();
883 Insts.push_front(CombinedAlloca);
884 } break;
885 case BVT_StackPointer:
886 case BVT_FramePointer: {
887 for (SizeT i = 0; i < Allocas.size(); ++i) {
888 auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
889 // Emit a fake definition of the rematerializable variable.
890 Variable *Dest = Alloca->getDest();
891 auto *Def = InstFakeDef::create(this, Dest);
892 if (BaseVariableType == BVT_StackPointer)
893 Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
894 else
895 Dest->setRematerializable(getTarget()->getFrameReg(), Offsets[i]);
896 Insts.push_front(Def);
897 Alloca->setDeleted();
898 }
899 // Allocate the fixed area in the function prolog.
900 getTarget()->reserveFixedAllocaArea(TotalSize, CombinedAlignment);
901 } break;
902 }
903 }
904
processAllocas(bool SortAndCombine)905 void Cfg::processAllocas(bool SortAndCombine) {
906 TimerMarker _(TimerStack::TT_alloca, this);
907 const uint32_t StackAlignment = getTarget()->getStackAlignment();
908 CfgNode *EntryNode = getEntryNode();
909 assert(EntryNode);
910 // LLVM enforces power of 2 alignment.
911 assert(llvm::isPowerOf2_32(StackAlignment));
912 // If the ABI's stack alignment is smaller than the vector size (16 bytes),
913 // conservatively use a frame pointer to allow for explicit alignment of the
914 // stack pointer. This needs to happen before register allocation so the frame
915 // pointer can be reserved.
916 if (getTarget()->needsStackPointerAlignment()) {
917 getTarget()->setHasFramePointer();
918 }
919 // Determine if there are large alignment allocations in the entry block or
920 // dynamic allocations (variable size in the entry block).
921 bool HasLargeAlignment = false;
922 bool HasDynamicAllocation = false;
923 for (Inst &Instr : EntryNode->getInsts()) {
924 if (Instr.isDeleted())
925 continue;
926 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
927 uint32_t AlignmentParam = Alloca->getAlignInBytes();
928 if (AlignmentParam > StackAlignment)
929 HasLargeAlignment = true;
930 if (llvm::isa<Constant>(Alloca->getSizeInBytes()))
931 Alloca->setKnownFrameOffset();
932 else {
933 HasDynamicAllocation = true;
934 // If Allocas are not sorted, the first dynamic allocation causes
935 // later Allocas to be at unknown offsets relative to the stack/frame.
936 if (!SortAndCombine)
937 break;
938 }
939 }
940 }
941 // Don't do the heavyweight sorting and layout for low optimization levels.
942 if (!SortAndCombine)
943 return;
944 // Any alloca outside the entry block is a dynamic allocation.
945 for (CfgNode *Node : Nodes) {
946 if (Node == EntryNode)
947 continue;
948 for (Inst &Instr : Node->getInsts()) {
949 if (Instr.isDeleted())
950 continue;
951 if (llvm::isa<InstAlloca>(&Instr)) {
952 // Allocations outside the entry block require a frame pointer.
953 HasDynamicAllocation = true;
954 break;
955 }
956 }
957 if (HasDynamicAllocation)
958 break;
959 }
960 // Mark the target as requiring a frame pointer.
961 if (HasLargeAlignment || HasDynamicAllocation)
962 getTarget()->setHasFramePointer();
963 // Collect the Allocas into the two vectors.
964 // Allocas in the entry block that have constant size and alignment less
965 // than or equal to the function's stack alignment.
966 CfgVector<InstAlloca *> FixedAllocas;
967 // Allocas in the entry block that have constant size and alignment greater
968 // than the function's stack alignment.
969 CfgVector<InstAlloca *> AlignedAllocas;
970 // Maximum alignment used by any alloca.
971 uint32_t MaxAlignment = StackAlignment;
972 for (Inst &Instr : EntryNode->getInsts()) {
973 if (Instr.isDeleted())
974 continue;
975 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
976 if (!llvm::isa<Constant>(Alloca->getSizeInBytes()))
977 continue;
978 uint32_t AlignmentParam = Alloca->getAlignInBytes();
979 // For default align=0, set it to the real value 1, to avoid any
980 // bit-manipulation problems below.
981 AlignmentParam = std::max(AlignmentParam, 1u);
982 assert(llvm::isPowerOf2_32(AlignmentParam));
983 if (HasDynamicAllocation && AlignmentParam > StackAlignment) {
984 // If we have both dynamic allocations and large stack alignments,
985 // high-alignment allocations are pulled out with their own base.
986 AlignedAllocas.push_back(Alloca);
987 } else {
988 FixedAllocas.push_back(Alloca);
989 }
990 MaxAlignment = std::max(AlignmentParam, MaxAlignment);
991 }
992 }
993 // Add instructions to the head of the entry block in reverse order.
994 InstList &Insts = getEntryNode()->getInsts();
995 if (HasDynamicAllocation && HasLargeAlignment) {
996 // We are using a frame pointer, but fixed large-alignment alloca addresses
997 // do not have a known offset from either the stack or frame pointer.
998 // They grow up from a user pointer from an alloca.
999 sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer);
1000 // Fixed size allocas are addressed relative to the frame pointer.
1001 sortAndCombineAllocas(FixedAllocas, StackAlignment, Insts,
1002 BVT_FramePointer);
1003 } else {
1004 // Otherwise, fixed size allocas are addressed relative to the stack unless
1005 // there are dynamic allocas.
1006 const AllocaBaseVariableType BasePointerType =
1007 (HasDynamicAllocation ? BVT_FramePointer : BVT_StackPointer);
1008 sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
1009 }
1010 if (!FixedAllocas.empty() || !AlignedAllocas.empty())
1011 // No use calling findRematerializable() unless there is some
1012 // rematerializable alloca instruction to seed it.
1013 findRematerializable();
1014 }
1015
1016 namespace {
1017
1018 // Helpers for findRematerializable(). For each of them, if a suitable
1019 // rematerialization is found, the instruction's Dest variable is set to be
1020 // rematerializable and it returns true, otherwise it returns false.
1021
rematerializeArithmetic(const Inst * Instr)1022 bool rematerializeArithmetic(const Inst *Instr) {
1023 // Check that it's an Arithmetic instruction with an Add operation.
1024 auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr);
1025 if (Arith == nullptr || Arith->getOp() != InstArithmetic::Add)
1026 return false;
1027 // Check that Src(0) is rematerializable.
1028 auto *Src0Var = llvm::dyn_cast<Variable>(Arith->getSrc(0));
1029 if (Src0Var == nullptr || !Src0Var->isRematerializable())
1030 return false;
1031 // Check that Src(1) is an immediate.
1032 auto *Src1Imm = llvm::dyn_cast<ConstantInteger32>(Arith->getSrc(1));
1033 if (Src1Imm == nullptr)
1034 return false;
1035 Arith->getDest()->setRematerializable(
1036 Src0Var->getRegNum(), Src0Var->getStackOffset() + Src1Imm->getValue());
1037 return true;
1038 }
1039
rematerializeAssign(const Inst * Instr)1040 bool rematerializeAssign(const Inst *Instr) {
1041 // An InstAssign only originates from an inttoptr or ptrtoint instruction,
1042 // which never occurs in a MINIMAL build.
1043 if (BuildDefs::minimal())
1044 return false;
1045 // Check that it's an Assign instruction.
1046 if (!llvm::isa<InstAssign>(Instr))
1047 return false;
1048 // Check that Src(0) is rematerializable.
1049 auto *Src0Var = llvm::dyn_cast<Variable>(Instr->getSrc(0));
1050 if (Src0Var == nullptr || !Src0Var->isRematerializable())
1051 return false;
1052 Instr->getDest()->setRematerializable(Src0Var->getRegNum(),
1053 Src0Var->getStackOffset());
1054 return true;
1055 }
1056
rematerializeCast(const Inst * Instr)1057 bool rematerializeCast(const Inst *Instr) {
1058 // An pointer-type bitcast never occurs in a MINIMAL build.
1059 if (BuildDefs::minimal())
1060 return false;
1061 // Check that it's a Cast instruction with a Bitcast operation.
1062 auto *Cast = llvm::dyn_cast<InstCast>(Instr);
1063 if (Cast == nullptr || Cast->getCastKind() != InstCast::Bitcast)
1064 return false;
1065 // Check that Src(0) is rematerializable.
1066 auto *Src0Var = llvm::dyn_cast<Variable>(Cast->getSrc(0));
1067 if (Src0Var == nullptr || !Src0Var->isRematerializable())
1068 return false;
1069 // Check that Dest and Src(0) have the same type.
1070 Variable *Dest = Cast->getDest();
1071 if (Dest->getType() != Src0Var->getType())
1072 return false;
1073 Dest->setRematerializable(Src0Var->getRegNum(), Src0Var->getStackOffset());
1074 return true;
1075 }
1076
1077 } // end of anonymous namespace
1078
1079 /// Scan the function to find additional rematerializable variables. This is
1080 /// possible when the source operand of an InstAssignment is a rematerializable
1081 /// variable, or the same for a pointer-type InstCast::Bitcast, or when an
1082 /// InstArithmetic is an add of a rematerializable variable and an immediate.
1083 /// Note that InstAssignment instructions and pointer-type InstCast::Bitcast
1084 /// instructions generally only come about from the IceConverter's treatment of
1085 /// inttoptr, ptrtoint, and bitcast instructions. TODO(stichnot): Consider
1086 /// other possibilities, however unlikely, such as InstArithmetic::Sub, or
1087 /// commutativity.
findRematerializable()1088 void Cfg::findRematerializable() {
1089 // Scan the instructions in order, and repeat until no new opportunities are
1090 // found. It may take more than one iteration because a variable's defining
1091 // block may happen to come after a block where it is used, depending on the
1092 // CfgNode linearization order.
1093 bool FoundNewAssignment;
1094 do {
1095 FoundNewAssignment = false;
1096 for (CfgNode *Node : getNodes()) {
1097 // No need to process Phi instructions.
1098 for (Inst &Instr : Node->getInsts()) {
1099 if (Instr.isDeleted())
1100 continue;
1101 Variable *Dest = Instr.getDest();
1102 if (Dest == nullptr || Dest->isRematerializable())
1103 continue;
1104 if (rematerializeArithmetic(&Instr) || rematerializeAssign(&Instr) ||
1105 rematerializeCast(&Instr)) {
1106 FoundNewAssignment = true;
1107 }
1108 }
1109 }
1110 } while (FoundNewAssignment);
1111 }
1112
doAddressOpt()1113 void Cfg::doAddressOpt() {
1114 TimerMarker T(TimerStack::TT_doAddressOpt, this);
1115 for (CfgNode *Node : Nodes)
1116 Node->doAddressOpt();
1117 }
1118
1119 namespace {
1120 // ShuffleVectorUtils implements helper functions for rematerializing
1121 // shufflevector instructions from a sequence of extractelement/insertelement
1122 // instructions. It looks for the following pattern:
1123 //
1124 // %t0 = extractelement A, %n0
1125 // %t1 = extractelement B, %n1
1126 // %t2 = extractelement C, %n2
1127 // ...
1128 // %tN = extractelement N, %nN
1129 // %d0 = insertelement undef, %t0, 0
1130 // %d1 = insertelement %d0, %t1, 1
1131 // %d2 = insertelement %d1, %t2, 2
1132 // ...
1133 // %dest = insertelement %d_N-1, %tN, N
1134 //
1135 // where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two
1136 // distinct variables.
1137 namespace ShuffleVectorUtils {
1138 // findAllInserts is used when searching for all the insertelements that are
1139 // used in a shufflevector operation. This function works recursively, when
1140 // invoked with I = i, the function assumes Insts[i] is the last found
1141 // insertelement in the chain. The next insertelement insertruction is saved in
1142 // Insts[i+1].
findAllInserts(Cfg * Func,GlobalContext * Ctx,VariablesMetadata * VM,CfgVector<const Inst * > * Insts,SizeT I=0)1143 bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
1144 CfgVector<const Inst *> *Insts, SizeT I = 0) {
1145 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);
1146
1147 if (I > Insts->size()) {
1148 if (Verbose) {
1149 Ctx->getStrDump() << "\tToo many inserts.\n";
1150 }
1151 return false;
1152 }
1153
1154 const auto *LastInsert = Insts->at(I);
1155 assert(llvm::isa<InstInsertElement>(LastInsert));
1156
1157 if (I == Insts->size() - 1) {
1158 // Matching against undef is not really needed because the value in Src(0)
1159 // will be totally overwritten. We still enforce it anyways because the
1160 // PNaCl toolchain generates the bitcode with it.
1161 if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) {
1162 if (Verbose) {
1163 Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " "
1164 << Insts->size();
1165 LastInsert->dump(Func);
1166 Ctx->getStrDump() << "\n";
1167 }
1168 return false;
1169 }
1170
1171 // The following loop ensures that the insertelements are sorted. In theory,
1172 // we could relax this restriction and allow any order. As long as each
1173 // index appears exactly once, this chain is still a candidate for becoming
1174 // a shufflevector. The Insts vector is traversed backwards because the
1175 // instructions are "enqueued" in reverse order.
1176 int32_t ExpectedElement = 0;
1177 for (const auto *I : reverse_range(*Insts)) {
1178 if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() !=
1179 ExpectedElement) {
1180 return false;
1181 }
1182 ++ExpectedElement;
1183 }
1184 return true;
1185 }
1186
1187 const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0));
1188 const auto *Def = VM->getSingleDefinition(Src0V);
1189
1190 // Only optimize if the first operand in
1191 //
1192 // Dest = insertelement A, B, 10
1193 //
1194 // is singly-def'ed.
1195 if (Def == nullptr) {
1196 if (Verbose) {
1197 Ctx->getStrDump() << "\tmulti-def: ";
1198 (*Insts)[I]->dump(Func);
1199 Ctx->getStrDump() << "\n";
1200 }
1201 return false;
1202 }
1203
1204 // We also require the (single) definition to come from an insertelement
1205 // instruction.
1206 if (!llvm::isa<InstInsertElement>(Def)) {
1207 if (Verbose) {
1208 Ctx->getStrDump() << "\tnot insert element: ";
1209 Def->dump(Func);
1210 Ctx->getStrDump() << "\n";
1211 }
1212 return false;
1213 }
1214
1215 // Everything seems fine, so we save Def in Insts, and delegate the decision
1216 // to findAllInserts.
1217 (*Insts)[I + 1] = Def;
1218
1219 return findAllInserts(Func, Ctx, VM, Insts, I + 1);
1220 }
1221
1222 // insertsLastElement returns true if Insert is inserting an element in the last
1223 // position of a vector.
insertsLastElement(const Inst & Insert)1224 bool insertsLastElement(const Inst &Insert) {
1225 const Type DestTy = Insert.getDest()->getType();
1226 assert(isVectorType(DestTy));
1227 const SizeT Elem =
1228 llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue();
1229 return Elem == typeNumElements(DestTy) - 1;
1230 }
1231
1232 // findAllExtracts goes over all the insertelement instructions that are
1233 // candidates to be replaced by a shufflevector, and searches for all the
1234 // definitions of the elements being inserted. If all of the elements are the
1235 // result of an extractelement instruction, and all of the extractelements
1236 // operate on at most two different sources, than the instructions can be
1237 // replaced by a shufflevector.
findAllExtracts(Cfg * Func,GlobalContext * Ctx,VariablesMetadata * VM,const CfgVector<const Inst * > & Insts,Variable ** Src0,Variable ** Src1,CfgVector<const Inst * > * Extracts)1238 bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
1239 const CfgVector<const Inst *> &Insts, Variable **Src0,
1240 Variable **Src1, CfgVector<const Inst *> *Extracts) {
1241 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);
1242
1243 *Src0 = nullptr;
1244 *Src1 = nullptr;
1245 assert(Insts.size() > 0);
1246 for (SizeT I = 0; I < Insts.size(); ++I) {
1247 const auto *Insert = Insts.at(I);
1248 const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1));
1249 if (Src1V == nullptr) {
1250 if (Verbose) {
1251 Ctx->getStrDump() << "src(1) is not a variable: ";
1252 Insert->dump(Func);
1253 Ctx->getStrDump() << "\n";
1254 }
1255 return false;
1256 }
1257
1258 const auto *Def = VM->getSingleDefinition(Src1V);
1259 if (Def == nullptr) {
1260 if (Verbose) {
1261 Ctx->getStrDump() << "multi-def src(1): ";
1262 Insert->dump(Func);
1263 Ctx->getStrDump() << "\n";
1264 }
1265 return false;
1266 }
1267
1268 if (!llvm::isa<InstExtractElement>(Def)) {
1269 if (Verbose) {
1270 Ctx->getStrDump() << "not extractelement: ";
1271 Def->dump(Func);
1272 Ctx->getStrDump() << "\n";
1273 }
1274 return false;
1275 }
1276
1277 auto *Src = llvm::cast<Variable>(Def->getSrc(0));
1278 if (*Src0 == nullptr) {
1279 // No sources yet. Save Src to Src0.
1280 *Src0 = Src;
1281 } else if (*Src1 == nullptr) {
1282 // We already have a source, so we might save Src in Src1 -- but only if
1283 // Src0 is not Src.
1284 if (*Src0 != Src) {
1285 *Src1 = Src;
1286 }
1287 } else if (Src != *Src0 && Src != *Src1) {
1288 // More than two sources, so we can't rematerialize the shufflevector
1289 // instruction.
1290 if (Verbose) {
1291 Ctx->getStrDump() << "Can't shuffle more than two sources.\n";
1292 }
1293 return false;
1294 }
1295
1296 (*Extracts)[I] = Def;
1297 }
1298
1299 // We should have seen at least one source operand.
1300 assert(*Src0 != nullptr);
1301
1302 // If a second source was not seen, then we just make Src1 = Src0 to simplify
1303 // things down stream. This should not matter, as all of the indexes in the
1304 // shufflevector instruction will point to Src0.
1305 if (*Src1 == nullptr) {
1306 *Src1 = *Src0;
1307 }
1308
1309 return true;
1310 }
1311
1312 } // end of namespace ShuffleVectorUtils
1313 } // end of anonymous namespace
1314
materializeVectorShuffles()1315 void Cfg::materializeVectorShuffles() {
1316 const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat);
1317
1318 std::unique_ptr<OstreamLocker> L;
1319 if (Verbose) {
1320 L.reset(new OstreamLocker(getContext()));
1321 getContext()->getStrDump() << "\nShuffle materialization:\n";
1322 }
1323
1324 // MaxVectorElements is the maximum number of elements in the vector types
1325 // handled by Subzero. We use it to create the Inserts and Extracts vectors
1326 // with the appropriate size, thus avoiding resize() calls.
1327 const SizeT MaxVectorElements = typeNumElements(IceType_v16i8);
1328 CfgVector<const Inst *> Inserts(MaxVectorElements);
1329 CfgVector<const Inst *> Extracts(MaxVectorElements);
1330
1331 TimerMarker T(TimerStack::TT_materializeVectorShuffles, this);
1332 for (CfgNode *Node : Nodes) {
1333 for (auto &Instr : Node->getInsts()) {
1334 if (!llvm::isa<InstInsertElement>(Instr)) {
1335 continue;
1336 }
1337 if (!ShuffleVectorUtils::insertsLastElement(Instr)) {
1338 // To avoid wasting time, we only start the pattern match at the last
1339 // insertelement instruction -- and go backwards from there.
1340 continue;
1341 }
1342 if (Verbose) {
1343 getContext()->getStrDump() << "\tCandidate: ";
1344 Instr.dump(this);
1345 getContext()->getStrDump() << "\n";
1346 }
1347 Inserts.resize(typeNumElements(Instr.getDest()->getType()));
1348 Inserts[0] = &Instr;
1349 if (!ShuffleVectorUtils::findAllInserts(this, getContext(),
1350 VMetadata.get(), &Inserts)) {
1351 // If we fail to find a sequence of insertelements, we stop the
1352 // optimization.
1353 if (Verbose) {
1354 getContext()->getStrDump() << "\tFalse alarm.\n";
1355 }
1356 continue;
1357 }
1358 if (Verbose) {
1359 getContext()->getStrDump() << "\tFound the following insertelement: \n";
1360 for (auto *I : reverse_range(Inserts)) {
1361 getContext()->getStrDump() << "\t\t";
1362 I->dump(this);
1363 getContext()->getStrDump() << "\n";
1364 }
1365 }
1366 Extracts.resize(Inserts.size());
1367 Variable *Src0;
1368 Variable *Src1;
1369 if (!ShuffleVectorUtils::findAllExtracts(this, getContext(),
1370 VMetadata.get(), Inserts, &Src0,
1371 &Src1, &Extracts)) {
1372 // If we fail to match the definitions of the insertelements' sources
1373 // with extractelement instructions -- or if those instructions operate
1374 // on more than two different variables -- we stop the optimization.
1375 if (Verbose) {
1376 getContext()->getStrDump() << "\tFailed to match extractelements.\n";
1377 }
1378 continue;
1379 }
1380 if (Verbose) {
1381 getContext()->getStrDump()
1382 << "\tFound the following insert/extract element pairs: \n";
1383 for (SizeT I = 0; I < Inserts.size(); ++I) {
1384 const SizeT Pos = Inserts.size() - I - 1;
1385 getContext()->getStrDump() << "\t\tInsert : ";
1386 Inserts[Pos]->dump(this);
1387 getContext()->getStrDump() << "\n\t\tExtract: ";
1388 Extracts[Pos]->dump(this);
1389 getContext()->getStrDump() << "\n";
1390 }
1391 }
1392
1393 assert(Src0 != nullptr);
1394 assert(Src1 != nullptr);
1395
1396 auto *ShuffleVector =
1397 InstShuffleVector::create(this, Instr.getDest(), Src0, Src1);
1398 assert(ShuffleVector->getSrc(0) == Src0);
1399 assert(ShuffleVector->getSrc(1) == Src1);
1400 for (SizeT I = 0; I < Extracts.size(); ++I) {
1401 const SizeT Pos = Extracts.size() - I - 1;
1402 auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1));
1403 if (Src0 == Extracts[Pos]->getSrc(0)) {
1404 ShuffleVector->addIndex(Index);
1405 } else {
1406 ShuffleVector->addIndex(llvm::cast<ConstantInteger32>(
1407 Ctx->getConstantInt32(Index->getValue() + Extracts.size())));
1408 }
1409 }
1410
1411 if (Verbose) {
1412 getContext()->getStrDump() << "Created: ";
1413 ShuffleVector->dump(this);
1414 getContext()->getStrDump() << "\n";
1415 }
1416
1417 Instr.setDeleted();
1418 auto &LoweringContext = getTarget()->getContext();
1419 LoweringContext.setInsertPoint(instToIterator(&Instr));
1420 LoweringContext.insert(ShuffleVector);
1421 }
1422 }
1423 }
1424
genCode()1425 void Cfg::genCode() {
1426 TimerMarker T(TimerStack::TT_genCode, this);
1427 for (CfgNode *Node : Nodes)
1428 Node->genCode();
1429 }
1430
1431 // Compute the stack frame layout.
genFrame()1432 void Cfg::genFrame() {
1433 TimerMarker T(TimerStack::TT_genFrame, this);
1434 getTarget()->addProlog(Entry);
1435 for (CfgNode *Node : Nodes)
1436 if (Node->getHasReturn())
1437 getTarget()->addEpilog(Node);
1438 }
1439
generateLoopInfo()1440 void Cfg::generateLoopInfo() {
1441 TimerMarker T(TimerStack::TT_computeLoopNestDepth, this);
1442 LoopInfo = ComputeLoopInfo(this);
1443 }
1444
1445 // This is a lightweight version of live-range-end calculation. Marks the last
1446 // use of only those variables whose definition and uses are completely with a
1447 // single block. It is a quick single pass and doesn't need to iterate until
1448 // convergence.
livenessLightweight()1449 void Cfg::livenessLightweight() {
1450 TimerMarker T(TimerStack::TT_livenessLightweight, this);
1451 getVMetadata()->init(VMK_Uses);
1452 for (CfgNode *Node : Nodes)
1453 Node->livenessLightweight();
1454 }
1455
liveness(LivenessMode Mode)1456 void Cfg::liveness(LivenessMode Mode) {
1457 TimerMarker T(TimerStack::TT_liveness, this);
1458 // Destroying the previous (if any) Liveness information clears the Liveness
1459 // allocator TLS pointer.
1460 Live = nullptr;
1461 Live = Liveness::create(this, Mode);
1462
1463 getVMetadata()->init(VMK_Uses);
1464 Live->init();
1465
1466 // Initialize with all nodes needing to be processed.
1467 BitVector NeedToProcess(Nodes.size(), true);
1468 while (NeedToProcess.any()) {
1469 // Iterate in reverse topological order to speed up convergence.
1470 for (CfgNode *Node : reverse_range(Nodes)) {
1471 if (NeedToProcess[Node->getIndex()]) {
1472 NeedToProcess[Node->getIndex()] = false;
1473 bool Changed = Node->liveness(getLiveness());
1474 if (Changed) {
1475 // If the beginning-of-block liveness changed since the last
1476 // iteration, mark all in-edges as needing to be processed.
1477 for (CfgNode *Pred : Node->getInEdges())
1478 NeedToProcess[Pred->getIndex()] = true;
1479 }
1480 }
1481 }
1482 }
1483 if (Mode == Liveness_Intervals) {
1484 // Reset each variable's live range.
1485 for (Variable *Var : Variables)
1486 Var->resetLiveRange();
1487 }
1488 // Make a final pass over each node to delete dead instructions, collect the
1489 // first and last instruction numbers, and add live range segments for that
1490 // node.
1491 for (CfgNode *Node : Nodes) {
1492 InstNumberT FirstInstNum = Inst::NumberSentinel;
1493 InstNumberT LastInstNum = Inst::NumberSentinel;
1494 for (Inst &I : Node->getPhis()) {
1495 I.deleteIfDead();
1496 if (Mode == Liveness_Intervals && !I.isDeleted()) {
1497 if (FirstInstNum == Inst::NumberSentinel)
1498 FirstInstNum = I.getNumber();
1499 assert(I.getNumber() > LastInstNum);
1500 LastInstNum = I.getNumber();
1501 }
1502 }
1503 for (Inst &I : Node->getInsts()) {
1504 I.deleteIfDead();
1505 if (Mode == Liveness_Intervals && !I.isDeleted()) {
1506 if (FirstInstNum == Inst::NumberSentinel)
1507 FirstInstNum = I.getNumber();
1508 assert(I.getNumber() > LastInstNum);
1509 LastInstNum = I.getNumber();
1510 }
1511 }
1512 if (Mode == Liveness_Intervals) {
1513 // Special treatment for live in-args. Their liveness needs to extend
1514 // beyond the beginning of the function, otherwise an arg whose only use
1515 // is in the first instruction will end up having the trivial live range
1516 // [2,2) and will *not* interfere with other arguments. So if the first
1517 // instruction of the method is "r=arg1+arg2", both args may be assigned
1518 // the same register. This is accomplished by extending the entry block's
1519 // instruction range from [2,n) to [1,n) which will transform the
1520 // problematic [2,2) live ranges into [1,2). This extension works because
1521 // the entry node is guaranteed to have the lowest instruction numbers.
1522 if (Node == getEntryNode()) {
1523 FirstInstNum = Inst::NumberExtended;
1524 // Just in case the entry node somehow contains no instructions...
1525 if (LastInstNum == Inst::NumberSentinel)
1526 LastInstNum = FirstInstNum;
1527 }
1528 // If this node somehow contains no instructions, don't bother trying to
1529 // add liveness intervals for it, because variables that are live-in and
1530 // live-out will have a bogus interval added.
1531 if (FirstInstNum != Inst::NumberSentinel)
1532 Node->livenessAddIntervals(getLiveness(), FirstInstNum, LastInstNum);
1533 }
1534 }
1535 }
1536
1537 // Traverse every Variable of every Inst and verify that it appears within the
1538 // Variable's computed live range.
validateLiveness() const1539 bool Cfg::validateLiveness() const {
1540 TimerMarker T(TimerStack::TT_validateLiveness, this);
1541 bool Valid = true;
1542 OstreamLocker L(Ctx);
1543 Ostream &Str = Ctx->getStrDump();
1544 for (CfgNode *Node : Nodes) {
1545 Inst *FirstInst = nullptr;
1546 for (Inst &Instr : Node->getInsts()) {
1547 if (Instr.isDeleted())
1548 continue;
1549 if (FirstInst == nullptr)
1550 FirstInst = &Instr;
1551 InstNumberT InstNumber = Instr.getNumber();
1552 if (Variable *Dest = Instr.getDest()) {
1553 if (!Dest->getIgnoreLiveness()) {
1554 bool Invalid = false;
1555 constexpr bool IsDest = true;
1556 if (!Dest->getLiveRange().containsValue(InstNumber, IsDest))
1557 Invalid = true;
1558 // Check that this instruction actually *begins* Dest's live range,
1559 // by checking that Dest is not live in the previous instruction. As
1560 // a special exception, we don't check this for the first instruction
1561 // of the block, because a Phi temporary may be live at the end of
1562 // the previous block, and if it is also assigned in the first
1563 // instruction of this block, the adjacent live ranges get merged.
1564 if (&Instr != FirstInst && !Instr.isDestRedefined() &&
1565 Dest->getLiveRange().containsValue(InstNumber - 1, IsDest))
1566 Invalid = true;
1567 if (Invalid) {
1568 Valid = false;
1569 Str << "Liveness error: inst " << Instr.getNumber() << " dest ";
1570 Dest->dump(this);
1571 Str << " live range " << Dest->getLiveRange() << "\n";
1572 }
1573 }
1574 }
1575 FOREACH_VAR_IN_INST(Var, Instr) {
1576 static constexpr bool IsDest = false;
1577 if (!Var->getIgnoreLiveness() &&
1578 !Var->getLiveRange().containsValue(InstNumber, IsDest)) {
1579 Valid = false;
1580 Str << "Liveness error: inst " << Instr.getNumber() << " var ";
1581 Var->dump(this);
1582 Str << " live range " << Var->getLiveRange() << "\n";
1583 }
1584 }
1585 }
1586 }
1587 return Valid;
1588 }
1589
contractEmptyNodes()1590 void Cfg::contractEmptyNodes() {
1591 // If we're decorating the asm output with register liveness info, this
1592 // information may become corrupted or incorrect after contracting nodes that
1593 // contain only redundant assignments. As such, we disable this pass when
1594 // DecorateAsm is specified. This may make the resulting code look more
1595 // branchy, but it should have no effect on the register assignments.
1596 if (getFlags().getDecorateAsm())
1597 return;
1598 for (CfgNode *Node : Nodes) {
1599 Node->contractIfEmpty();
1600 }
1601 }
1602
doBranchOpt()1603 void Cfg::doBranchOpt() {
1604 TimerMarker T(TimerStack::TT_doBranchOpt, this);
1605 for (auto I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
1606 auto NextNode = I + 1;
1607 (*I)->doBranchOpt(NextNode == E ? nullptr : *NextNode);
1608 }
1609 }
1610
1611 // ======================== Dump routines ======================== //
1612
1613 // emitTextHeader() is not target-specific (apart from what is abstracted by
1614 // the Assembler), so it is defined here rather than in the target lowering
1615 // class.
emitTextHeader(GlobalString Name,GlobalContext * Ctx,const Assembler * Asm)1616 void Cfg::emitTextHeader(GlobalString Name, GlobalContext *Ctx,
1617 const Assembler *Asm) {
1618 if (!BuildDefs::dump())
1619 return;
1620 Ostream &Str = Ctx->getStrEmit();
1621 Str << "\t.text\n";
1622 if (getFlags().getFunctionSections())
1623 Str << "\t.section\t.text." << Name << ",\"ax\",%progbits\n";
1624 if (!Asm->getInternal() || getFlags().getDisableInternal()) {
1625 Str << "\t.globl\t" << Name << "\n";
1626 Str << "\t.type\t" << Name << ",%function\n";
1627 }
1628 Str << "\t" << Asm->getAlignDirective() << " "
1629 << Asm->getBundleAlignLog2Bytes() << ",0x";
1630 for (uint8_t I : Asm->getNonExecBundlePadding())
1631 Str.write_hex(I);
1632 Str << "\n";
1633 Str << Name << ":\n";
1634 }
1635
emitJumpTables()1636 void Cfg::emitJumpTables() {
1637 switch (getFlags().getOutFileType()) {
1638 case FT_Elf:
1639 case FT_Iasm: {
1640 // The emission needs to be delayed until the after the text section so
1641 // save the offsets in the global context.
1642 for (const InstJumpTable *JumpTable : JumpTables) {
1643 Ctx->addJumpTableData(JumpTable->toJumpTableData(getAssembler()));
1644 }
1645 } break;
1646 case FT_Asm: {
1647 // Emit the assembly directly so we don't need to hang on to all the names
1648 for (const InstJumpTable *JumpTable : JumpTables)
1649 getTarget()->emitJumpTable(this, JumpTable);
1650 } break;
1651 }
1652 }
1653
emit()1654 void Cfg::emit() {
1655 if (!BuildDefs::dump())
1656 return;
1657 TimerMarker T(TimerStack::TT_emitAsm, this);
1658 if (getFlags().getDecorateAsm()) {
1659 renumberInstructions();
1660 getVMetadata()->init(VMK_Uses);
1661 liveness(Liveness_Basic);
1662 dump("After recomputing liveness for -decorate-asm");
1663 }
1664 OstreamLocker L(Ctx);
1665 Ostream &Str = Ctx->getStrEmit();
1666 const Assembler *Asm = getAssembler<>();
1667
1668 emitTextHeader(FunctionName, Ctx, Asm);
1669 if (getFlags().getDecorateAsm()) {
1670 for (Variable *Var : getVariables()) {
1671 if (Var->hasKnownStackOffset() && !Var->isRematerializable()) {
1672 Str << "\t" << Var->getSymbolicStackOffset() << " = "
1673 << Var->getStackOffset() << "\n";
1674 }
1675 }
1676 }
1677 for (CfgNode *Node : Nodes) {
1678 Node->emit(this);
1679 }
1680 emitJumpTables();
1681 Str << "\n";
1682 }
1683
emitIAS()1684 void Cfg::emitIAS() {
1685 TimerMarker T(TimerStack::TT_emitAsm, this);
1686 // The emitIAS() routines emit into the internal assembler buffer, so there's
1687 // no need to lock the streams.
1688 for (CfgNode *Node : Nodes) {
1689 Node->emitIAS(this);
1690 }
1691 emitJumpTables();
1692 }
1693
getTotalMemoryMB() const1694 size_t Cfg::getTotalMemoryMB() const {
1695 constexpr size_t _1MB = 1024 * 1024;
1696 assert(Allocator != nullptr);
1697 assert(CfgAllocatorTraits::current() == Allocator.get());
1698 return Allocator->getTotalMemory() / _1MB;
1699 }
1700
getLivenessMemoryMB() const1701 size_t Cfg::getLivenessMemoryMB() const {
1702 constexpr size_t _1MB = 1024 * 1024;
1703 if (Live == nullptr) {
1704 return 0;
1705 }
1706 return Live->getAllocator()->getTotalMemory() / _1MB;
1707 }
1708
1709 // Dumps the IR with an optional introductory message.
dump(const char * Message)1710 void Cfg::dump(const char *Message) {
1711 if (!BuildDefs::dump())
1712 return;
1713 if (!isVerbose())
1714 return;
1715 OstreamLocker L(Ctx);
1716 Ostream &Str = Ctx->getStrDump();
1717 if (Message[0])
1718 Str << "================ " << Message << " ================\n";
1719 if (isVerbose(IceV_Mem)) {
1720 Str << "Memory size = " << getTotalMemoryMB() << " MB\n";
1721 }
1722 setCurrentNode(getEntryNode());
1723 // Print function name+args
1724 if (isVerbose(IceV_Instructions)) {
1725 Str << "define ";
1726 if (getInternal() && !getFlags().getDisableInternal())
1727 Str << "internal ";
1728 Str << ReturnType << " @" << getFunctionName() << "(";
1729 for (SizeT i = 0; i < Args.size(); ++i) {
1730 if (i > 0)
1731 Str << ", ";
1732 Str << Args[i]->getType() << " ";
1733 Args[i]->dump(this);
1734 }
1735 // Append an extra copy of the function name here, in order to print its
1736 // size stats but not mess up lit tests.
1737 Str << ") { # " << getFunctionNameAndSize() << "\n";
1738 }
1739 resetCurrentNode();
1740 if (isVerbose(IceV_Liveness)) {
1741 // Print summary info about variables
1742 for (Variable *Var : Variables) {
1743 Str << "// multiblock=";
1744 if (getVMetadata()->isTracked(Var))
1745 Str << getVMetadata()->isMultiBlock(Var);
1746 else
1747 Str << "?";
1748 Str << " defs=";
1749 bool FirstPrint = true;
1750 if (VMetadata->getKind() != VMK_Uses) {
1751 if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) {
1752 Str << FirstDef->getNumber();
1753 FirstPrint = false;
1754 }
1755 }
1756 if (VMetadata->getKind() == VMK_All) {
1757 for (const Inst *Instr : VMetadata->getLatterDefinitions(Var)) {
1758 if (!FirstPrint)
1759 Str << ",";
1760 Str << Instr->getNumber();
1761 FirstPrint = false;
1762 }
1763 }
1764 Str << " weight=" << Var->getWeight(this) << " ";
1765 Var->dump(this);
1766 Str << " LIVE=" << Var->getLiveRange() << "\n";
1767 }
1768 }
1769 // Print each basic block
1770 for (CfgNode *Node : Nodes)
1771 Node->dump(this);
1772 if (isVerbose(IceV_Instructions))
1773 Str << "}\n";
1774 }
1775
1776 } // end of namespace Ice
1777