1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs loop invariant code motion on machine instructions. We
11 // attempt to remove as much code from the body of a loop as possible.
12 //
13 // This pass does not attempt to throttle itself to limit register pressure.
14 // The register allocation phases are expected to perform rematerialization
15 // to recover when register pressure is high.
16 //
17 // This pass is not intended to be a replacement or a complete alternative
18 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
19 // constructs that are not exposed before lowering and instruction selection.
20 //
21 //===----------------------------------------------------------------------===//
22
23 #define DEBUG_TYPE "machine-licm"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineMemOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/PseudoSourceValue.h"
31 #include "llvm/MC/MCInstrItineraries.h"
32 #include "llvm/Target/TargetLowering.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Target/TargetInstrInfo.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Analysis/AliasAnalysis.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/Statistic.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/raw_ostream.h"
43 using namespace llvm;
44
45 static cl::opt<bool>
46 AvoidSpeculation("avoid-speculation",
47 cl::desc("MachineLICM should avoid speculation"),
48 cl::init(false), cl::Hidden);
49
50 STATISTIC(NumHoisted,
51 "Number of machine instructions hoisted out of loops");
52 STATISTIC(NumLowRP,
53 "Number of instructions hoisted in low reg pressure situation");
54 STATISTIC(NumHighLatency,
55 "Number of high latency instructions hoisted");
56 STATISTIC(NumCSEed,
57 "Number of hoisted machine instructions CSEed");
58 STATISTIC(NumPostRAHoisted,
59 "Number of machine instructions hoisted out of loops post regalloc");
60
61 namespace {
62 class MachineLICM : public MachineFunctionPass {
63 bool PreRegAlloc;
64
65 const TargetMachine *TM;
66 const TargetInstrInfo *TII;
67 const TargetLowering *TLI;
68 const TargetRegisterInfo *TRI;
69 const MachineFrameInfo *MFI;
70 MachineRegisterInfo *MRI;
71 const InstrItineraryData *InstrItins;
72
73 // Various analyses that we use...
74 AliasAnalysis *AA; // Alias analysis info.
75 MachineLoopInfo *MLI; // Current MachineLoopInfo
76 MachineDominatorTree *DT; // Machine dominator tree for the cur loop
77
78 // State that is updated as we process loops
79 bool Changed; // True if a loop is changed.
80 bool FirstInLoop; // True if it's the first LICM in the loop.
81 MachineLoop *CurLoop; // The current loop we are working on.
82 MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
83
84 BitVector AllocatableSet;
85
86 // Track 'estimated' register pressure.
87 SmallSet<unsigned, 32> RegSeen;
88 SmallVector<unsigned, 8> RegPressure;
89
90 // Register pressure "limit" per register class. If the pressure
91 // is higher than the limit, then it's considered high.
92 SmallVector<unsigned, 8> RegLimit;
93
94 // Register pressure on path leading from loop preheader to current BB.
95 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
96
97 // For each opcode, keep a list of potential CSE instructions.
98 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
99
100 enum {
101 SpeculateFalse = 0,
102 SpeculateTrue = 1,
103 SpeculateUnknown = 2
104 };
105
106 // If a MBB does not dominate loop exiting blocks then it may not safe
107 // to hoist loads from this block.
108 // Tri-state: 0 - false, 1 - true, 2 - unknown
109 unsigned SpeculationState;
110
111 public:
112 static char ID; // Pass identification, replacement for typeid
MachineLICM()113 MachineLICM() :
114 MachineFunctionPass(ID), PreRegAlloc(true) {
115 initializeMachineLICMPass(*PassRegistry::getPassRegistry());
116 }
117
MachineLICM(bool PreRA)118 explicit MachineLICM(bool PreRA) :
119 MachineFunctionPass(ID), PreRegAlloc(PreRA) {
120 initializeMachineLICMPass(*PassRegistry::getPassRegistry());
121 }
122
123 virtual bool runOnMachineFunction(MachineFunction &MF);
124
getPassName() const125 const char *getPassName() const { return "Machine Instruction LICM"; }
126
getAnalysisUsage(AnalysisUsage & AU) const127 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
128 AU.addRequired<MachineLoopInfo>();
129 AU.addRequired<MachineDominatorTree>();
130 AU.addRequired<AliasAnalysis>();
131 AU.addPreserved<MachineLoopInfo>();
132 AU.addPreserved<MachineDominatorTree>();
133 MachineFunctionPass::getAnalysisUsage(AU);
134 }
135
releaseMemory()136 virtual void releaseMemory() {
137 RegSeen.clear();
138 RegPressure.clear();
139 RegLimit.clear();
140 BackTrace.clear();
141 for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
142 CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
143 CI->second.clear();
144 CSEMap.clear();
145 }
146
147 private:
148 /// CandidateInfo - Keep track of information about hoisting candidates.
149 struct CandidateInfo {
150 MachineInstr *MI;
151 unsigned Def;
152 int FI;
CandidateInfo__anon0ec594db0111::MachineLICM::CandidateInfo153 CandidateInfo(MachineInstr *mi, unsigned def, int fi)
154 : MI(mi), Def(def), FI(fi) {}
155 };
156
157 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
158 /// invariants out to the preheader.
159 void HoistRegionPostRA();
160
161 /// HoistPostRA - When an instruction is found to only use loop invariant
162 /// operands that is safe to hoist, this instruction is called to do the
163 /// dirty work.
164 void HoistPostRA(MachineInstr *MI, unsigned Def);
165
166 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
167 /// gather register def and frame object update information.
168 void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
169 SmallSet<int, 32> &StoredFIs,
170 SmallVector<CandidateInfo, 32> &Candidates);
171
172 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
173 /// current loop.
174 void AddToLiveIns(unsigned Reg);
175
176 /// IsLICMCandidate - Returns true if the instruction may be a suitable
177 /// candidate for LICM. e.g. If the instruction is a call, then it's
178 /// obviously not safe to hoist it.
179 bool IsLICMCandidate(MachineInstr &I);
180
181 /// IsLoopInvariantInst - Returns true if the instruction is loop
182 /// invariant. I.e., all virtual register operands are defined outside of
183 /// the loop, physical registers aren't accessed (explicitly or implicitly),
184 /// and the instruction is hoistable.
185 ///
186 bool IsLoopInvariantInst(MachineInstr &I);
187
188 /// HasAnyPHIUse - Return true if the specified register is used by any
189 /// phi node.
190 bool HasAnyPHIUse(unsigned Reg) const;
191
192 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
193 /// and an use in the current loop, return true if the target considered
194 /// it 'high'.
195 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
196 unsigned Reg) const;
197
198 bool IsCheapInstruction(MachineInstr &MI) const;
199
200 /// CanCauseHighRegPressure - Visit BBs from header to current BB,
201 /// check if hoisting an instruction of the given cost matrix can cause high
202 /// register pressure.
203 bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
204
205 /// UpdateBackTraceRegPressure - Traverse the back trace from header to
206 /// the current block and update their register pressures to reflect the
207 /// effect of hoisting MI from the current block to the preheader.
208 void UpdateBackTraceRegPressure(const MachineInstr *MI);
209
210 /// IsProfitableToHoist - Return true if it is potentially profitable to
211 /// hoist the given loop invariant.
212 bool IsProfitableToHoist(MachineInstr &MI);
213
214 /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
215 /// If not then a load from this mbb may not be safe to hoist.
216 bool IsGuaranteedToExecute(MachineBasicBlock *BB);
217
218 /// HoistRegion - Walk the specified region of the CFG (defined by all
219 /// blocks dominated by the specified block, and that are in the current
220 /// loop) in depth first order w.r.t the DominatorTree. This allows us to
221 /// visit definitions before uses, allowing us to hoist a loop body in one
222 /// pass without iteration.
223 ///
224 void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
225
226 /// getRegisterClassIDAndCost - For a given MI, register, and the operand
227 /// index, return the ID and cost of its representative register class by
228 /// reference.
229 void getRegisterClassIDAndCost(const MachineInstr *MI,
230 unsigned Reg, unsigned OpIdx,
231 unsigned &RCId, unsigned &RCCost) const;
232
233 /// InitRegPressure - Find all virtual register references that are liveout
234 /// of the preheader to initialize the starting "register pressure". Note
235 /// this does not count live through (livein but not used) registers.
236 void InitRegPressure(MachineBasicBlock *BB);
237
238 /// UpdateRegPressure - Update estimate of register pressure after the
239 /// specified instruction.
240 void UpdateRegPressure(const MachineInstr *MI);
241
242 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
243 /// the load itself could be hoisted. Return the unfolded and hoistable
244 /// load, or null if the load couldn't be unfolded or if it wouldn't
245 /// be hoistable.
246 MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
247
248 /// LookForDuplicate - Find an instruction amount PrevMIs that is a
249 /// duplicate of MI. Return this instruction if it's found.
250 const MachineInstr *LookForDuplicate(const MachineInstr *MI,
251 std::vector<const MachineInstr*> &PrevMIs);
252
253 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
254 /// the preheader that compute the same value. If it's found, do a RAU on
255 /// with the definition of the existing instruction rather than hoisting
256 /// the instruction to the preheader.
257 bool EliminateCSE(MachineInstr *MI,
258 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
259
260 /// MayCSE - Return true if the given instruction will be CSE'd if it's
261 /// hoisted out of the loop.
262 bool MayCSE(MachineInstr *MI);
263
264 /// Hoist - When an instruction is found to only use loop invariant operands
265 /// that is safe to hoist, this instruction is called to do the dirty work.
266 /// It returns true if the instruction is hoisted.
267 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
268
269 /// InitCSEMap - Initialize the CSE map with instructions that are in the
270 /// current loop preheader that may become duplicates of instructions that
271 /// are hoisted out of the loop.
272 void InitCSEMap(MachineBasicBlock *BB);
273
274 /// getCurPreheader - Get the preheader for the current loop, splitting
275 /// a critical edge if needed.
276 MachineBasicBlock *getCurPreheader();
277 };
278 } // end anonymous namespace
279
280 char MachineLICM::ID = 0;
281 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
282 "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)283 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
284 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
285 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
286 INITIALIZE_PASS_END(MachineLICM, "machinelicm",
287 "Machine Loop Invariant Code Motion", false, false)
288
289 FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
290 return new MachineLICM(PreRegAlloc);
291 }
292
293 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
294 /// loop that has a unique predecessor.
LoopIsOuterMostWithPredecessor(MachineLoop * CurLoop)295 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
296 // Check whether this loop even has a unique predecessor.
297 if (!CurLoop->getLoopPredecessor())
298 return false;
299 // Ok, now check to see if any of its outer loops do.
300 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
301 if (L->getLoopPredecessor())
302 return false;
303 // None of them did, so this is the outermost with a unique predecessor.
304 return true;
305 }
306
runOnMachineFunction(MachineFunction & MF)307 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
308 if (PreRegAlloc)
309 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
310 else
311 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
312 DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
313
314 Changed = FirstInLoop = false;
315 TM = &MF.getTarget();
316 TII = TM->getInstrInfo();
317 TLI = TM->getTargetLowering();
318 TRI = TM->getRegisterInfo();
319 MFI = MF.getFrameInfo();
320 MRI = &MF.getRegInfo();
321 InstrItins = TM->getInstrItineraryData();
322 AllocatableSet = TRI->getAllocatableSet(MF);
323
324 if (PreRegAlloc) {
325 // Estimate register pressure during pre-regalloc pass.
326 unsigned NumRC = TRI->getNumRegClasses();
327 RegPressure.resize(NumRC);
328 std::fill(RegPressure.begin(), RegPressure.end(), 0);
329 RegLimit.resize(NumRC);
330 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
331 E = TRI->regclass_end(); I != E; ++I)
332 RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
333 }
334
335 // Get our Loop information...
336 MLI = &getAnalysis<MachineLoopInfo>();
337 DT = &getAnalysis<MachineDominatorTree>();
338 AA = &getAnalysis<AliasAnalysis>();
339
340 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
341 while (!Worklist.empty()) {
342 CurLoop = Worklist.pop_back_val();
343 CurPreheader = 0;
344
345 // If this is done before regalloc, only visit outer-most preheader-sporting
346 // loops.
347 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
348 Worklist.append(CurLoop->begin(), CurLoop->end());
349 continue;
350 }
351
352 if (!PreRegAlloc)
353 HoistRegionPostRA();
354 else {
355 // CSEMap is initialized for loop header when the first instruction is
356 // being hoisted.
357 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
358 FirstInLoop = true;
359 HoistRegion(N, true);
360 CSEMap.clear();
361 }
362 }
363
364 return Changed;
365 }
366
367 /// InstructionStoresToFI - Return true if instruction stores to the
368 /// specified frame.
InstructionStoresToFI(const MachineInstr * MI,int FI)369 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
370 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
371 oe = MI->memoperands_end(); o != oe; ++o) {
372 if (!(*o)->isStore() || !(*o)->getValue())
373 continue;
374 if (const FixedStackPseudoSourceValue *Value =
375 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
376 if (Value->getFrameIndex() == FI)
377 return true;
378 }
379 }
380 return false;
381 }
382
383 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
384 /// gather register def and frame object update information.
ProcessMI(MachineInstr * MI,unsigned * PhysRegDefs,SmallSet<int,32> & StoredFIs,SmallVector<CandidateInfo,32> & Candidates)385 void MachineLICM::ProcessMI(MachineInstr *MI,
386 unsigned *PhysRegDefs,
387 SmallSet<int, 32> &StoredFIs,
388 SmallVector<CandidateInfo, 32> &Candidates) {
389 bool RuledOut = false;
390 bool HasNonInvariantUse = false;
391 unsigned Def = 0;
392 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
393 const MachineOperand &MO = MI->getOperand(i);
394 if (MO.isFI()) {
395 // Remember if the instruction stores to the frame index.
396 int FI = MO.getIndex();
397 if (!StoredFIs.count(FI) &&
398 MFI->isSpillSlotObjectIndex(FI) &&
399 InstructionStoresToFI(MI, FI))
400 StoredFIs.insert(FI);
401 HasNonInvariantUse = true;
402 continue;
403 }
404
405 if (!MO.isReg())
406 continue;
407 unsigned Reg = MO.getReg();
408 if (!Reg)
409 continue;
410 assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
411 "Not expecting virtual register!");
412
413 if (!MO.isDef()) {
414 if (Reg && PhysRegDefs[Reg])
415 // If it's using a non-loop-invariant register, then it's obviously not
416 // safe to hoist.
417 HasNonInvariantUse = true;
418 continue;
419 }
420
421 if (MO.isImplicit()) {
422 ++PhysRegDefs[Reg];
423 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
424 ++PhysRegDefs[*AS];
425 if (!MO.isDead())
426 // Non-dead implicit def? This cannot be hoisted.
427 RuledOut = true;
428 // No need to check if a dead implicit def is also defined by
429 // another instruction.
430 continue;
431 }
432
433 // FIXME: For now, avoid instructions with multiple defs, unless
434 // it's a dead implicit def.
435 if (Def)
436 RuledOut = true;
437 else
438 Def = Reg;
439
440 // If we have already seen another instruction that defines the same
441 // register, then this is not safe.
442 if (++PhysRegDefs[Reg] > 1)
443 // MI defined register is seen defined by another instruction in
444 // the loop, it cannot be a LICM candidate.
445 RuledOut = true;
446 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
447 if (++PhysRegDefs[*AS] > 1)
448 RuledOut = true;
449 }
450
451 // Only consider reloads for now and remats which do not have register
452 // operands. FIXME: Consider unfold load folding instructions.
453 if (Def && !RuledOut) {
454 int FI = INT_MIN;
455 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
456 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
457 Candidates.push_back(CandidateInfo(MI, Def, FI));
458 }
459 }
460
461 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
462 /// invariants out to the preheader.
HoistRegionPostRA()463 void MachineLICM::HoistRegionPostRA() {
464 unsigned NumRegs = TRI->getNumRegs();
465 unsigned *PhysRegDefs = new unsigned[NumRegs];
466 std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
467
468 SmallVector<CandidateInfo, 32> Candidates;
469 SmallSet<int, 32> StoredFIs;
470
471 // Walk the entire region, count number of defs for each register, and
472 // collect potential LICM candidates.
473 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
474 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
475 MachineBasicBlock *BB = Blocks[i];
476
477 // If the header of the loop containing this basic block is a landing pad,
478 // then don't try to hoist instructions out of this loop.
479 const MachineLoop *ML = MLI->getLoopFor(BB);
480 if (ML && ML->getHeader()->isLandingPad()) continue;
481
482 // Conservatively treat live-in's as an external def.
483 // FIXME: That means a reload that're reused in successor block(s) will not
484 // be LICM'ed.
485 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
486 E = BB->livein_end(); I != E; ++I) {
487 unsigned Reg = *I;
488 ++PhysRegDefs[Reg];
489 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
490 ++PhysRegDefs[*AS];
491 }
492
493 SpeculationState = SpeculateUnknown;
494 for (MachineBasicBlock::iterator
495 MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
496 MachineInstr *MI = &*MII;
497 ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
498 }
499 }
500
501 // Now evaluate whether the potential candidates qualify.
502 // 1. Check if the candidate defined register is defined by another
503 // instruction in the loop.
504 // 2. If the candidate is a load from stack slot (always true for now),
505 // check if the slot is stored anywhere in the loop.
506 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
507 if (Candidates[i].FI != INT_MIN &&
508 StoredFIs.count(Candidates[i].FI))
509 continue;
510
511 if (PhysRegDefs[Candidates[i].Def] == 1) {
512 bool Safe = true;
513 MachineInstr *MI = Candidates[i].MI;
514 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
515 const MachineOperand &MO = MI->getOperand(j);
516 if (!MO.isReg() || MO.isDef() || !MO.getReg())
517 continue;
518 if (PhysRegDefs[MO.getReg()]) {
519 // If it's using a non-loop-invariant register, then it's obviously
520 // not safe to hoist.
521 Safe = false;
522 break;
523 }
524 }
525 if (Safe)
526 HoistPostRA(MI, Candidates[i].Def);
527 }
528 }
529
530 delete[] PhysRegDefs;
531 }
532
533 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
534 /// loop, and make sure it is not killed by any instructions in the loop.
AddToLiveIns(unsigned Reg)535 void MachineLICM::AddToLiveIns(unsigned Reg) {
536 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
537 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
538 MachineBasicBlock *BB = Blocks[i];
539 if (!BB->isLiveIn(Reg))
540 BB->addLiveIn(Reg);
541 for (MachineBasicBlock::iterator
542 MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
543 MachineInstr *MI = &*MII;
544 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
545 MachineOperand &MO = MI->getOperand(i);
546 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
547 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
548 MO.setIsKill(false);
549 }
550 }
551 }
552 }
553
554 /// HoistPostRA - When an instruction is found to only use loop invariant
555 /// operands that is safe to hoist, this instruction is called to do the
556 /// dirty work.
HoistPostRA(MachineInstr * MI,unsigned Def)557 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
558 MachineBasicBlock *Preheader = getCurPreheader();
559 if (!Preheader) return;
560
561 // Now move the instructions to the predecessor, inserting it before any
562 // terminator instructions.
563 DEBUG({
564 dbgs() << "Hoisting " << *MI;
565 if (Preheader->getBasicBlock())
566 dbgs() << " to MachineBasicBlock "
567 << Preheader->getName();
568 if (MI->getParent()->getBasicBlock())
569 dbgs() << " from MachineBasicBlock "
570 << MI->getParent()->getName();
571 dbgs() << "\n";
572 });
573
574 // Splice the instruction to the preheader.
575 MachineBasicBlock *MBB = MI->getParent();
576 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
577
578 // Add register to livein list to all the BBs in the current loop since a
579 // loop invariant must be kept live throughout the whole loop. This is
580 // important to ensure later passes do not scavenge the def register.
581 AddToLiveIns(Def);
582
583 ++NumPostRAHoisted;
584 Changed = true;
585 }
586
587 // IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
588 // If not then a load from this mbb may not be safe to hoist.
IsGuaranteedToExecute(MachineBasicBlock * BB)589 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
590 if (SpeculationState != SpeculateUnknown)
591 return SpeculationState == SpeculateFalse;
592
593 if (BB != CurLoop->getHeader()) {
594 // Check loop exiting blocks.
595 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
596 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
597 for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
598 if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
599 SpeculationState = SpeculateTrue;
600 return false;
601 }
602 }
603
604 SpeculationState = SpeculateFalse;
605 return true;
606 }
607
608 /// HoistRegion - Walk the specified region of the CFG (defined by all blocks
609 /// dominated by the specified block, and that are in the current loop) in depth
610 /// first order w.r.t the DominatorTree. This allows us to visit definitions
611 /// before uses, allowing us to hoist a loop body in one pass without iteration.
612 ///
HoistRegion(MachineDomTreeNode * N,bool IsHeader)613 void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
614 assert(N != 0 && "Null dominator tree node?");
615 MachineBasicBlock *BB = N->getBlock();
616
617 // If the header of the loop containing this basic block is a landing pad,
618 // then don't try to hoist instructions out of this loop.
619 const MachineLoop *ML = MLI->getLoopFor(BB);
620 if (ML && ML->getHeader()->isLandingPad()) return;
621
622 // If this subregion is not in the top level loop at all, exit.
623 if (!CurLoop->contains(BB)) return;
624
625 MachineBasicBlock *Preheader = getCurPreheader();
626 if (!Preheader)
627 return;
628
629 if (IsHeader) {
630 // Compute registers which are livein into the loop headers.
631 RegSeen.clear();
632 BackTrace.clear();
633 InitRegPressure(Preheader);
634 }
635
636 // Remember livein register pressure.
637 BackTrace.push_back(RegPressure);
638
639 SpeculationState = SpeculateUnknown;
640 for (MachineBasicBlock::iterator
641 MII = BB->begin(), E = BB->end(); MII != E; ) {
642 MachineBasicBlock::iterator NextMII = MII; ++NextMII;
643 MachineInstr *MI = &*MII;
644 if (!Hoist(MI, Preheader))
645 UpdateRegPressure(MI);
646 MII = NextMII;
647 }
648
649 // Don't hoist things out of a large switch statement. This often causes
650 // code to be hoisted that wasn't going to be executed, and increases
651 // register pressure in a situation where it's likely to matter.
652 if (BB->succ_size() < 25) {
653 const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
654 for (unsigned I = 0, E = Children.size(); I != E; ++I)
655 HoistRegion(Children[I]);
656 }
657
658 BackTrace.pop_back();
659 }
660
isOperandKill(const MachineOperand & MO,MachineRegisterInfo * MRI)661 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
662 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
663 }
664
665 /// getRegisterClassIDAndCost - For a given MI, register, and the operand
666 /// index, return the ID and cost of its representative register class.
667 void
getRegisterClassIDAndCost(const MachineInstr * MI,unsigned Reg,unsigned OpIdx,unsigned & RCId,unsigned & RCCost) const668 MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
669 unsigned Reg, unsigned OpIdx,
670 unsigned &RCId, unsigned &RCCost) const {
671 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
672 EVT VT = *RC->vt_begin();
673 if (VT == MVT::untyped) {
674 RCId = RC->getID();
675 RCCost = 1;
676 } else {
677 RCId = TLI->getRepRegClassFor(VT)->getID();
678 RCCost = TLI->getRepRegClassCostFor(VT);
679 }
680 }
681
682 /// InitRegPressure - Find all virtual register references that are liveout of
683 /// the preheader to initialize the starting "register pressure". Note this
684 /// does not count live through (livein but not used) registers.
InitRegPressure(MachineBasicBlock * BB)685 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
686 std::fill(RegPressure.begin(), RegPressure.end(), 0);
687
688 // If the preheader has only a single predecessor and it ends with a
689 // fallthrough or an unconditional branch, then scan its predecessor for live
690 // defs as well. This happens whenever the preheader is created by splitting
691 // the critical edge from the loop predecessor to the loop header.
692 if (BB->pred_size() == 1) {
693 MachineBasicBlock *TBB = 0, *FBB = 0;
694 SmallVector<MachineOperand, 4> Cond;
695 if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
696 InitRegPressure(*BB->pred_begin());
697 }
698
699 for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
700 MII != E; ++MII) {
701 MachineInstr *MI = &*MII;
702 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
703 const MachineOperand &MO = MI->getOperand(i);
704 if (!MO.isReg() || MO.isImplicit())
705 continue;
706 unsigned Reg = MO.getReg();
707 if (!TargetRegisterInfo::isVirtualRegister(Reg))
708 continue;
709
710 bool isNew = RegSeen.insert(Reg);
711 unsigned RCId, RCCost;
712 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
713 if (MO.isDef())
714 RegPressure[RCId] += RCCost;
715 else {
716 bool isKill = isOperandKill(MO, MRI);
717 if (isNew && !isKill)
718 // Haven't seen this, it must be a livein.
719 RegPressure[RCId] += RCCost;
720 else if (!isNew && isKill)
721 RegPressure[RCId] -= RCCost;
722 }
723 }
724 }
725 }
726
727 /// UpdateRegPressure - Update estimate of register pressure after the
728 /// specified instruction.
UpdateRegPressure(const MachineInstr * MI)729 void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
730 if (MI->isImplicitDef())
731 return;
732
733 SmallVector<unsigned, 4> Defs;
734 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
735 const MachineOperand &MO = MI->getOperand(i);
736 if (!MO.isReg() || MO.isImplicit())
737 continue;
738 unsigned Reg = MO.getReg();
739 if (!TargetRegisterInfo::isVirtualRegister(Reg))
740 continue;
741
742 bool isNew = RegSeen.insert(Reg);
743 if (MO.isDef())
744 Defs.push_back(Reg);
745 else if (!isNew && isOperandKill(MO, MRI)) {
746 unsigned RCId, RCCost;
747 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
748 if (RCCost > RegPressure[RCId])
749 RegPressure[RCId] = 0;
750 else
751 RegPressure[RCId] -= RCCost;
752 }
753 }
754
755 unsigned Idx = 0;
756 while (!Defs.empty()) {
757 unsigned Reg = Defs.pop_back_val();
758 unsigned RCId, RCCost;
759 getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost);
760 RegPressure[RCId] += RCCost;
761 ++Idx;
762 }
763 }
764
765 /// IsLICMCandidate - Returns true if the instruction may be a suitable
766 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
767 /// not safe to hoist it.
IsLICMCandidate(MachineInstr & I)768 bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
769 // Check if it's safe to move the instruction.
770 bool DontMoveAcrossStore = true;
771 if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
772 return false;
773
774 // If it is load then check if it is guaranteed to execute by making sure that
775 // it dominates all exiting blocks. If it doesn't, then there is a path out of
776 // the loop which does not execute this load, so we can't hoist it.
777 // Stores and side effects are already checked by isSafeToMove.
778 if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent()))
779 return false;
780
781 return true;
782 }
783
784 /// IsLoopInvariantInst - Returns true if the instruction is loop
785 /// invariant. I.e., all virtual register operands are defined outside of the
786 /// loop, physical registers aren't accessed explicitly, and there are no side
787 /// effects that aren't captured by the operands or other flags.
788 ///
IsLoopInvariantInst(MachineInstr & I)789 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
790 if (!IsLICMCandidate(I))
791 return false;
792
793 // The instruction is loop invariant if all of its operands are.
794 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
795 const MachineOperand &MO = I.getOperand(i);
796
797 if (!MO.isReg())
798 continue;
799
800 unsigned Reg = MO.getReg();
801 if (Reg == 0) continue;
802
803 // Don't hoist an instruction that uses or defines a physical register.
804 if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
805 if (MO.isUse()) {
806 // If the physreg has no defs anywhere, it's just an ambient register
807 // and we can freely move its uses. Alternatively, if it's allocatable,
808 // it could get allocated to something with a def during allocation.
809 if (!MRI->def_empty(Reg))
810 return false;
811 if (AllocatableSet.test(Reg))
812 return false;
813 // Check for a def among the register's aliases too.
814 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
815 unsigned AliasReg = *Alias;
816 if (!MRI->def_empty(AliasReg))
817 return false;
818 if (AllocatableSet.test(AliasReg))
819 return false;
820 }
821 // Otherwise it's safe to move.
822 continue;
823 } else if (!MO.isDead()) {
824 // A def that isn't dead. We can't move it.
825 return false;
826 } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
827 // If the reg is live into the loop, we can't hoist an instruction
828 // which would clobber it.
829 return false;
830 }
831 }
832
833 if (!MO.isUse())
834 continue;
835
836 assert(MRI->getVRegDef(Reg) &&
837 "Machine instr not mapped for this vreg?!");
838
839 // If the loop contains the definition of an operand, then the instruction
840 // isn't loop invariant.
841 if (CurLoop->contains(MRI->getVRegDef(Reg)))
842 return false;
843 }
844
845 // If we got this far, the instruction is loop invariant!
846 return true;
847 }
848
849
850 /// HasAnyPHIUse - Return true if the specified register is used by any
851 /// phi node.
HasAnyPHIUse(unsigned Reg) const852 bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
853 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
854 UE = MRI->use_end(); UI != UE; ++UI) {
855 MachineInstr *UseMI = &*UI;
856 if (UseMI->isPHI())
857 return true;
858 // Look pass copies as well.
859 if (UseMI->isCopy()) {
860 unsigned Def = UseMI->getOperand(0).getReg();
861 if (TargetRegisterInfo::isVirtualRegister(Def) &&
862 HasAnyPHIUse(Def))
863 return true;
864 }
865 }
866 return false;
867 }
868
869 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
870 /// and an use in the current loop, return true if the target considered
871 /// it 'high'.
HasHighOperandLatency(MachineInstr & MI,unsigned DefIdx,unsigned Reg) const872 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
873 unsigned DefIdx, unsigned Reg) const {
874 if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
875 return false;
876
877 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
878 E = MRI->use_nodbg_end(); I != E; ++I) {
879 MachineInstr *UseMI = &*I;
880 if (UseMI->isCopyLike())
881 continue;
882 if (!CurLoop->contains(UseMI->getParent()))
883 continue;
884 for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
885 const MachineOperand &MO = UseMI->getOperand(i);
886 if (!MO.isReg() || !MO.isUse())
887 continue;
888 unsigned MOReg = MO.getReg();
889 if (MOReg != Reg)
890 continue;
891
892 if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
893 return true;
894 }
895
896 // Only look at the first in loop use.
897 break;
898 }
899
900 return false;
901 }
902
903 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or
904 /// the operand latency between its def and a use is one or less.
IsCheapInstruction(MachineInstr & MI) const905 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
906 if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
907 return true;
908 if (!InstrItins || InstrItins->isEmpty())
909 return false;
910
911 bool isCheap = false;
912 unsigned NumDefs = MI.getDesc().getNumDefs();
913 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
914 MachineOperand &DefMO = MI.getOperand(i);
915 if (!DefMO.isReg() || !DefMO.isDef())
916 continue;
917 --NumDefs;
918 unsigned Reg = DefMO.getReg();
919 if (TargetRegisterInfo::isPhysicalRegister(Reg))
920 continue;
921
922 if (!TII->hasLowDefLatency(InstrItins, &MI, i))
923 return false;
924 isCheap = true;
925 }
926
927 return isCheap;
928 }
929
930 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check
931 /// if hoisting an instruction of the given cost matrix can cause high
932 /// register pressure.
CanCauseHighRegPressure(DenseMap<unsigned,int> & Cost)933 bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
934 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
935 CI != CE; ++CI) {
936 if (CI->second <= 0)
937 continue;
938
939 unsigned RCId = CI->first;
940 for (unsigned i = BackTrace.size(); i != 0; --i) {
941 SmallVector<unsigned, 8> &RP = BackTrace[i-1];
942 if (RP[RCId] + CI->second >= RegLimit[RCId])
943 return true;
944 }
945 }
946
947 return false;
948 }
949
950 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the
951 /// current block and update their register pressures to reflect the effect
952 /// of hoisting MI from the current block to the preheader.
UpdateBackTraceRegPressure(const MachineInstr * MI)953 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
954 if (MI->isImplicitDef())
955 return;
956
957 // First compute the 'cost' of the instruction, i.e. its contribution
958 // to register pressure.
959 DenseMap<unsigned, int> Cost;
960 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
961 const MachineOperand &MO = MI->getOperand(i);
962 if (!MO.isReg() || MO.isImplicit())
963 continue;
964 unsigned Reg = MO.getReg();
965 if (!TargetRegisterInfo::isVirtualRegister(Reg))
966 continue;
967
968 unsigned RCId, RCCost;
969 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
970 if (MO.isDef()) {
971 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
972 if (CI != Cost.end())
973 CI->second += RCCost;
974 else
975 Cost.insert(std::make_pair(RCId, RCCost));
976 } else if (isOperandKill(MO, MRI)) {
977 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
978 if (CI != Cost.end())
979 CI->second -= RCCost;
980 else
981 Cost.insert(std::make_pair(RCId, -RCCost));
982 }
983 }
984
985 // Update register pressure of blocks from loop header to current block.
986 for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
987 SmallVector<unsigned, 8> &RP = BackTrace[i];
988 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
989 CI != CE; ++CI) {
990 unsigned RCId = CI->first;
991 RP[RCId] += CI->second;
992 }
993 }
994 }
995
996 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist
997 /// the given loop invariant.
IsProfitableToHoist(MachineInstr & MI)998 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
999 if (MI.isImplicitDef())
1000 return true;
1001
1002 // If the instruction is cheap, only hoist if it is re-materilizable. LICM
1003 // will increase register pressure. It's probably not worth it if the
1004 // instruction is cheap.
1005 // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
1006 // these tend to help performance in low register pressure situation. The
1007 // trade off is it may cause spill in high pressure situation. It will end up
1008 // adding a store in the loop preheader. But the reload is no more expensive.
1009 // The side benefit is these loads are frequently CSE'ed.
1010 if (IsCheapInstruction(MI)) {
1011 if (!TII->isTriviallyReMaterializable(&MI, AA))
1012 return false;
1013 } else {
1014 // Estimate register pressure to determine whether to LICM the instruction.
1015 // In low register pressure situation, we can be more aggressive about
1016 // hoisting. Also, favors hoisting long latency instructions even in
1017 // moderately high pressure situation.
1018 // FIXME: If there are long latency loop-invariant instructions inside the
1019 // loop at this point, why didn't the optimizer's LICM hoist them?
1020 DenseMap<unsigned, int> Cost;
1021 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1022 const MachineOperand &MO = MI.getOperand(i);
1023 if (!MO.isReg() || MO.isImplicit())
1024 continue;
1025 unsigned Reg = MO.getReg();
1026 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1027 continue;
1028
1029 unsigned RCId, RCCost;
1030 getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
1031 if (MO.isDef()) {
1032 if (HasHighOperandLatency(MI, i, Reg)) {
1033 ++NumHighLatency;
1034 return true;
1035 }
1036
1037 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
1038 if (CI != Cost.end())
1039 CI->second += RCCost;
1040 else
1041 Cost.insert(std::make_pair(RCId, RCCost));
1042 } else if (isOperandKill(MO, MRI)) {
1043 // Is a virtual register use is a kill, hoisting it out of the loop
1044 // may actually reduce register pressure or be register pressure
1045 // neutral.
1046 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
1047 if (CI != Cost.end())
1048 CI->second -= RCCost;
1049 else
1050 Cost.insert(std::make_pair(RCId, -RCCost));
1051 }
1052 }
1053
1054 // Visit BBs from header to current BB, if hoisting this doesn't cause
1055 // high register pressure, then it's safe to proceed.
1056 if (!CanCauseHighRegPressure(Cost)) {
1057 ++NumLowRP;
1058 return true;
1059 }
1060
1061 // Do not "speculate" in high register pressure situation. If an
1062 // instruction is not guaranteed to be executed in the loop, it's best to be
1063 // conservative.
1064 if (AvoidSpeculation &&
1065 (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI)))
1066 return false;
1067
1068 // High register pressure situation, only hoist if the instruction is going to
1069 // be remat'ed.
1070 if (!TII->isTriviallyReMaterializable(&MI, AA) &&
1071 !MI.isInvariantLoad(AA))
1072 return false;
1073 }
1074
1075 // If result(s) of this instruction is used by PHIs outside of the loop, then
1076 // don't hoist it if the instruction because it will introduce an extra copy.
1077 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1078 const MachineOperand &MO = MI.getOperand(i);
1079 if (!MO.isReg() || !MO.isDef())
1080 continue;
1081 if (HasAnyPHIUse(MO.getReg()))
1082 return false;
1083 }
1084
1085 return true;
1086 }
1087
ExtractHoistableLoad(MachineInstr * MI)1088 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
1089 // Don't unfold simple loads.
1090 if (MI->getDesc().canFoldAsLoad())
1091 return 0;
1092
1093 // If not, we may be able to unfold a load and hoist that.
1094 // First test whether the instruction is loading from an amenable
1095 // memory location.
1096 if (!MI->isInvariantLoad(AA))
1097 return 0;
1098
1099 // Next determine the register class for a temporary register.
1100 unsigned LoadRegIndex;
1101 unsigned NewOpc =
1102 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1103 /*UnfoldLoad=*/true,
1104 /*UnfoldStore=*/false,
1105 &LoadRegIndex);
1106 if (NewOpc == 0) return 0;
1107 const MCInstrDesc &MID = TII->get(NewOpc);
1108 if (MID.getNumDefs() != 1) return 0;
1109 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
1110 // Ok, we're unfolding. Create a temporary register and do the unfold.
1111 unsigned Reg = MRI->createVirtualRegister(RC);
1112
1113 MachineFunction &MF = *MI->getParent()->getParent();
1114 SmallVector<MachineInstr *, 2> NewMIs;
1115 bool Success =
1116 TII->unfoldMemoryOperand(MF, MI, Reg,
1117 /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
1118 NewMIs);
1119 (void)Success;
1120 assert(Success &&
1121 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1122 "succeeded!");
1123 assert(NewMIs.size() == 2 &&
1124 "Unfolded a load into multiple instructions!");
1125 MachineBasicBlock *MBB = MI->getParent();
1126 MBB->insert(MI, NewMIs[0]);
1127 MBB->insert(MI, NewMIs[1]);
1128 // If unfolding produced a load that wasn't loop-invariant or profitable to
1129 // hoist, discard the new instructions and bail.
1130 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
1131 NewMIs[0]->eraseFromParent();
1132 NewMIs[1]->eraseFromParent();
1133 return 0;
1134 }
1135
1136 // Update register pressure for the unfolded instruction.
1137 UpdateRegPressure(NewMIs[1]);
1138
1139 // Otherwise we successfully unfolded a load that we can hoist.
1140 MI->eraseFromParent();
1141 return NewMIs[0];
1142 }
1143
InitCSEMap(MachineBasicBlock * BB)1144 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
1145 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
1146 const MachineInstr *MI = &*I;
1147 unsigned Opcode = MI->getOpcode();
1148 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
1149 CI = CSEMap.find(Opcode);
1150 if (CI != CSEMap.end())
1151 CI->second.push_back(MI);
1152 else {
1153 std::vector<const MachineInstr*> CSEMIs;
1154 CSEMIs.push_back(MI);
1155 CSEMap.insert(std::make_pair(Opcode, CSEMIs));
1156 }
1157 }
1158 }
1159
1160 const MachineInstr*
LookForDuplicate(const MachineInstr * MI,std::vector<const MachineInstr * > & PrevMIs)1161 MachineLICM::LookForDuplicate(const MachineInstr *MI,
1162 std::vector<const MachineInstr*> &PrevMIs) {
1163 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
1164 const MachineInstr *PrevMI = PrevMIs[i];
1165 if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
1166 return PrevMI;
1167 }
1168 return 0;
1169 }
1170
EliminateCSE(MachineInstr * MI,DenseMap<unsigned,std::vector<const MachineInstr * >>::iterator & CI)1171 bool MachineLICM::EliminateCSE(MachineInstr *MI,
1172 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
1173 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1174 // the undef property onto uses.
1175 if (CI == CSEMap.end() || MI->isImplicitDef())
1176 return false;
1177
1178 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1179 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1180
1181 // Replace virtual registers defined by MI by their counterparts defined
1182 // by Dup.
1183 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1184 const MachineOperand &MO = MI->getOperand(i);
1185
1186 // Physical registers may not differ here.
1187 assert((!MO.isReg() || MO.getReg() == 0 ||
1188 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
1189 MO.getReg() == Dup->getOperand(i).getReg()) &&
1190 "Instructions with different phys regs are not identical!");
1191
1192 if (MO.isReg() && MO.isDef() &&
1193 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
1194 MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
1195 MRI->clearKillFlags(Dup->getOperand(i).getReg());
1196 }
1197 }
1198 MI->eraseFromParent();
1199 ++NumCSEed;
1200 return true;
1201 }
1202 return false;
1203 }
1204
1205 /// MayCSE - Return true if the given instruction will be CSE'd if it's
1206 /// hoisted out of the loop.
MayCSE(MachineInstr * MI)1207 bool MachineLICM::MayCSE(MachineInstr *MI) {
1208 unsigned Opcode = MI->getOpcode();
1209 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
1210 CI = CSEMap.find(Opcode);
1211 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1212 // the undef property onto uses.
1213 if (CI == CSEMap.end() || MI->isImplicitDef())
1214 return false;
1215
1216 return LookForDuplicate(MI, CI->second) != 0;
1217 }
1218
1219 /// Hoist - When an instruction is found to use only loop invariant operands
1220 /// that are safe to hoist, this instruction is called to do the dirty work.
1221 ///
Hoist(MachineInstr * MI,MachineBasicBlock * Preheader)1222 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1223 // First check whether we should hoist this instruction.
1224 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
1225 // If not, try unfolding a hoistable load.
1226 MI = ExtractHoistableLoad(MI);
1227 if (!MI) return false;
1228 }
1229
1230 // Now move the instructions to the predecessor, inserting it before any
1231 // terminator instructions.
1232 DEBUG({
1233 dbgs() << "Hoisting " << *MI;
1234 if (Preheader->getBasicBlock())
1235 dbgs() << " to MachineBasicBlock "
1236 << Preheader->getName();
1237 if (MI->getParent()->getBasicBlock())
1238 dbgs() << " from MachineBasicBlock "
1239 << MI->getParent()->getName();
1240 dbgs() << "\n";
1241 });
1242
1243 // If this is the first instruction being hoisted to the preheader,
1244 // initialize the CSE map with potential common expressions.
1245 if (FirstInLoop) {
1246 InitCSEMap(Preheader);
1247 FirstInLoop = false;
1248 }
1249
1250 // Look for opportunity to CSE the hoisted instruction.
1251 unsigned Opcode = MI->getOpcode();
1252 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
1253 CI = CSEMap.find(Opcode);
1254 if (!EliminateCSE(MI, CI)) {
1255 // Otherwise, splice the instruction to the preheader.
1256 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1257
1258 // Update register pressure for BBs from header to this block.
1259 UpdateBackTraceRegPressure(MI);
1260
1261 // Clear the kill flags of any register this instruction defines,
1262 // since they may need to be live throughout the entire loop
1263 // rather than just live for part of it.
1264 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1265 MachineOperand &MO = MI->getOperand(i);
1266 if (MO.isReg() && MO.isDef() && !MO.isDead())
1267 MRI->clearKillFlags(MO.getReg());
1268 }
1269
1270 // Add to the CSE map.
1271 if (CI != CSEMap.end())
1272 CI->second.push_back(MI);
1273 else {
1274 std::vector<const MachineInstr*> CSEMIs;
1275 CSEMIs.push_back(MI);
1276 CSEMap.insert(std::make_pair(Opcode, CSEMIs));
1277 }
1278 }
1279
1280 ++NumHoisted;
1281 Changed = true;
1282
1283 return true;
1284 }
1285
getCurPreheader()1286 MachineBasicBlock *MachineLICM::getCurPreheader() {
1287 // Determine the block to which to hoist instructions. If we can't find a
1288 // suitable loop predecessor, we can't do any hoisting.
1289
1290 // If we've tried to get a preheader and failed, don't try again.
1291 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1292 return 0;
1293
1294 if (!CurPreheader) {
1295 CurPreheader = CurLoop->getLoopPreheader();
1296 if (!CurPreheader) {
1297 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1298 if (!Pred) {
1299 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1300 return 0;
1301 }
1302
1303 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
1304 if (!CurPreheader) {
1305 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1306 return 0;
1307 }
1308 }
1309 }
1310 return CurPreheader;
1311 }
1312