1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs loop invariant code motion on machine instructions. We
11 // attempt to remove as much code from the body of a loop as possible.
12 //
13 // This pass does not attempt to throttle itself to limit register pressure.
14 // The register allocation phases are expected to perform rematerialization
15 // to recover when register pressure is high.
16 //
17 // This pass is not intended to be a replacement or a complete alternative
18 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
19 // constructs that are not exposed before lowering and instruction selection.
20 //
21 //===----------------------------------------------------------------------===//
22
23 #define DEBUG_TYPE "machine-licm"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineMemOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/PseudoSourceValue.h"
31 #include "llvm/MC/MCInstrItineraries.h"
32 #include "llvm/Target/TargetLowering.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Target/TargetInstrInfo.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Analysis/AliasAnalysis.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/Statistic.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/raw_ostream.h"
42 using namespace llvm;
43
// Pass statistics. Each STATISTIC pairs a counter name with its description
// string. NumPostRAHoisted is bumped by HoistPostRA; the remaining counters
// are updated by the pre-regalloc hoisting code.
STATISTIC(NumHoisted,
          "Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
          "Number of instructions hoisted in low reg pressure situation");
STATISTIC(NumHighLatency,
          "Number of high latency instructions hoisted");
STATISTIC(NumCSEed,
          "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
          "Number of machine instructions hoisted out of loops post regalloc");
54
namespace {
  /// MachineLICM - Machine-level loop invariant code motion pass. One class
  /// serves both the pre- and post-register-allocation variants; the
  /// PreRegAlloc flag chosen at construction selects which hoisting routine
  /// runOnMachineFunction invokes (HoistRegion vs. HoistRegionPostRA).
  class MachineLICM : public MachineFunctionPass {
    // True when this instance runs before register allocation.
    bool PreRegAlloc;

    // Target / function state, cached at the top of runOnMachineFunction.
    const TargetMachine   *TM;
    const TargetInstrInfo *TII;
    const TargetLowering *TLI;
    const TargetRegisterInfo *TRI;
    const MachineFrameInfo *MFI;
    MachineRegisterInfo *MRI;
    const InstrItineraryData *InstrItins;

    // Various analyses that we use...
    AliasAnalysis        *AA;      // Alias analysis info.
    MachineLoopInfo      *MLI;     // Current MachineLoopInfo
    MachineDominatorTree *DT;      // Machine dominator tree for the cur loop

    // State that is updated as we process loops
    bool         Changed;          // True if a loop is changed.
    bool         FirstInLoop;      // True if it's the first LICM in the loop.
    MachineLoop *CurLoop;          // The current loop we are working on.
    MachineBasicBlock *CurPreheader; // The preheader for CurLoop.

    // Result of TRI->getAllocatableSet(MF) for the current function.
    BitVector AllocatableSet;

    // Track 'estimated' register pressure.
    // RegSeen holds the virtual registers already encountered; RegPressure is
    // indexed by register class ID.
    SmallSet<unsigned, 32> RegSeen;
    SmallVector<unsigned, 8> RegPressure;

    // Register pressure "limit" per register class. If the pressure
    // is higher than the limit, then it's considered high.
    SmallVector<unsigned, 8> RegLimit;

    // Register pressure on path leading from loop preheader to current BB.
    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;

    // For each opcode, keep a list of potential CSE instructions.
    DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;

  public:
    static char ID; // Pass identification, replacement for typeid

    /// Default construction yields the pre-regalloc variant.
    MachineLICM() :
      MachineFunctionPass(ID), PreRegAlloc(true) {
        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
      }

    /// Construct either variant; PreRA == true selects the pre-regalloc one.
    explicit MachineLICM(bool PreRA) :
      MachineFunctionPass(ID), PreRegAlloc(PreRA) {
        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
      }

    /// Pass entry point; returns true if any loop was changed.
    virtual bool runOnMachineFunction(MachineFunction &MF);

    const char *getPassName() const { return "Machine Instruction LICM"; }

    /// Requires loop info, the dominator tree and alias analysis, and
    /// declares loop info and the dominator tree as preserved.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineLoopInfo>();
      AU.addRequired<MachineDominatorTree>();
      AU.addRequired<AliasAnalysis>();
      AU.addPreserved<MachineLoopInfo>();
      AU.addPreserved<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    /// Drop all register-pressure bookkeeping and the CSE map.
    virtual void releaseMemory() {
      RegSeen.clear();
      RegPressure.clear();
      RegLimit.clear();
      BackTrace.clear();
      for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
             CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
        CI->second.clear();
      CSEMap.clear();
    }

  private:
    /// CandidateInfo - Keep track of information about hoisting candidates.
    struct CandidateInfo {
      MachineInstr *MI;   // The candidate instruction.
      unsigned      Def;  // The (physical) register it defines.
      int           FI;   // Stack slot it reloads from, or INT_MIN if none
                          // was identified (see ProcessMI).
      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
        : MI(mi), Def(def), FI(fi) {}
    };

    /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
    /// invariants out to the preheader.
    void HoistRegionPostRA();

    /// HoistPostRA - When an instruction is found to only use loop invariant
    /// operands that is safe to hoist, this function is called to do the
    /// dirty work.
    void HoistPostRA(MachineInstr *MI, unsigned Def);

    /// ProcessMI - Examine the instruction for potential LICM candidate. Also
    /// gather register def and frame object update information.
    void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
                   SmallSet<int, 32> &StoredFIs,
                   SmallVector<CandidateInfo, 32> &Candidates);

    /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
    /// current loop.
    void AddToLiveIns(unsigned Reg);

    /// IsLICMCandidate - Returns true if the instruction may be a suitable
    /// candidate for LICM. e.g. If the instruction is a call, then it's
    /// obviously not safe to hoist it.
    bool IsLICMCandidate(MachineInstr &I);

    /// IsLoopInvariantInst - Returns true if the instruction is loop
    /// invariant. I.e., all virtual register operands are defined outside of
    /// the loop, physical registers aren't accessed (explicitly or implicitly),
    /// and the instruction is hoistable.
    ///
    bool IsLoopInvariantInst(MachineInstr &I);

    /// HasAnyPHIUse - Return true if the specified register is used by any
    /// phi node.
    bool HasAnyPHIUse(unsigned Reg) const;

    /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
    /// and a use in the current loop, return true if the target considered
    /// it 'high'.
    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
                               unsigned Reg) const;

    /// IsCheapInstruction - Return true if the instruction is marked "cheap"
    /// or the operand latency between its def and a use is one or less.
    bool IsCheapInstruction(MachineInstr &MI) const;

    /// CanCauseHighRegPressure - Visit BBs from header to current BB,
    /// check if hoisting an instruction of the given cost matrix can cause high
    /// register pressure.
    bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);

    /// UpdateBackTraceRegPressure - Traverse the back trace from header to
    /// the current block and update their register pressures to reflect the
    /// effect of hoisting MI from the current block to the preheader.
    void UpdateBackTraceRegPressure(const MachineInstr *MI);

    /// IsProfitableToHoist - Return true if it is potentially profitable to
    /// hoist the given loop invariant.
    bool IsProfitableToHoist(MachineInstr &MI);

    /// HoistRegion - Walk the specified region of the CFG (defined by all
    /// blocks dominated by the specified block, and that are in the current
    /// loop) in depth first order w.r.t the DominatorTree. This allows us to
    /// visit definitions before uses, allowing us to hoist a loop body in one
    /// pass without iteration.
    ///
    void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);

    /// InitRegPressure - Find all virtual register references that are liveout
    /// of the preheader to initialize the starting "register pressure". Note
    /// this does not count live through (livein but not used) registers.
    void InitRegPressure(MachineBasicBlock *BB);

    /// UpdateRegPressure - Update estimate of register pressure after the
    /// specified instruction.
    void UpdateRegPressure(const MachineInstr *MI);

    /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
    /// the load itself could be hoisted. Return the unfolded and hoistable
    /// load, or null if the load couldn't be unfolded or if it wouldn't
    /// be hoistable.
    MachineInstr *ExtractHoistableLoad(MachineInstr *MI);

    /// LookForDuplicate - Find an instruction among PrevMIs that is a
    /// duplicate of MI. Return this instruction if it's found.
    const MachineInstr *LookForDuplicate(const MachineInstr *MI,
                                     std::vector<const MachineInstr*> &PrevMIs);

    /// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
    /// the preheader that computes the same value. If it's found, do a RAU
    /// with the definition of the existing instruction rather than hoisting
    /// the instruction to the preheader.
    bool EliminateCSE(MachineInstr *MI,
           DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);

    /// Hoist - When an instruction is found to only use loop invariant operands
    /// that is safe to hoist, this function is called to do the dirty work.
    /// It returns true if the instruction is hoisted.
    bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);

    /// InitCSEMap - Initialize the CSE map with instructions that are in the
    /// current loop preheader that may become duplicates of instructions that
    /// are hoisted out of the loop.
    void InitCSEMap(MachineBasicBlock *BB);

    /// getCurPreheader - Get the preheader for the current loop, splitting
    /// a critical edge if needed.
    MachineBasicBlock *getCurPreheader();
  };
} // end anonymous namespace
247
// Storage for the pass identifier declared in the class.
char MachineLICM::ID = 0;

// Pass registration under the name "machinelicm". The listed dependencies
// mirror the analyses requested in MachineLICM::getAnalysisUsage.
INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
                "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineLICM, "machinelicm",
                "Machine Loop Invariant Code Motion", false, false)
256
257 FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
258 return new MachineLICM(PreRegAlloc);
259 }
260
261 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
262 /// loop that has a unique predecessor.
LoopIsOuterMostWithPredecessor(MachineLoop * CurLoop)263 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
264 // Check whether this loop even has a unique predecessor.
265 if (!CurLoop->getLoopPredecessor())
266 return false;
267 // Ok, now check to see if any of its outer loops do.
268 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
269 if (L->getLoopPredecessor())
270 return false;
271 // None of them did, so this is the outermost with a unique predecessor.
272 return true;
273 }
274
runOnMachineFunction(MachineFunction & MF)275 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
276 if (PreRegAlloc)
277 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
278 else
279 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
280 DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
281
282 Changed = FirstInLoop = false;
283 TM = &MF.getTarget();
284 TII = TM->getInstrInfo();
285 TLI = TM->getTargetLowering();
286 TRI = TM->getRegisterInfo();
287 MFI = MF.getFrameInfo();
288 MRI = &MF.getRegInfo();
289 InstrItins = TM->getInstrItineraryData();
290 AllocatableSet = TRI->getAllocatableSet(MF);
291
292 if (PreRegAlloc) {
293 // Estimate register pressure during pre-regalloc pass.
294 unsigned NumRC = TRI->getNumRegClasses();
295 RegPressure.resize(NumRC);
296 std::fill(RegPressure.begin(), RegPressure.end(), 0);
297 RegLimit.resize(NumRC);
298 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
299 E = TRI->regclass_end(); I != E; ++I)
300 RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
301 }
302
303 // Get our Loop information...
304 MLI = &getAnalysis<MachineLoopInfo>();
305 DT = &getAnalysis<MachineDominatorTree>();
306 AA = &getAnalysis<AliasAnalysis>();
307
308 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
309 while (!Worklist.empty()) {
310 CurLoop = Worklist.pop_back_val();
311 CurPreheader = 0;
312
313 // If this is done before regalloc, only visit outer-most preheader-sporting
314 // loops.
315 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
316 Worklist.append(CurLoop->begin(), CurLoop->end());
317 continue;
318 }
319
320 if (!PreRegAlloc)
321 HoistRegionPostRA();
322 else {
323 // CSEMap is initialized for loop header when the first instruction is
324 // being hoisted.
325 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
326 FirstInLoop = true;
327 HoistRegion(N, true);
328 CSEMap.clear();
329 }
330 }
331
332 return Changed;
333 }
334
335 /// InstructionStoresToFI - Return true if instruction stores to the
336 /// specified frame.
InstructionStoresToFI(const MachineInstr * MI,int FI)337 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
338 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
339 oe = MI->memoperands_end(); o != oe; ++o) {
340 if (!(*o)->isStore() || !(*o)->getValue())
341 continue;
342 if (const FixedStackPseudoSourceValue *Value =
343 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
344 if (Value->getFrameIndex() == FI)
345 return true;
346 }
347 }
348 return false;
349 }
350
/// ProcessMI - Examine the instruction for potential LICM candidate. Also
/// gather register def and frame object update information.
///
/// \param MI          The instruction to examine; every register operand is
///                    asserted to be a physical register.
/// \param PhysRegDefs Per-register def counters indexed by register number.
///                    Each def increments the counter of the register and of
///                    every register in its alias set.
/// \param StoredFIs   Collects spill-slot frame indices that MI stores to.
/// \param Candidates  Receives a CandidateInfo for MI when it qualifies.
void MachineLICM::ProcessMI(MachineInstr *MI,
                            unsigned *PhysRegDefs,
                            SmallSet<int, 32> &StoredFIs,
                            SmallVector<CandidateInfo, 32> &Candidates) {
  bool RuledOut = false;            // Set once MI can no longer be hoisted.
  bool HasNonInvariantUse = false;  // MI has a frame-index operand or uses a
                                    // register already counted as defined.
  unsigned Def = 0;                 // The single explicit def, if any.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isFI()) {
      // Remember if the instruction stores to the frame index.
      int FI = MO.getIndex();
      if (!StoredFIs.count(FI) &&
          MFI->isSpillSlotObjectIndex(FI) &&
          InstructionStoresToFI(MI, FI))
        StoredFIs.insert(FI);
      // Any frame-index operand marks the instruction, whether or not the
      // slot was recorded above.
      HasNonInvariantUse = true;
      continue;
    }

    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
           "Not expecting virtual register!");

    if (!MO.isDef()) {
      if (Reg && PhysRegDefs[Reg])
        // If it's using a non-loop-invariant register, then it's obviously not
        // safe to hoist.
        HasNonInvariantUse = true;
      continue;
    }

    if (MO.isImplicit()) {
      // Implicit defs are counted (including aliases) but never become 'Def'.
      ++PhysRegDefs[Reg];
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
        ++PhysRegDefs[*AS];
      if (!MO.isDead())
        // Non-dead implicit def? This cannot be hoisted.
        RuledOut = true;
      // No need to check if a dead implicit def is also defined by
      // another instruction.
      continue;
    }

    // FIXME: For now, avoid instructions with multiple defs, unless
    // it's a dead implicit def.
    if (Def)
      RuledOut = true;
    else
      Def = Reg;

    // If we have already seen another instruction that defines the same
    // register, then this is not safe.
    if (++PhysRegDefs[Reg] > 1)
      // MI defined register is seen defined by another instruction in
      // the loop, it cannot be a LICM candidate.
      RuledOut = true;
    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
      if (++PhysRegDefs[*AS] > 1)
        RuledOut = true;
  }

  // Only consider reloads for now and remats which do not have register
  // operands. FIXME: Consider unfold load folding instructions.
  if (Def && !RuledOut) {
    // FI stays INT_MIN unless isLoadFromStackSlot is reached and sets it; the
    // call is skipped when the first alternative below already holds.
    int FI = INT_MIN;
    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
      Candidates.push_back(CandidateInfo(MI, Def, FI));
  }
}
428
429 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
430 /// invariants out to the preheader.
HoistRegionPostRA()431 void MachineLICM::HoistRegionPostRA() {
432 unsigned NumRegs = TRI->getNumRegs();
433 unsigned *PhysRegDefs = new unsigned[NumRegs];
434 std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
435
436 SmallVector<CandidateInfo, 32> Candidates;
437 SmallSet<int, 32> StoredFIs;
438
439 // Walk the entire region, count number of defs for each register, and
440 // collect potential LICM candidates.
441 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
442 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
443 MachineBasicBlock *BB = Blocks[i];
444 // Conservatively treat live-in's as an external def.
445 // FIXME: That means a reload that're reused in successor block(s) will not
446 // be LICM'ed.
447 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
448 E = BB->livein_end(); I != E; ++I) {
449 unsigned Reg = *I;
450 ++PhysRegDefs[Reg];
451 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
452 ++PhysRegDefs[*AS];
453 }
454
455 for (MachineBasicBlock::iterator
456 MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
457 MachineInstr *MI = &*MII;
458 ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
459 }
460 }
461
462 // Now evaluate whether the potential candidates qualify.
463 // 1. Check if the candidate defined register is defined by another
464 // instruction in the loop.
465 // 2. If the candidate is a load from stack slot (always true for now),
466 // check if the slot is stored anywhere in the loop.
467 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
468 if (Candidates[i].FI != INT_MIN &&
469 StoredFIs.count(Candidates[i].FI))
470 continue;
471
472 if (PhysRegDefs[Candidates[i].Def] == 1) {
473 bool Safe = true;
474 MachineInstr *MI = Candidates[i].MI;
475 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
476 const MachineOperand &MO = MI->getOperand(j);
477 if (!MO.isReg() || MO.isDef() || !MO.getReg())
478 continue;
479 if (PhysRegDefs[MO.getReg()]) {
480 // If it's using a non-loop-invariant register, then it's obviously
481 // not safe to hoist.
482 Safe = false;
483 break;
484 }
485 }
486 if (Safe)
487 HoistPostRA(MI, Candidates[i].Def);
488 }
489 }
490
491 delete[] PhysRegDefs;
492 }
493
494 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
495 /// loop, and make sure it is not killed by any instructions in the loop.
AddToLiveIns(unsigned Reg)496 void MachineLICM::AddToLiveIns(unsigned Reg) {
497 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
498 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
499 MachineBasicBlock *BB = Blocks[i];
500 if (!BB->isLiveIn(Reg))
501 BB->addLiveIn(Reg);
502 for (MachineBasicBlock::iterator
503 MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
504 MachineInstr *MI = &*MII;
505 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
506 MachineOperand &MO = MI->getOperand(i);
507 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
508 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
509 MO.setIsKill(false);
510 }
511 }
512 }
513 }
514
515 /// HoistPostRA - When an instruction is found to only use loop invariant
516 /// operands that is safe to hoist, this instruction is called to do the
517 /// dirty work.
HoistPostRA(MachineInstr * MI,unsigned Def)518 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
519 MachineBasicBlock *Preheader = getCurPreheader();
520 if (!Preheader) return;
521
522 // Now move the instructions to the predecessor, inserting it before any
523 // terminator instructions.
524 DEBUG({
525 dbgs() << "Hoisting " << *MI;
526 if (Preheader->getBasicBlock())
527 dbgs() << " to MachineBasicBlock "
528 << Preheader->getName();
529 if (MI->getParent()->getBasicBlock())
530 dbgs() << " from MachineBasicBlock "
531 << MI->getParent()->getName();
532 dbgs() << "\n";
533 });
534
535 // Splice the instruction to the preheader.
536 MachineBasicBlock *MBB = MI->getParent();
537 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
538
539 // Add register to livein list to all the BBs in the current loop since a
540 // loop invariant must be kept live throughout the whole loop. This is
541 // important to ensure later passes do not scavenge the def register.
542 AddToLiveIns(Def);
543
544 ++NumPostRAHoisted;
545 Changed = true;
546 }
547
548 /// HoistRegion - Walk the specified region of the CFG (defined by all blocks
549 /// dominated by the specified block, and that are in the current loop) in depth
550 /// first order w.r.t the DominatorTree. This allows us to visit definitions
551 /// before uses, allowing us to hoist a loop body in one pass without iteration.
552 ///
HoistRegion(MachineDomTreeNode * N,bool IsHeader)553 void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
554 assert(N != 0 && "Null dominator tree node?");
555 MachineBasicBlock *BB = N->getBlock();
556
557 // If this subregion is not in the top level loop at all, exit.
558 if (!CurLoop->contains(BB)) return;
559
560 MachineBasicBlock *Preheader = getCurPreheader();
561 if (!Preheader)
562 return;
563
564 if (IsHeader) {
565 // Compute registers which are livein into the loop headers.
566 RegSeen.clear();
567 BackTrace.clear();
568 InitRegPressure(Preheader);
569 }
570
571 // Remember livein register pressure.
572 BackTrace.push_back(RegPressure);
573
574 for (MachineBasicBlock::iterator
575 MII = BB->begin(), E = BB->end(); MII != E; ) {
576 MachineBasicBlock::iterator NextMII = MII; ++NextMII;
577 MachineInstr *MI = &*MII;
578 if (!Hoist(MI, Preheader))
579 UpdateRegPressure(MI);
580 MII = NextMII;
581 }
582
583 // Don't hoist things out of a large switch statement. This often causes
584 // code to be hoisted that wasn't going to be executed, and increases
585 // register pressure in a situation where it's likely to matter.
586 if (BB->succ_size() < 25) {
587 const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
588 for (unsigned I = 0, E = Children.size(); I != E; ++I)
589 HoistRegion(Children[I]);
590 }
591
592 BackTrace.pop_back();
593 }
594
isOperandKill(const MachineOperand & MO,MachineRegisterInfo * MRI)595 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
596 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
597 }
598
599 /// InitRegPressure - Find all virtual register references that are liveout of
600 /// the preheader to initialize the starting "register pressure". Note this
601 /// does not count live through (livein but not used) registers.
InitRegPressure(MachineBasicBlock * BB)602 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
603 std::fill(RegPressure.begin(), RegPressure.end(), 0);
604
605 // If the preheader has only a single predecessor and it ends with a
606 // fallthrough or an unconditional branch, then scan its predecessor for live
607 // defs as well. This happens whenever the preheader is created by splitting
608 // the critical edge from the loop predecessor to the loop header.
609 if (BB->pred_size() == 1) {
610 MachineBasicBlock *TBB = 0, *FBB = 0;
611 SmallVector<MachineOperand, 4> Cond;
612 if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
613 InitRegPressure(*BB->pred_begin());
614 }
615
616 for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
617 MII != E; ++MII) {
618 MachineInstr *MI = &*MII;
619 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
620 const MachineOperand &MO = MI->getOperand(i);
621 if (!MO.isReg() || MO.isImplicit())
622 continue;
623 unsigned Reg = MO.getReg();
624 if (!TargetRegisterInfo::isVirtualRegister(Reg))
625 continue;
626
627 bool isNew = RegSeen.insert(Reg);
628 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
629 EVT VT = *RC->vt_begin();
630 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
631 if (MO.isDef())
632 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
633 else {
634 bool isKill = isOperandKill(MO, MRI);
635 if (isNew && !isKill)
636 // Haven't seen this, it must be a livein.
637 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
638 else if (!isNew && isKill)
639 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
640 }
641 }
642 }
643 }
644
645 /// UpdateRegPressure - Update estimate of register pressure after the
646 /// specified instruction.
UpdateRegPressure(const MachineInstr * MI)647 void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
648 if (MI->isImplicitDef())
649 return;
650
651 SmallVector<unsigned, 4> Defs;
652 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
653 const MachineOperand &MO = MI->getOperand(i);
654 if (!MO.isReg() || MO.isImplicit())
655 continue;
656 unsigned Reg = MO.getReg();
657 if (!TargetRegisterInfo::isVirtualRegister(Reg))
658 continue;
659
660 bool isNew = RegSeen.insert(Reg);
661 if (MO.isDef())
662 Defs.push_back(Reg);
663 else if (!isNew && isOperandKill(MO, MRI)) {
664 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
665 EVT VT = *RC->vt_begin();
666 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
667 unsigned RCCost = TLI->getRepRegClassCostFor(VT);
668
669 if (RCCost > RegPressure[RCId])
670 RegPressure[RCId] = 0;
671 else
672 RegPressure[RCId] -= RCCost;
673 }
674 }
675
676 while (!Defs.empty()) {
677 unsigned Reg = Defs.pop_back_val();
678 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
679 EVT VT = *RC->vt_begin();
680 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
681 unsigned RCCost = TLI->getRepRegClassCostFor(VT);
682 RegPressure[RCId] += RCCost;
683 }
684 }
685
686 /// IsLICMCandidate - Returns true if the instruction may be a suitable
687 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
688 /// not safe to hoist it.
IsLICMCandidate(MachineInstr & I)689 bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
690 // Check if it's safe to move the instruction.
691 bool DontMoveAcrossStore = true;
692 if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
693 return false;
694
695 return true;
696 }
697
698 /// IsLoopInvariantInst - Returns true if the instruction is loop
699 /// invariant. I.e., all virtual register operands are defined outside of the
700 /// loop, physical registers aren't accessed explicitly, and there are no side
701 /// effects that aren't captured by the operands or other flags.
702 ///
IsLoopInvariantInst(MachineInstr & I)703 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
704 if (!IsLICMCandidate(I))
705 return false;
706
707 // The instruction is loop invariant if all of its operands are.
708 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
709 const MachineOperand &MO = I.getOperand(i);
710
711 if (!MO.isReg())
712 continue;
713
714 unsigned Reg = MO.getReg();
715 if (Reg == 0) continue;
716
717 // Don't hoist an instruction that uses or defines a physical register.
718 if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
719 if (MO.isUse()) {
720 // If the physreg has no defs anywhere, it's just an ambient register
721 // and we can freely move its uses. Alternatively, if it's allocatable,
722 // it could get allocated to something with a def during allocation.
723 if (!MRI->def_empty(Reg))
724 return false;
725 if (AllocatableSet.test(Reg))
726 return false;
727 // Check for a def among the register's aliases too.
728 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
729 unsigned AliasReg = *Alias;
730 if (!MRI->def_empty(AliasReg))
731 return false;
732 if (AllocatableSet.test(AliasReg))
733 return false;
734 }
735 // Otherwise it's safe to move.
736 continue;
737 } else if (!MO.isDead()) {
738 // A def that isn't dead. We can't move it.
739 return false;
740 } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
741 // If the reg is live into the loop, we can't hoist an instruction
742 // which would clobber it.
743 return false;
744 }
745 }
746
747 if (!MO.isUse())
748 continue;
749
750 assert(MRI->getVRegDef(Reg) &&
751 "Machine instr not mapped for this vreg?!");
752
753 // If the loop contains the definition of an operand, then the instruction
754 // isn't loop invariant.
755 if (CurLoop->contains(MRI->getVRegDef(Reg)))
756 return false;
757 }
758
759 // If we got this far, the instruction is loop invariant!
760 return true;
761 }
762
763
764 /// HasAnyPHIUse - Return true if the specified register is used by any
765 /// phi node.
HasAnyPHIUse(unsigned Reg) const766 bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
767 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
768 UE = MRI->use_end(); UI != UE; ++UI) {
769 MachineInstr *UseMI = &*UI;
770 if (UseMI->isPHI())
771 return true;
772 // Look pass copies as well.
773 if (UseMI->isCopy()) {
774 unsigned Def = UseMI->getOperand(0).getReg();
775 if (TargetRegisterInfo::isVirtualRegister(Def) &&
776 HasAnyPHIUse(Def))
777 return true;
778 }
779 }
780 return false;
781 }
782
783 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
784 /// and an use in the current loop, return true if the target considered
785 /// it 'high'.
HasHighOperandLatency(MachineInstr & MI,unsigned DefIdx,unsigned Reg) const786 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
787 unsigned DefIdx, unsigned Reg) const {
788 if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
789 return false;
790
791 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
792 E = MRI->use_nodbg_end(); I != E; ++I) {
793 MachineInstr *UseMI = &*I;
794 if (UseMI->isCopyLike())
795 continue;
796 if (!CurLoop->contains(UseMI->getParent()))
797 continue;
798 for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
799 const MachineOperand &MO = UseMI->getOperand(i);
800 if (!MO.isReg() || !MO.isUse())
801 continue;
802 unsigned MOReg = MO.getReg();
803 if (MOReg != Reg)
804 continue;
805
806 if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
807 return true;
808 }
809
810 // Only look at the first in loop use.
811 break;
812 }
813
814 return false;
815 }
816
817 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or
818 /// the operand latency between its def and a use is one or less.
IsCheapInstruction(MachineInstr & MI) const819 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
820 if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
821 return true;
822 if (!InstrItins || InstrItins->isEmpty())
823 return false;
824
825 bool isCheap = false;
826 unsigned NumDefs = MI.getDesc().getNumDefs();
827 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
828 MachineOperand &DefMO = MI.getOperand(i);
829 if (!DefMO.isReg() || !DefMO.isDef())
830 continue;
831 --NumDefs;
832 unsigned Reg = DefMO.getReg();
833 if (TargetRegisterInfo::isPhysicalRegister(Reg))
834 continue;
835
836 if (!TII->hasLowDefLatency(InstrItins, &MI, i))
837 return false;
838 isCheap = true;
839 }
840
841 return isCheap;
842 }
843
844 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check
845 /// if hoisting an instruction of the given cost matrix can cause high
846 /// register pressure.
CanCauseHighRegPressure(DenseMap<unsigned,int> & Cost)847 bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
848 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
849 CI != CE; ++CI) {
850 if (CI->second <= 0)
851 continue;
852
853 unsigned RCId = CI->first;
854 for (unsigned i = BackTrace.size(); i != 0; --i) {
855 SmallVector<unsigned, 8> &RP = BackTrace[i-1];
856 if (RP[RCId] + CI->second >= RegLimit[RCId])
857 return true;
858 }
859 }
860
861 return false;
862 }
863
864 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the
865 /// current block and update their register pressures to reflect the effect
866 /// of hoisting MI from the current block to the preheader.
UpdateBackTraceRegPressure(const MachineInstr * MI)867 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
868 if (MI->isImplicitDef())
869 return;
870
871 // First compute the 'cost' of the instruction, i.e. its contribution
872 // to register pressure.
873 DenseMap<unsigned, int> Cost;
874 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
875 const MachineOperand &MO = MI->getOperand(i);
876 if (!MO.isReg() || MO.isImplicit())
877 continue;
878 unsigned Reg = MO.getReg();
879 if (!TargetRegisterInfo::isVirtualRegister(Reg))
880 continue;
881
882 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
883 EVT VT = *RC->vt_begin();
884 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
885 unsigned RCCost = TLI->getRepRegClassCostFor(VT);
886 if (MO.isDef()) {
887 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
888 if (CI != Cost.end())
889 CI->second += RCCost;
890 else
891 Cost.insert(std::make_pair(RCId, RCCost));
892 } else if (isOperandKill(MO, MRI)) {
893 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
894 if (CI != Cost.end())
895 CI->second -= RCCost;
896 else
897 Cost.insert(std::make_pair(RCId, -RCCost));
898 }
899 }
900
901 // Update register pressure of blocks from loop header to current block.
902 for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
903 SmallVector<unsigned, 8> &RP = BackTrace[i];
904 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
905 CI != CE; ++CI) {
906 unsigned RCId = CI->first;
907 RP[RCId] += CI->second;
908 }
909 }
910 }
911
912 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist
913 /// the given loop invariant.
IsProfitableToHoist(MachineInstr & MI)914 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
915 if (MI.isImplicitDef())
916 return true;
917
918 // If the instruction is cheap, only hoist if it is re-materilizable. LICM
919 // will increase register pressure. It's probably not worth it if the
920 // instruction is cheap.
921 // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
922 // these tend to help performance in low register pressure situation. The
923 // trade off is it may cause spill in high pressure situation. It will end up
924 // adding a store in the loop preheader. But the reload is no more expensive.
925 // The side benefit is these loads are frequently CSE'ed.
926 if (IsCheapInstruction(MI)) {
927 if (!TII->isTriviallyReMaterializable(&MI, AA))
928 return false;
929 } else {
930 // Estimate register pressure to determine whether to LICM the instruction.
931 // In low register pressure situation, we can be more aggressive about
932 // hoisting. Also, favors hoisting long latency instructions even in
933 // moderately high pressure situation.
934 // FIXME: If there are long latency loop-invariant instructions inside the
935 // loop at this point, why didn't the optimizer's LICM hoist them?
936 DenseMap<unsigned, int> Cost;
937 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
938 const MachineOperand &MO = MI.getOperand(i);
939 if (!MO.isReg() || MO.isImplicit())
940 continue;
941 unsigned Reg = MO.getReg();
942 if (!TargetRegisterInfo::isVirtualRegister(Reg))
943 continue;
944 if (MO.isDef()) {
945 if (HasHighOperandLatency(MI, i, Reg)) {
946 ++NumHighLatency;
947 return true;
948 }
949
950 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
951 EVT VT = *RC->vt_begin();
952 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
953 unsigned RCCost = TLI->getRepRegClassCostFor(VT);
954 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
955 if (CI != Cost.end())
956 CI->second += RCCost;
957 else
958 Cost.insert(std::make_pair(RCId, RCCost));
959 } else if (isOperandKill(MO, MRI)) {
960 // Is a virtual register use is a kill, hoisting it out of the loop
961 // may actually reduce register pressure or be register pressure
962 // neutral.
963 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
964 EVT VT = *RC->vt_begin();
965 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
966 unsigned RCCost = TLI->getRepRegClassCostFor(VT);
967 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
968 if (CI != Cost.end())
969 CI->second -= RCCost;
970 else
971 Cost.insert(std::make_pair(RCId, -RCCost));
972 }
973 }
974
975 // Visit BBs from header to current BB, if hoisting this doesn't cause
976 // high register pressure, then it's safe to proceed.
977 if (!CanCauseHighRegPressure(Cost)) {
978 ++NumLowRP;
979 return true;
980 }
981
982 // High register pressure situation, only hoist if the instruction is going to
983 // be remat'ed.
984 if (!TII->isTriviallyReMaterializable(&MI, AA) &&
985 !MI.isInvariantLoad(AA))
986 return false;
987 }
988
989 // If result(s) of this instruction is used by PHIs outside of the loop, then
990 // don't hoist it if the instruction because it will introduce an extra copy.
991 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
992 const MachineOperand &MO = MI.getOperand(i);
993 if (!MO.isReg() || !MO.isDef())
994 continue;
995 if (HasAnyPHIUse(MO.getReg()))
996 return false;
997 }
998
999 return true;
1000 }
1001
ExtractHoistableLoad(MachineInstr * MI)1002 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
1003 // Don't unfold simple loads.
1004 if (MI->getDesc().canFoldAsLoad())
1005 return 0;
1006
1007 // If not, we may be able to unfold a load and hoist that.
1008 // First test whether the instruction is loading from an amenable
1009 // memory location.
1010 if (!MI->isInvariantLoad(AA))
1011 return 0;
1012
1013 // Next determine the register class for a temporary register.
1014 unsigned LoadRegIndex;
1015 unsigned NewOpc =
1016 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1017 /*UnfoldLoad=*/true,
1018 /*UnfoldStore=*/false,
1019 &LoadRegIndex);
1020 if (NewOpc == 0) return 0;
1021 const MCInstrDesc &MID = TII->get(NewOpc);
1022 if (MID.getNumDefs() != 1) return 0;
1023 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
1024 // Ok, we're unfolding. Create a temporary register and do the unfold.
1025 unsigned Reg = MRI->createVirtualRegister(RC);
1026
1027 MachineFunction &MF = *MI->getParent()->getParent();
1028 SmallVector<MachineInstr *, 2> NewMIs;
1029 bool Success =
1030 TII->unfoldMemoryOperand(MF, MI, Reg,
1031 /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
1032 NewMIs);
1033 (void)Success;
1034 assert(Success &&
1035 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1036 "succeeded!");
1037 assert(NewMIs.size() == 2 &&
1038 "Unfolded a load into multiple instructions!");
1039 MachineBasicBlock *MBB = MI->getParent();
1040 MBB->insert(MI, NewMIs[0]);
1041 MBB->insert(MI, NewMIs[1]);
1042 // If unfolding produced a load that wasn't loop-invariant or profitable to
1043 // hoist, discard the new instructions and bail.
1044 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
1045 NewMIs[0]->eraseFromParent();
1046 NewMIs[1]->eraseFromParent();
1047 return 0;
1048 }
1049
1050 // Update register pressure for the unfolded instruction.
1051 UpdateRegPressure(NewMIs[1]);
1052
1053 // Otherwise we successfully unfolded a load that we can hoist.
1054 MI->eraseFromParent();
1055 return NewMIs[0];
1056 }
1057
InitCSEMap(MachineBasicBlock * BB)1058 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
1059 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
1060 const MachineInstr *MI = &*I;
1061 unsigned Opcode = MI->getOpcode();
1062 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
1063 CI = CSEMap.find(Opcode);
1064 if (CI != CSEMap.end())
1065 CI->second.push_back(MI);
1066 else {
1067 std::vector<const MachineInstr*> CSEMIs;
1068 CSEMIs.push_back(MI);
1069 CSEMap.insert(std::make_pair(Opcode, CSEMIs));
1070 }
1071 }
1072 }
1073
1074 const MachineInstr*
LookForDuplicate(const MachineInstr * MI,std::vector<const MachineInstr * > & PrevMIs)1075 MachineLICM::LookForDuplicate(const MachineInstr *MI,
1076 std::vector<const MachineInstr*> &PrevMIs) {
1077 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
1078 const MachineInstr *PrevMI = PrevMIs[i];
1079 if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
1080 return PrevMI;
1081 }
1082 return 0;
1083 }
1084
EliminateCSE(MachineInstr * MI,DenseMap<unsigned,std::vector<const MachineInstr * >>::iterator & CI)1085 bool MachineLICM::EliminateCSE(MachineInstr *MI,
1086 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
1087 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1088 // the undef property onto uses.
1089 if (CI == CSEMap.end() || MI->isImplicitDef())
1090 return false;
1091
1092 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1093 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1094
1095 // Replace virtual registers defined by MI by their counterparts defined
1096 // by Dup.
1097 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1098 const MachineOperand &MO = MI->getOperand(i);
1099
1100 // Physical registers may not differ here.
1101 assert((!MO.isReg() || MO.getReg() == 0 ||
1102 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
1103 MO.getReg() == Dup->getOperand(i).getReg()) &&
1104 "Instructions with different phys regs are not identical!");
1105
1106 if (MO.isReg() && MO.isDef() &&
1107 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
1108 MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
1109 MRI->clearKillFlags(Dup->getOperand(i).getReg());
1110 }
1111 }
1112 MI->eraseFromParent();
1113 ++NumCSEed;
1114 return true;
1115 }
1116 return false;
1117 }
1118
1119 /// Hoist - When an instruction is found to use only loop invariant operands
1120 /// that are safe to hoist, this instruction is called to do the dirty work.
1121 ///
Hoist(MachineInstr * MI,MachineBasicBlock * Preheader)1122 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1123 // First check whether we should hoist this instruction.
1124 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
1125 // If not, try unfolding a hoistable load.
1126 MI = ExtractHoistableLoad(MI);
1127 if (!MI) return false;
1128 }
1129
1130 // Now move the instructions to the predecessor, inserting it before any
1131 // terminator instructions.
1132 DEBUG({
1133 dbgs() << "Hoisting " << *MI;
1134 if (Preheader->getBasicBlock())
1135 dbgs() << " to MachineBasicBlock "
1136 << Preheader->getName();
1137 if (MI->getParent()->getBasicBlock())
1138 dbgs() << " from MachineBasicBlock "
1139 << MI->getParent()->getName();
1140 dbgs() << "\n";
1141 });
1142
1143 // If this is the first instruction being hoisted to the preheader,
1144 // initialize the CSE map with potential common expressions.
1145 if (FirstInLoop) {
1146 InitCSEMap(Preheader);
1147 FirstInLoop = false;
1148 }
1149
1150 // Look for opportunity to CSE the hoisted instruction.
1151 unsigned Opcode = MI->getOpcode();
1152 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
1153 CI = CSEMap.find(Opcode);
1154 if (!EliminateCSE(MI, CI)) {
1155 // Otherwise, splice the instruction to the preheader.
1156 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1157
1158 // Update register pressure for BBs from header to this block.
1159 UpdateBackTraceRegPressure(MI);
1160
1161 // Clear the kill flags of any register this instruction defines,
1162 // since they may need to be live throughout the entire loop
1163 // rather than just live for part of it.
1164 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1165 MachineOperand &MO = MI->getOperand(i);
1166 if (MO.isReg() && MO.isDef() && !MO.isDead())
1167 MRI->clearKillFlags(MO.getReg());
1168 }
1169
1170 // Add to the CSE map.
1171 if (CI != CSEMap.end())
1172 CI->second.push_back(MI);
1173 else {
1174 std::vector<const MachineInstr*> CSEMIs;
1175 CSEMIs.push_back(MI);
1176 CSEMap.insert(std::make_pair(Opcode, CSEMIs));
1177 }
1178 }
1179
1180 ++NumHoisted;
1181 Changed = true;
1182
1183 return true;
1184 }
1185
getCurPreheader()1186 MachineBasicBlock *MachineLICM::getCurPreheader() {
1187 // Determine the block to which to hoist instructions. If we can't find a
1188 // suitable loop predecessor, we can't do any hoisting.
1189
1190 // If we've tried to get a preheader and failed, don't try again.
1191 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1192 return 0;
1193
1194 if (!CurPreheader) {
1195 CurPreheader = CurLoop->getLoopPreheader();
1196 if (!CurPreheader) {
1197 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1198 if (!Pred) {
1199 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1200 return 0;
1201 }
1202
1203 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
1204 if (!CurPreheader) {
1205 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1206 return 0;
1207 }
1208 }
1209 }
1210 return CurPreheader;
1211 }
1212