1 //===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the pass that optimizes code placement and aligns loop
11 // headers to target-specific alignment boundaries.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #define DEBUG_TYPE "code-placement"
16 #include "llvm/CodeGen/MachineLoopInfo.h"
17 #include "llvm/CodeGen/MachineFunctionPass.h"
18 #include "llvm/CodeGen/Passes.h"
19 #include "llvm/Target/TargetInstrInfo.h"
20 #include "llvm/Target/TargetLowering.h"
21 #include "llvm/Target/TargetMachine.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/ADT/Statistic.h"
25 using namespace llvm;
26
27 STATISTIC(NumLoopsAligned, "Number of loops aligned");
28 STATISTIC(NumIntraElim, "Number of intra loop branches eliminated");
29 STATISTIC(NumIntraMoved, "Number of intra loop branches moved");
30
31 namespace {
32 class CodePlacementOpt : public MachineFunctionPass {
33 const MachineLoopInfo *MLI;
34 const TargetInstrInfo *TII;
35 const TargetLowering *TLI;
36
37 public:
38 static char ID;
CodePlacementOpt()39 CodePlacementOpt() : MachineFunctionPass(ID) {}
40
41 virtual bool runOnMachineFunction(MachineFunction &MF);
getPassName() const42 virtual const char *getPassName() const {
43 return "Code Placement Optimizer";
44 }
45
getAnalysisUsage(AnalysisUsage & AU) const46 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
47 AU.addRequired<MachineLoopInfo>();
48 AU.addPreservedID(MachineDominatorsID);
49 MachineFunctionPass::getAnalysisUsage(AU);
50 }
51
52 private:
53 bool HasFallthrough(MachineBasicBlock *MBB);
54 bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
55 void Splice(MachineFunction &MF,
56 MachineFunction::iterator InsertPt,
57 MachineFunction::iterator Begin,
58 MachineFunction::iterator End);
59 bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
60 MachineLoop *L);
61 bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
62 MachineLoop *L);
63 bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L);
64 bool OptimizeIntraLoopEdges(MachineFunction &MF);
65 bool AlignLoops(MachineFunction &MF);
66 bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
67 };
68
69 char CodePlacementOpt::ID = 0;
70 } // end anonymous namespace
71
createCodePlacementOptPass()72 FunctionPass *llvm::createCodePlacementOptPass() {
73 return new CodePlacementOpt();
74 }
75
76 /// HasFallthrough - Test whether the given branch has a fallthrough, either as
77 /// a plain fallthrough or as a fallthrough case of a conditional branch.
78 ///
HasFallthrough(MachineBasicBlock * MBB)79 bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
80 MachineBasicBlock *TBB = 0, *FBB = 0;
81 SmallVector<MachineOperand, 4> Cond;
82 if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
83 return false;
84 // This conditional branch has no fallthrough.
85 if (FBB)
86 return false;
87 // An unconditional branch has no fallthrough.
88 if (Cond.empty() && TBB)
89 return false;
90 // It has a fallthrough.
91 return true;
92 }
93
94 /// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
95 /// This is called before major changes are begun to test whether it will be
96 /// possible to complete the changes.
97 ///
98 /// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
99 /// whenever possible.
100 ///
HasAnalyzableTerminator(MachineBasicBlock * MBB)101 bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
102 // Conservatively ignore EH landing pads.
103 if (MBB->isLandingPad()) return false;
104
105 // Aggressively handle return blocks and similar constructs.
106 if (MBB->succ_empty()) return true;
107
108 // Ask the target's AnalyzeBranch if it can handle this block.
109 MachineBasicBlock *TBB = 0, *FBB = 0;
110 SmallVector<MachineOperand, 4> Cond;
111 // Make sure the terminator is understood.
112 if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
113 return false;
114 // Ignore blocks which look like they might have EH-related control flow.
115 // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't
116 // recognize the possibility of a control transfer through an unwind.
117 // Such blocks contain EH_LABEL instructions, however they may be in the
118 // middle of the block. Instead of searching for them, just check to see
119 // if the CFG disagrees with AnalyzeBranch.
120 if (1u + !Cond.empty() != MBB->succ_size())
121 return false;
122 // Make sure we have the option of reversing the condition.
123 if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
124 return false;
125 return true;
126 }
127
128 /// Splice - Move the sequence of instructions [Begin,End) to just before
129 /// InsertPt. Update branch instructions as needed to account for broken
130 /// fallthrough edges and to take advantage of newly exposed fallthrough
131 /// opportunities.
132 ///
Splice(MachineFunction & MF,MachineFunction::iterator InsertPt,MachineFunction::iterator Begin,MachineFunction::iterator End)133 void CodePlacementOpt::Splice(MachineFunction &MF,
134 MachineFunction::iterator InsertPt,
135 MachineFunction::iterator Begin,
136 MachineFunction::iterator End) {
137 assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() &&
138 "Splice can't change the entry block!");
139 MachineFunction::iterator OldBeginPrior = prior(Begin);
140 MachineFunction::iterator OldEndPrior = prior(End);
141
142 MF.splice(InsertPt, Begin, End);
143
144 prior(Begin)->updateTerminator();
145 OldBeginPrior->updateTerminator();
146 OldEndPrior->updateTerminator();
147 }
148
149 /// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
150 /// to the loop top to the top of the loop so that they have a fall through.
151 /// This can introduce a branch on entry to the loop, but it can eliminate a
152 /// branch within the loop. See the @simple case in
153 /// test/CodeGen/X86/loop_blocks.ll for an example of this.
EliminateUnconditionalJumpsToTop(MachineFunction & MF,MachineLoop * L)154 bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
155 MachineLoop *L) {
156 bool Changed = false;
157 MachineBasicBlock *TopMBB = L->getTopBlock();
158
159 bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());
160
161 if (TopMBB == MF.begin() ||
162 HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
163 new_top:
164 for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
165 PE = TopMBB->pred_end(); PI != PE; ++PI) {
166 MachineBasicBlock *Pred = *PI;
167 if (Pred == TopMBB) continue;
168 if (HasFallthrough(Pred)) continue;
169 if (!L->contains(Pred)) continue;
170
171 // Verify that we can analyze all the loop entry edges before beginning
172 // any changes which will require us to be able to analyze them.
173 if (Pred == MF.begin())
174 continue;
175 if (!HasAnalyzableTerminator(Pred))
176 continue;
177 if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
178 continue;
179
180 // Move the block.
181 DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
182 << " to top of loop.\n");
183 Changed = true;
184
185 // Move it and all the blocks that can reach it via fallthrough edges
186 // exclusively, to keep existing fallthrough edges intact.
187 MachineFunction::iterator Begin = Pred;
188 MachineFunction::iterator End = llvm::next(Begin);
189 while (Begin != MF.begin()) {
190 MachineFunction::iterator Prior = prior(Begin);
191 if (Prior == MF.begin())
192 break;
193 // Stop when a non-fallthrough edge is found.
194 if (!HasFallthrough(Prior))
195 break;
196 // Stop if a block which could fall-through out of the loop is found.
197 if (Prior->isSuccessor(End))
198 break;
199 // If we've reached the top, stop scanning.
200 if (Prior == MachineFunction::iterator(TopMBB)) {
201 // We know top currently has a fall through (because we just checked
202 // it) which would be lost if we do the transformation, so it isn't
203 // worthwhile to do the transformation unless it would expose a new
204 // fallthrough edge.
205 if (!Prior->isSuccessor(End))
206 goto next_pred;
207 // Otherwise we can stop scanning and procede to move the blocks.
208 break;
209 }
210 // If we hit a switch or something complicated, don't move anything
211 // for this predecessor.
212 if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
213 break;
214 // Ok, the block prior to Begin will be moved along with the rest.
215 // Extend the range to include it.
216 Begin = Prior;
217 ++NumIntraMoved;
218 }
219
220 // Move the blocks.
221 Splice(MF, TopMBB, Begin, End);
222
223 // Update TopMBB.
224 TopMBB = L->getTopBlock();
225
226 // We have a new loop top. Iterate on it. We shouldn't have to do this
227 // too many times if BranchFolding has done a reasonable job.
228 goto new_top;
229 next_pred:;
230 }
231 }
232
233 // If the loop previously didn't exit with a fall-through and it now does,
234 // we eliminated a branch.
235 if (Changed &&
236 !BotHasFallthrough &&
237 HasFallthrough(L->getBottomBlock())) {
238 ++NumIntraElim;
239 }
240
241 return Changed;
242 }
243
244 /// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
245 /// portion of the loop contiguous with the header. This usually makes the loop
246 /// contiguous, provided that AnalyzeBranch can handle all the relevant
247 /// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
248 /// for an example of this.
MoveDiscontiguousLoopBlocks(MachineFunction & MF,MachineLoop * L)249 bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
250 MachineLoop *L) {
251 bool Changed = false;
252 MachineBasicBlock *TopMBB = L->getTopBlock();
253 MachineBasicBlock *BotMBB = L->getBottomBlock();
254
255 // Determine a position to move orphaned loop blocks to. If TopMBB is not
256 // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
257 // to the top of the loop to avoid losing that fallthrough. Otherwise append
258 // them to the bottom, even if it previously had a fallthrough, on the theory
259 // that it's worth an extra branch to keep the loop contiguous.
260 MachineFunction::iterator InsertPt =
261 llvm::next(MachineFunction::iterator(BotMBB));
262 bool InsertAtTop = false;
263 if (TopMBB != MF.begin() &&
264 !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
265 HasFallthrough(BotMBB)) {
266 InsertPt = TopMBB;
267 InsertAtTop = true;
268 }
269
270 // Keep a record of which blocks are in the portion of the loop contiguous
271 // with the loop header.
272 SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
273 for (MachineFunction::iterator I = TopMBB,
274 E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
275 ContiguousBlocks.insert(I);
276
277 // Find non-contigous blocks and fix them.
278 if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
279 for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
280 BI != BE; ++BI) {
281 MachineBasicBlock *BB = *BI;
282
283 // Verify that we can analyze all the loop entry edges before beginning
284 // any changes which will require us to be able to analyze them.
285 if (!HasAnalyzableTerminator(BB))
286 continue;
287 if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
288 continue;
289
290 // If the layout predecessor is part of the loop, this block will be
291 // processed along with it. This keeps them in their relative order.
292 if (BB != MF.begin() &&
293 L->contains(prior(MachineFunction::iterator(BB))))
294 continue;
295
296 // Check to see if this block is already contiguous with the main
297 // portion of the loop.
298 if (!ContiguousBlocks.insert(BB))
299 continue;
300
301 // Move the block.
302 DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
303 << " to be contiguous with loop.\n");
304 Changed = true;
305
306 // Process this block and all loop blocks contiguous with it, to keep
307 // them in their relative order.
308 MachineFunction::iterator Begin = BB;
309 MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
310 for (; End != MF.end(); ++End) {
311 if (!L->contains(End)) break;
312 if (!HasAnalyzableTerminator(End)) break;
313 ContiguousBlocks.insert(End);
314 ++NumIntraMoved;
315 }
316
317 // If we're inserting at the bottom of the loop, and the code we're
318 // moving originally had fall-through successors, bring the sucessors
319 // up with the loop blocks to preserve the fall-through edges.
320 if (!InsertAtTop)
321 for (; End != MF.end(); ++End) {
322 if (L->contains(End)) break;
323 if (!HasAnalyzableTerminator(End)) break;
324 if (!HasFallthrough(prior(End))) break;
325 }
326
327 // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
328 // we don't need them anymore at this point.
329 Splice(MF, InsertPt, Begin, End);
330 }
331
332 return Changed;
333 }
334
335 /// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize
336 /// intra-loop branching and to form contiguous loops.
337 ///
338 /// This code takes the approach of making minor changes to the existing
339 /// layout to fix specific loop-oriented problems. Also, it depends on
340 /// AnalyzeBranch, which can't understand complex control instructions.
341 ///
OptimizeIntraLoopEdgesInLoopNest(MachineFunction & MF,MachineLoop * L)342 bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF,
343 MachineLoop *L) {
344 bool Changed = false;
345
346 // Do optimization for nested loops.
347 for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
348 Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
349
350 // Do optimization for this loop.
351 Changed |= EliminateUnconditionalJumpsToTop(MF, L);
352 Changed |= MoveDiscontiguousLoopBlocks(MF, L);
353
354 return Changed;
355 }
356
357 /// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
358 /// intra-loop branching and to form contiguous loops.
359 ///
OptimizeIntraLoopEdges(MachineFunction & MF)360 bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
361 bool Changed = false;
362
363 if (!TLI->shouldOptimizeCodePlacement())
364 return Changed;
365
366 // Do optimization for each loop in the function.
367 for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
368 I != E; ++I)
369 if (!(*I)->getParentLoop())
370 Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
371
372 return Changed;
373 }
374
375 /// AlignLoops - Align loop headers to target preferred alignments.
376 ///
AlignLoops(MachineFunction & MF)377 bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
378 const Function *F = MF.getFunction();
379 if (F->hasFnAttr(Attribute::OptimizeForSize))
380 return false;
381
382 unsigned Align = TLI->getPrefLoopAlignment();
383 if (!Align)
384 return false; // Don't care about loop alignment.
385
386 bool Changed = false;
387
388 for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
389 I != E; ++I)
390 Changed |= AlignLoop(MF, *I, Align);
391
392 return Changed;
393 }
394
395 /// AlignLoop - Align loop headers to target preferred alignments.
396 ///
AlignLoop(MachineFunction & MF,MachineLoop * L,unsigned Align)397 bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
398 unsigned Align) {
399 bool Changed = false;
400
401 // Do alignment for nested loops.
402 for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
403 Changed |= AlignLoop(MF, *I, Align);
404
405 L->getTopBlock()->setAlignment(Align);
406 Changed = true;
407 ++NumLoopsAligned;
408
409 return Changed;
410 }
411
runOnMachineFunction(MachineFunction & MF)412 bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
413 MLI = &getAnalysis<MachineLoopInfo>();
414 if (MLI->empty())
415 return false; // No loops.
416
417 TLI = MF.getTarget().getTargetLowering();
418 TII = MF.getTarget().getInstrInfo();
419
420 bool Changed = OptimizeIntraLoopEdges(MF);
421
422 Changed |= AlignLoops(MF);
423
424 return Changed;
425 }
426