1 //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass that collect the Linker Optimization Hint (LOH).
11 // This pass should be run at the very end of the compilation flow, just before
12 // assembly printer.
13 // To be useful for the linker, the LOH must be printed into the assembly file.
14 //
15 // A LOH describes a sequence of instructions that may be optimized by the
16 // linker.
17 // This same sequence cannot be optimized by the compiler because some of
18 // the information will be known at link time.
19 // For instance, consider the following sequence:
20 // L1: adrp xA, sym@PAGE
21 // L2: add xB, xA, sym@PAGEOFF
22 // L3: ldr xC, [xB, #imm]
23 // This sequence can be turned into:
24 // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
25 // L3: ldr xC, sym+#imm
26 // It may also be turned into either the following more efficient
27 // code sequences:
28 // - If sym@PAGEOFF + #imm fits the encoding space of L3.
29 // L1: adrp xA, sym@PAGE
30 // L3: ldr xC, [xB, sym@PAGEOFF + #imm]
31 // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
32 // L1: adr xA, sym
33 // L3: ldr xC, [xB, #imm]
34 //
35 // To be valid a LOH must meet all the requirements needed by all the related
36 // possible linker transformations.
37 // For instance, using the running example, the constraints to emit
38 // ".loh AdrpAddLdr" are:
39 // - L1, L2, and L3 instructions are of the expected type, i.e.,
40 // respectively ADRP, ADD (immediate), and LD.
41 // - The result of L1 is used only by L2.
42 // - The register argument (xA) used in the ADD instruction is defined
43 // only by L1.
44 // - The result of L2 is used only by L3.
45 // - The base address (xB) in L3 is defined only L2.
46 // - The ADRP in L1 and the ADD in L2 must reference the same symbol using
47 // @PAGE/@PAGEOFF with no additional constants
48 //
49 // Currently supported LOHs are:
50 // * So called non-ADRP-related:
51 // - .loh AdrpAddLdr L1, L2, L3:
52 // L1: adrp xA, sym@PAGE
53 // L2: add xB, xA, sym@PAGEOFF
54 // L3: ldr xC, [xB, #imm]
55 // - .loh AdrpLdrGotLdr L1, L2, L3:
56 // L1: adrp xA, sym@GOTPAGE
57 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
58 // L3: ldr xC, [xB, #imm]
59 // - .loh AdrpLdr L1, L3:
60 // L1: adrp xA, sym@PAGE
61 // L3: ldr xC, [xA, sym@PAGEOFF]
62 // - .loh AdrpAddStr L1, L2, L3:
63 // L1: adrp xA, sym@PAGE
64 // L2: add xB, xA, sym@PAGEOFF
65 // L3: str xC, [xB, #imm]
66 // - .loh AdrpLdrGotStr L1, L2, L3:
67 // L1: adrp xA, sym@GOTPAGE
68 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
69 // L3: str xC, [xB, #imm]
70 // - .loh AdrpAdd L1, L2:
71 // L1: adrp xA, sym@PAGE
72 // L2: add xB, xA, sym@PAGEOFF
73 // For all these LOHs, L1, L2, L3 form a simple chain:
74 // L1 result is used only by L2 and L2 result by L3.
75 // L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
76 // by L1.
77 // All these LOHs aim at using more efficient load/store patterns by folding
78 // some instructions used to compute the address directly into the load/store.
79 //
80 // * So called ADRP-related:
81 // - .loh AdrpAdrp L2, L1:
82 // L2: ADRP xA, sym1@PAGE
83 // L1: ADRP xA, sym2@PAGE
84 // L2 dominates L1 and xA is not redifined between L2 and L1
85 // This LOH aims at getting rid of redundant ADRP instructions.
86 //
87 // The overall design for emitting the LOHs is:
88 // 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
89 // 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
90 // 1. Associates them a label.
91 // 2. Emits them in a MCStreamer (EmitLOHDirective).
92 // - The MCMachOStreamer records them into the MCAssembler.
93 // - The MCAsmStreamer prints them.
94 // - Other MCStreamers ignore them.
95 // 3. Closes the MCStreamer:
96 // - The MachObjectWriter gets them from the MCAssembler and writes
97 // them in the object file.
98 // - Other ObjectWriters ignore them.
99 //===----------------------------------------------------------------------===//
100
101 #include "AArch64.h"
102 #include "AArch64InstrInfo.h"
103 #include "AArch64MachineFunctionInfo.h"
104 #include "AArch64Subtarget.h"
105 #include "MCTargetDesc/AArch64AddressingModes.h"
106 #include "llvm/ADT/BitVector.h"
107 #include "llvm/ADT/DenseMap.h"
108 #include "llvm/ADT/MapVector.h"
109 #include "llvm/ADT/SetVector.h"
110 #include "llvm/ADT/SmallVector.h"
111 #include "llvm/ADT/Statistic.h"
112 #include "llvm/CodeGen/MachineBasicBlock.h"
113 #include "llvm/CodeGen/MachineDominators.h"
114 #include "llvm/CodeGen/MachineFunctionPass.h"
115 #include "llvm/CodeGen/MachineInstr.h"
116 #include "llvm/CodeGen/MachineInstrBuilder.h"
117 #include "llvm/Support/CommandLine.h"
118 #include "llvm/Support/Debug.h"
119 #include "llvm/Support/ErrorHandling.h"
120 #include "llvm/Support/raw_ostream.h"
121 #include "llvm/Target/TargetInstrInfo.h"
122 #include "llvm/Target/TargetMachine.h"
123 #include "llvm/Target/TargetRegisterInfo.h"
124 using namespace llvm;
125
126 #define DEBUG_TYPE "aarch64-collect-loh"
127
128 static cl::opt<bool>
129 PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
130 cl::desc("Restrict analysis to registers invovled"
131 " in LOHs"),
132 cl::init(true));
133
134 static cl::opt<bool>
135 BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
136 cl::desc("Restrict analysis at basic block scope"),
137 cl::init(true));
138
139 STATISTIC(NumADRPSimpleCandidate,
140 "Number of simplifiable ADRP dominate by another");
141 STATISTIC(NumADRPComplexCandidate2,
142 "Number of simplifiable ADRP reachable by 2 defs");
143 STATISTIC(NumADRPComplexCandidate3,
144 "Number of simplifiable ADRP reachable by 3 defs");
145 STATISTIC(NumADRPComplexCandidateOther,
146 "Number of simplifiable ADRP reachable by 4 or more defs");
147 STATISTIC(NumADDToSTRWithImm,
148 "Number of simplifiable STR with imm reachable by ADD");
149 STATISTIC(NumLDRToSTRWithImm,
150 "Number of simplifiable STR with imm reachable by LDR");
151 STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
152 STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
153 STATISTIC(NumADDToLDRWithImm,
154 "Number of simplifiable LDR with imm reachable by ADD");
155 STATISTIC(NumLDRToLDRWithImm,
156 "Number of simplifiable LDR with imm reachable by LDR");
157 STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
158 STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
159 STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
160 STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
161 STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
162 STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
163 STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
164 STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
165 STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
166
167 namespace llvm {
168 void initializeAArch64CollectLOHPass(PassRegistry &);
169 }
170
171 #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
172
173 namespace {
174 struct AArch64CollectLOH : public MachineFunctionPass {
175 static char ID;
AArch64CollectLOH__anonc80aa5de0111::AArch64CollectLOH176 AArch64CollectLOH() : MachineFunctionPass(ID) {
177 initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
178 }
179
180 bool runOnMachineFunction(MachineFunction &MF) override;
181
getPassName__anonc80aa5de0111::AArch64CollectLOH182 const char *getPassName() const override {
183 return AARCH64_COLLECT_LOH_NAME;
184 }
185
getAnalysisUsage__anonc80aa5de0111::AArch64CollectLOH186 void getAnalysisUsage(AnalysisUsage &AU) const override {
187 AU.setPreservesAll();
188 MachineFunctionPass::getAnalysisUsage(AU);
189 AU.addRequired<MachineDominatorTree>();
190 }
191
192 private:
193 };
194
195 /// A set of MachineInstruction.
196 typedef SetVector<const MachineInstr *> SetOfMachineInstr;
197 /// Map a basic block to a set of instructions per register.
198 /// This is used to represent the exposed uses of a basic block
199 /// per register.
200 typedef MapVector<const MachineBasicBlock *,
201 std::unique_ptr<SetOfMachineInstr[]>>
202 BlockToSetOfInstrsPerColor;
203 /// Map a basic block to an instruction per register.
204 /// This is used to represent the live-out definitions of a basic block
205 /// per register.
206 typedef MapVector<const MachineBasicBlock *,
207 std::unique_ptr<const MachineInstr *[]>>
208 BlockToInstrPerColor;
209 /// Map an instruction to a set of instructions. Used to represent the
210 /// mapping def to reachable uses or use to definitions.
211 typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
212 /// Map a basic block to a BitVector.
213 /// This is used to record the kill registers per basic block.
214 typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
215
216 /// Map a register to a dense id.
217 typedef DenseMap<unsigned, unsigned> MapRegToId;
218 /// Map a dense id to a register. Used for debug purposes.
219 typedef SmallVector<unsigned, 32> MapIdToReg;
220 } // end anonymous namespace.
221
222 char AArch64CollectLOH::ID = 0;
223
224 INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
225 AARCH64_COLLECT_LOH_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)226 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
227 INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
228 AARCH64_COLLECT_LOH_NAME, false, false)
229
230 /// Given a couple (MBB, reg) get the corresponding set of instruction from
231 /// the given "sets".
232 /// If this couple does not reference any set, an empty set is added to "sets"
233 /// for this couple and returned.
234 /// \param nbRegs is used internally allocate some memory. It must be consistent
235 /// with the way sets is used.
236 static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
237 const MachineBasicBlock &MBB, unsigned reg,
238 unsigned nbRegs) {
239 SetOfMachineInstr *result;
240 BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
241 if (it != sets.end())
242 result = it->second.get();
243 else
244 result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
245
246 return result[reg];
247 }
248
249 /// Given a couple (reg, MI) get the corresponding set of instructions from the
250 /// the given "sets".
251 /// This is used to get the uses record in sets of a definition identified by
252 /// MI and reg, i.e., MI defines reg.
253 /// If the couple does not reference anything, an empty set is added to
254 /// "sets[reg]".
255 /// \pre set[reg] is valid.
getUses(InstrToInstrs * sets,unsigned reg,const MachineInstr & MI)256 static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
257 const MachineInstr &MI) {
258 return sets[reg][&MI];
259 }
260
261 /// Same as getUses but does not modify the input map: sets.
262 /// \return NULL if the couple (reg, MI) is not in sets.
getUses(const InstrToInstrs * sets,unsigned reg,const MachineInstr & MI)263 static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
264 const MachineInstr &MI) {
265 InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
266 if (Res != sets[reg].end())
267 return &(Res->second);
268 return nullptr;
269 }
270
271 /// Initialize the reaching definition algorithm:
272 /// For each basic block BB in MF, record:
273 /// - its kill set.
274 /// - its reachable uses (uses that are exposed to BB's predecessors).
275 /// - its the generated definitions.
276 /// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
277 /// the list of uses of exposed defintions.
278 /// \param ADRPMode specifies to only consider ADRP instructions for generated
279 /// definition. It also consider definitions of ADRP instructions as uses and
280 /// ignore other uses. The ADRPMode is used to collect the information for LHO
281 /// that involve ADRP operation only.
initReachingDef(const MachineFunction & MF,InstrToInstrs * ColorOpToReachedUses,BlockToInstrPerColor & Gen,BlockToRegSet & Kill,BlockToSetOfInstrsPerColor & ReachableUses,const MapRegToId & RegToId,const MachineInstr * DummyOp,bool ADRPMode)282 static void initReachingDef(const MachineFunction &MF,
283 InstrToInstrs *ColorOpToReachedUses,
284 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
285 BlockToSetOfInstrsPerColor &ReachableUses,
286 const MapRegToId &RegToId,
287 const MachineInstr *DummyOp, bool ADRPMode) {
288 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
289 unsigned NbReg = RegToId.size();
290
291 for (const MachineBasicBlock &MBB : MF) {
292 auto &BBGen = Gen[&MBB];
293 BBGen = make_unique<const MachineInstr *[]>(NbReg);
294 std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
295
296 BitVector &BBKillSet = Kill[&MBB];
297 BBKillSet.resize(NbReg);
298 for (const MachineInstr &MI : MBB) {
299 bool IsADRP = MI.getOpcode() == AArch64::ADRP;
300
301 // Process uses first.
302 if (IsADRP || !ADRPMode)
303 for (const MachineOperand &MO : MI.operands()) {
304 // Treat ADRP def as use, as the goal of the analysis is to find
305 // ADRP defs reached by other ADRP defs.
306 if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
307 (ADRPMode && (!IsADRP || !MO.isDef())))
308 continue;
309 unsigned CurReg = MO.getReg();
310 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
311 if (ItCurRegId == RegToId.end())
312 continue;
313 CurReg = ItCurRegId->second;
314
315 // if CurReg has not been defined, this use is reachable.
316 if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
317 getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
318 // current basic block definition for this color, if any, is in Gen.
319 if (BBGen[CurReg])
320 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
321 }
322
323 // Process clobbers.
324 for (const MachineOperand &MO : MI.operands()) {
325 if (!MO.isRegMask())
326 continue;
327 // Clobbers kill the related colors.
328 const uint32_t *PreservedRegs = MO.getRegMask();
329
330 // Set generated regs.
331 for (const auto &Entry : RegToId) {
332 unsigned Reg = Entry.second;
333 // Use the global register ID when querying APIs external to this
334 // pass.
335 if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
336 // Do not register clobbered definition for no ADRP.
337 // This definition is not used anyway (otherwise register
338 // allocation is wrong).
339 BBGen[Reg] = ADRPMode ? &MI : nullptr;
340 BBKillSet.set(Reg);
341 }
342 }
343 }
344
345 // Process register defs.
346 for (const MachineOperand &MO : MI.operands()) {
347 if (!MO.isReg() || !MO.isDef())
348 continue;
349 unsigned CurReg = MO.getReg();
350 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
351 if (ItCurRegId == RegToId.end())
352 continue;
353
354 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
355 MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
356 // If this alias has not been recorded, then it is not interesting
357 // for the current analysis.
358 // We can end up in this situation because of tuple registers.
359 // E.g., Let say we are interested in S1. When we register
360 // S1, we will also register its aliases and in particular
361 // the tuple Q1_Q2.
362 // Now, when we encounter Q1_Q2, we will look through its aliases
363 // and will find that S2 is not registered.
364 if (ItRegId == RegToId.end())
365 continue;
366
367 BBKillSet.set(ItRegId->second);
368 BBGen[ItRegId->second] = &MI;
369 }
370 BBGen[ItCurRegId->second] = &MI;
371 }
372 }
373
374 // If we restrict our analysis to basic block scope, conservatively add a
375 // dummy
376 // use for each generated value.
377 if (!ADRPMode && DummyOp && !MBB.succ_empty())
378 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
379 if (BBGen[CurReg])
380 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
381 }
382 }
383
384 /// Reaching def core algorithm:
385 /// while an Out has changed
386 /// for each bb
387 /// for each color
388 /// In[bb][color] = U Out[bb.predecessors][color]
389 /// insert reachableUses[bb][color] in each in[bb][color]
390 /// op.reachedUses
391 ///
392 /// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
reachingDefAlgorithm(const MachineFunction & MF,InstrToInstrs * ColorOpToReachedUses,BlockToSetOfInstrsPerColor & In,BlockToSetOfInstrsPerColor & Out,BlockToInstrPerColor & Gen,BlockToRegSet & Kill,BlockToSetOfInstrsPerColor & ReachableUses,unsigned NbReg)393 static void reachingDefAlgorithm(const MachineFunction &MF,
394 InstrToInstrs *ColorOpToReachedUses,
395 BlockToSetOfInstrsPerColor &In,
396 BlockToSetOfInstrsPerColor &Out,
397 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
398 BlockToSetOfInstrsPerColor &ReachableUses,
399 unsigned NbReg) {
400 bool HasChanged;
401 do {
402 HasChanged = false;
403 for (const MachineBasicBlock &MBB : MF) {
404 unsigned CurReg;
405 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
406 SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
407 SetOfMachineInstr &BBReachableUses =
408 getSet(ReachableUses, MBB, CurReg, NbReg);
409 SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
410 unsigned Size = BBOutSet.size();
411 // In[bb][color] = U Out[bb.predecessors][color]
412 for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
413 SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
414 BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
415 }
416 // insert reachableUses[bb][color] in each in[bb][color] op.reachedses
417 for (const MachineInstr *MI : BBInSet) {
418 SetOfMachineInstr &OpReachedUses =
419 getUses(ColorOpToReachedUses, CurReg, *MI);
420 OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
421 }
422 // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
423 if (!Kill[&MBB].test(CurReg))
424 BBOutSet.insert(BBInSet.begin(), BBInSet.end());
425 if (Gen[&MBB][CurReg])
426 BBOutSet.insert(Gen[&MBB][CurReg]);
427 HasChanged |= BBOutSet.size() != Size;
428 }
429 }
430 } while (HasChanged);
431 }
432
433 /// Reaching definition algorithm.
434 /// \param MF function on which the algorithm will operate.
435 /// \param[out] ColorOpToReachedUses will contain the result of the reaching
436 /// def algorithm.
437 /// \param ADRPMode specify whether the reaching def algorithm should be tuned
438 /// for ADRP optimization. \see initReachingDef for more details.
439 /// \param DummyOp if not NULL, the algorithm will work at
440 /// basic block scope and will set for every exposed definition a use to
441 /// @p DummyOp.
442 /// \pre ColorOpToReachedUses is an array of at least number of registers of
443 /// InstrToInstrs.
reachingDef(const MachineFunction & MF,InstrToInstrs * ColorOpToReachedUses,const MapRegToId & RegToId,bool ADRPMode=false,const MachineInstr * DummyOp=nullptr)444 static void reachingDef(const MachineFunction &MF,
445 InstrToInstrs *ColorOpToReachedUses,
446 const MapRegToId &RegToId, bool ADRPMode = false,
447 const MachineInstr *DummyOp = nullptr) {
448 // structures:
449 // For each basic block.
450 // Out: a set per color of definitions that reach the
451 // out boundary of this block.
452 // In: Same as Out but for in boundary.
453 // Gen: generated color in this block (one operation per color).
454 // Kill: register set of killed color in this block.
455 // ReachableUses: a set per color of uses (operation) reachable
456 // for "In" definitions.
457 BlockToSetOfInstrsPerColor Out, In, ReachableUses;
458 BlockToInstrPerColor Gen;
459 BlockToRegSet Kill;
460
461 // Initialize Gen, kill and reachableUses.
462 initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
463 DummyOp, ADRPMode);
464
465 // Algo.
466 if (!DummyOp)
467 reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
468 ReachableUses, RegToId.size());
469 }
470
471 #ifndef NDEBUG
472 /// print the result of the reaching definition algorithm.
printReachingDef(const InstrToInstrs * ColorOpToReachedUses,unsigned NbReg,const TargetRegisterInfo * TRI,const MapIdToReg & IdToReg)473 static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
474 unsigned NbReg, const TargetRegisterInfo *TRI,
475 const MapIdToReg &IdToReg) {
476 unsigned CurReg;
477 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
478 if (ColorOpToReachedUses[CurReg].empty())
479 continue;
480 DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
481
482 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
483 DEBUG(dbgs() << "Def:\n");
484 DEBUG(DefsIt.first->print(dbgs()));
485 DEBUG(dbgs() << "Reachable uses:\n");
486 for (const MachineInstr *MI : DefsIt.second) {
487 DEBUG(MI->print(dbgs()));
488 }
489 }
490 }
491 }
492 #endif // NDEBUG
493
494 /// Answer the following question: Can Def be one of the definition
495 /// involved in a part of a LOH?
canDefBePartOfLOH(const MachineInstr * Def)496 static bool canDefBePartOfLOH(const MachineInstr *Def) {
497 unsigned Opc = Def->getOpcode();
498 // Accept ADRP, ADDLow and LOADGot.
499 switch (Opc) {
500 default:
501 return false;
502 case AArch64::ADRP:
503 return true;
504 case AArch64::ADDXri:
505 // Check immediate to see if the immediate is an address.
506 switch (Def->getOperand(2).getType()) {
507 default:
508 return false;
509 case MachineOperand::MO_GlobalAddress:
510 case MachineOperand::MO_JumpTableIndex:
511 case MachineOperand::MO_ConstantPoolIndex:
512 case MachineOperand::MO_BlockAddress:
513 return true;
514 }
515 case AArch64::LDRXui:
516 // Check immediate to see if the immediate is an address.
517 switch (Def->getOperand(2).getType()) {
518 default:
519 return false;
520 case MachineOperand::MO_GlobalAddress:
521 return true;
522 }
523 }
524 // Unreachable.
525 return false;
526 }
527
528 /// Check whether the given instruction can the end of a LOH chain involving a
529 /// store.
isCandidateStore(const MachineInstr * Instr)530 static bool isCandidateStore(const MachineInstr *Instr) {
531 switch (Instr->getOpcode()) {
532 default:
533 return false;
534 case AArch64::STRBBui:
535 case AArch64::STRHHui:
536 case AArch64::STRBui:
537 case AArch64::STRHui:
538 case AArch64::STRWui:
539 case AArch64::STRXui:
540 case AArch64::STRSui:
541 case AArch64::STRDui:
542 case AArch64::STRQui:
543 // In case we have str xA, [xA, #imm], this is two different uses
544 // of xA and we cannot fold, otherwise the xA stored may be wrong,
545 // even if #imm == 0.
546 if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
547 return true;
548 }
549 return false;
550 }
551
552 /// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
553 /// Build the Use to Defs information and filter out obvious non-LOH candidates.
554 /// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
555 /// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
556 /// i.e., no simple chain.
557 /// \param ADRPMode -- \see initReachingDef.
reachedUsesToDefs(InstrToInstrs & UseToReachingDefs,const InstrToInstrs * ColorOpToReachedUses,const MapRegToId & RegToId,bool ADRPMode=false)558 static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
559 const InstrToInstrs *ColorOpToReachedUses,
560 const MapRegToId &RegToId,
561 bool ADRPMode = false) {
562
563 SetOfMachineInstr NotCandidate;
564 unsigned NbReg = RegToId.size();
565 MapRegToId::const_iterator EndIt = RegToId.end();
566 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
567 // If this color is never defined, continue.
568 if (ColorOpToReachedUses[CurReg].empty())
569 continue;
570
571 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
572 for (const MachineInstr *MI : DefsIt.second) {
573 const MachineInstr *Def = DefsIt.first;
574 MapRegToId::const_iterator It;
575 // if all the reaching defs are not adrp, this use will not be
576 // simplifiable.
577 if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
578 (!ADRPMode && !canDefBePartOfLOH(Def)) ||
579 (!ADRPMode && isCandidateStore(MI) &&
580 // store are LOH candidate iff the end of the chain is used as
581 // base.
582 ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
583 It->second != CurReg))) {
584 NotCandidate.insert(MI);
585 continue;
586 }
587 // Do not consider self reaching as a simplifiable case for ADRP.
588 if (!ADRPMode || MI != DefsIt.first) {
589 UseToReachingDefs[MI].insert(DefsIt.first);
590 // If UsesIt has several reaching definitions, it is not
591 // candidate for simplificaton in non-ADRPMode.
592 if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
593 NotCandidate.insert(MI);
594 }
595 }
596 }
597 }
598 for (const MachineInstr *Elem : NotCandidate) {
599 DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
600 // It would have been better if we could just remove the entry
601 // from the map. Because of that, we have to filter the garbage
602 // (second.empty) in the subsequence analysis.
603 UseToReachingDefs[Elem].clear();
604 }
605 }
606
607 /// Based on the use to defs information (in ADRPMode), compute the
608 /// opportunities of LOH ADRP-related.
computeADRP(const InstrToInstrs & UseToDefs,AArch64FunctionInfo & AArch64FI,const MachineDominatorTree * MDT)609 static void computeADRP(const InstrToInstrs &UseToDefs,
610 AArch64FunctionInfo &AArch64FI,
611 const MachineDominatorTree *MDT) {
612 DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
613 for (const auto &Entry : UseToDefs) {
614 unsigned Size = Entry.second.size();
615 if (Size == 0)
616 continue;
617 if (Size == 1) {
618 const MachineInstr *L2 = *Entry.second.begin();
619 const MachineInstr *L1 = Entry.first;
620 if (!MDT->dominates(L2, L1)) {
621 DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
622 << '\n');
623 continue;
624 }
625 DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
626 SmallVector<const MachineInstr *, 2> Args;
627 Args.push_back(L2);
628 Args.push_back(L1);
629 AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
630 ++NumADRPSimpleCandidate;
631 }
632 #ifdef DEBUG
633 else if (Size == 2)
634 ++NumADRPComplexCandidate2;
635 else if (Size == 3)
636 ++NumADRPComplexCandidate3;
637 else
638 ++NumADRPComplexCandidateOther;
639 #endif
640 // if Size < 1, the use should have been removed from the candidates
641 assert(Size >= 1 && "No reaching defs for that use!");
642 }
643 }
644
645 /// Check whether the given instruction can be the end of a LOH chain
646 /// involving a load.
isCandidateLoad(const MachineInstr * Instr)647 static bool isCandidateLoad(const MachineInstr *Instr) {
648 switch (Instr->getOpcode()) {
649 default:
650 return false;
651 case AArch64::LDRSBWui:
652 case AArch64::LDRSBXui:
653 case AArch64::LDRSHWui:
654 case AArch64::LDRSHXui:
655 case AArch64::LDRSWui:
656 case AArch64::LDRBui:
657 case AArch64::LDRHui:
658 case AArch64::LDRWui:
659 case AArch64::LDRXui:
660 case AArch64::LDRSui:
661 case AArch64::LDRDui:
662 case AArch64::LDRQui:
663 if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
664 return false;
665 return true;
666 }
667 // Unreachable.
668 return false;
669 }
670
671 /// Check whether the given instruction can load a litteral.
supportLoadFromLiteral(const MachineInstr * Instr)672 static bool supportLoadFromLiteral(const MachineInstr *Instr) {
673 switch (Instr->getOpcode()) {
674 default:
675 return false;
676 case AArch64::LDRSWui:
677 case AArch64::LDRWui:
678 case AArch64::LDRXui:
679 case AArch64::LDRSui:
680 case AArch64::LDRDui:
681 case AArch64::LDRQui:
682 return true;
683 }
684 // Unreachable.
685 return false;
686 }
687
688 /// Check whether the given instruction is a LOH candidate.
689 /// \param UseToDefs is used to check that Instr is at the end of LOH supported
690 /// chain.
691 /// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
692 /// already been filtered out.
isCandidate(const MachineInstr * Instr,const InstrToInstrs & UseToDefs,const MachineDominatorTree * MDT)693 static bool isCandidate(const MachineInstr *Instr,
694 const InstrToInstrs &UseToDefs,
695 const MachineDominatorTree *MDT) {
696 if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
697 return false;
698
699 const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
700 if (Def->getOpcode() != AArch64::ADRP) {
701 // At this point, Def is ADDXri or LDRXui of the right type of
702 // symbol, because we filtered out the uses that were not defined
703 // by these kind of instructions (+ ADRP).
704
705 // Check if this forms a simple chain: each intermediate node must
706 // dominates the next one.
707 if (!MDT->dominates(Def, Instr))
708 return false;
709 // Move one node up in the simple chain.
710 if (UseToDefs.find(Def) ==
711 UseToDefs.end()
712 // The map may contain garbage we have to ignore.
713 ||
714 UseToDefs.find(Def)->second.empty())
715 return false;
716 Instr = Def;
717 Def = *UseToDefs.find(Def)->second.begin();
718 }
719 // Check if we reached the top of the simple chain:
720 // - top is ADRP.
721 // - check the simple chain property: each intermediate node must
722 // dominates the next one.
723 if (Def->getOpcode() == AArch64::ADRP)
724 return MDT->dominates(Def, Instr);
725 return false;
726 }
727
registerADRCandidate(const MachineInstr & Use,const InstrToInstrs & UseToDefs,const InstrToInstrs * DefsPerColorToUses,AArch64FunctionInfo & AArch64FI,SetOfMachineInstr * InvolvedInLOHs,const MapRegToId & RegToId)728 static bool registerADRCandidate(const MachineInstr &Use,
729 const InstrToInstrs &UseToDefs,
730 const InstrToInstrs *DefsPerColorToUses,
731 AArch64FunctionInfo &AArch64FI,
732 SetOfMachineInstr *InvolvedInLOHs,
733 const MapRegToId &RegToId) {
734 // Look for opportunities to turn ADRP -> ADD or
735 // ADRP -> LDR GOTPAGEOFF into ADR.
736 // If ADRP has more than one use. Give up.
737 if (Use.getOpcode() != AArch64::ADDXri &&
738 (Use.getOpcode() != AArch64::LDRXui ||
739 !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
740 return false;
741 InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
742 // The map may contain garbage that we need to ignore.
743 if (It == UseToDefs.end() || It->second.empty())
744 return false;
745 const MachineInstr &Def = **It->second.begin();
746 if (Def.getOpcode() != AArch64::ADRP)
747 return false;
748 // Check the number of users of ADRP.
749 const SetOfMachineInstr *Users =
750 getUses(DefsPerColorToUses,
751 RegToId.find(Def.getOperand(0).getReg())->second, Def);
752 if (Users->size() > 1) {
753 ++NumADRComplexCandidate;
754 return false;
755 }
756 ++NumADRSimpleCandidate;
757 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
758 "ADRP already involved in LOH.");
759 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
760 "ADD already involved in LOH.");
761 DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
762
763 SmallVector<const MachineInstr *, 2> Args;
764 Args.push_back(&Def);
765 Args.push_back(&Use);
766
767 AArch64FI.addLOHDirective(Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd
768 : MCLOH_AdrpLdrGot,
769 Args);
770 return true;
771 }
772
773 /// Based on the use to defs information (in non-ADRPMode), compute the
774 /// opportunities of LOH non-ADRP-related
computeOthers(const InstrToInstrs & UseToDefs,const InstrToInstrs * DefsPerColorToUses,AArch64FunctionInfo & AArch64FI,const MapRegToId & RegToId,const MachineDominatorTree * MDT)775 static void computeOthers(const InstrToInstrs &UseToDefs,
776 const InstrToInstrs *DefsPerColorToUses,
777 AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
778 const MachineDominatorTree *MDT) {
779 SetOfMachineInstr *InvolvedInLOHs = nullptr;
780 #ifdef DEBUG
781 SetOfMachineInstr InvolvedInLOHsStorage;
782 InvolvedInLOHs = &InvolvedInLOHsStorage;
783 #endif // DEBUG
784 DEBUG(dbgs() << "*** Compute LOH for Others\n");
785 // ADRP -> ADD/LDR -> LDR/STR pattern.
786 // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
787
788 // FIXME: When the statistics are not important,
789 // This initial filtering loop can be merged into the next loop.
790 // Currently, we didn't do it to have the same code for both DEBUG and
791 // NDEBUG builds. Indeed, the iterator of the second loop would need
792 // to be changed.
793 SetOfMachineInstr PotentialCandidates;
794 SetOfMachineInstr PotentialADROpportunities;
795 for (auto &Use : UseToDefs) {
796 // If no definition is available, this is a non candidate.
797 if (Use.second.empty())
798 continue;
799 // Keep only instructions that are load or store and at the end of
800 // a ADRP -> ADD/LDR/Nothing chain.
801 // We already filtered out the no-chain cases.
802 if (!isCandidate(Use.first, UseToDefs, MDT)) {
803 PotentialADROpportunities.insert(Use.first);
804 continue;
805 }
806 PotentialCandidates.insert(Use.first);
807 }
808
809 // Make the following distinctions for statistics as the linker does
810 // know how to decode instructions:
811 // - ADD/LDR/Nothing make there different patterns.
812 // - LDR/STR make two different patterns.
813 // Hence, 6 - 1 base patterns.
814 // (because ADRP-> Nothing -> STR is not simplifiable)
815
816 // The linker is only able to have a simple semantic, i.e., if pattern A
817 // do B.
818 // However, we want to see the opportunity we may miss if we were able to
819 // catch more complex cases.
820
821 // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
822 // A potential candidate becomes a candidate, if its current immediate
823 // operand is zero and all nodes of the chain have respectively only one user
824 #ifdef DEBUG
825 SetOfMachineInstr DefsOfPotentialCandidates;
826 #endif
827 for (const MachineInstr *Candidate : PotentialCandidates) {
828 // Get the definition of the candidate i.e., ADD or LDR.
829 const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
830 // Record the elements of the chain.
831 const MachineInstr *L1 = Def;
832 const MachineInstr *L2 = nullptr;
833 unsigned ImmediateDefOpc = Def->getOpcode();
834 if (Def->getOpcode() != AArch64::ADRP) {
835 // Check the number of users of this node.
836 const SetOfMachineInstr *Users =
837 getUses(DefsPerColorToUses,
838 RegToId.find(Def->getOperand(0).getReg())->second, *Def);
839 if (Users->size() > 1) {
840 #ifdef DEBUG
841 // if all the uses of this def are in potential candidate, this is
842 // a complex candidate of level 2.
843 bool IsLevel2 = true;
844 for (const MachineInstr *MI : *Users) {
845 if (!PotentialCandidates.count(MI)) {
846 ++NumTooCplxLvl2;
847 IsLevel2 = false;
848 break;
849 }
850 }
851 if (IsLevel2)
852 ++NumCplxLvl2;
853 #endif // DEBUG
854 PotentialADROpportunities.insert(Def);
855 continue;
856 }
857 L2 = Def;
858 Def = *UseToDefs.find(Def)->second.begin();
859 L1 = Def;
860 } // else the element in the middle of the chain is nothing, thus
861 // Def already contains the first element of the chain.
862
863 // Check the number of users of the first node in the chain, i.e., ADRP
864 const SetOfMachineInstr *Users =
865 getUses(DefsPerColorToUses,
866 RegToId.find(Def->getOperand(0).getReg())->second, *Def);
867 if (Users->size() > 1) {
868 #ifdef DEBUG
869 // if all the uses of this def are in the defs of the potential candidate,
870 // this is a complex candidate of level 1
871 if (DefsOfPotentialCandidates.empty()) {
872 // lazy init
873 DefsOfPotentialCandidates = PotentialCandidates;
874 for (const MachineInstr *Candidate : PotentialCandidates) {
875 if (!UseToDefs.find(Candidate)->second.empty())
876 DefsOfPotentialCandidates.insert(
877 *UseToDefs.find(Candidate)->second.begin());
878 }
879 }
880 bool Found = false;
881 for (auto &Use : *Users) {
882 if (!DefsOfPotentialCandidates.count(Use)) {
883 ++NumTooCplxLvl1;
884 Found = true;
885 break;
886 }
887 }
888 if (!Found)
889 ++NumCplxLvl1;
890 #endif // DEBUG
891 continue;
892 }
893
894 bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
895 // If the chain is three instructions long and ldr is the second element,
896 // then this ldr must load form GOT, otherwise this is not a correct chain.
897 if (L2 && !IsL2Add &&
898 !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
899 continue;
900 SmallVector<const MachineInstr *, 3> Args;
901 MCLOHType Kind;
902 if (isCandidateLoad(Candidate)) {
903 if (!L2) {
904 // At this point, the candidate LOH indicates that the ldr instruction
905 // may use a direct access to the symbol. There is not such encoding
906 // for loads of byte and half.
907 if (!supportLoadFromLiteral(Candidate))
908 continue;
909
910 DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
911 << '\n');
912 Kind = MCLOH_AdrpLdr;
913 Args.push_back(L1);
914 Args.push_back(Candidate);
915 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
916 "L1 already involved in LOH.");
917 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
918 "Candidate already involved in LOH.");
919 ++NumADRPToLDR;
920 } else {
921 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
922 << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
923 << '\n');
924
925 Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
926 Args.push_back(L1);
927 Args.push_back(L2);
928 Args.push_back(Candidate);
929
930 PotentialADROpportunities.remove(L2);
931 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
932 "L1 already involved in LOH.");
933 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
934 "L2 already involved in LOH.");
935 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
936 "Candidate already involved in LOH.");
937 #ifdef DEBUG
938 // get the immediate of the load
939 if (Candidate->getOperand(2).getImm() == 0)
940 if (ImmediateDefOpc == AArch64::ADDXri)
941 ++NumADDToLDR;
942 else
943 ++NumLDRToLDR;
944 else if (ImmediateDefOpc == AArch64::ADDXri)
945 ++NumADDToLDRWithImm;
946 else
947 ++NumLDRToLDRWithImm;
948 #endif // DEBUG
949 }
950 } else {
951 if (ImmediateDefOpc == AArch64::ADRP)
952 continue;
953 else {
954
955 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
956 << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
957 << '\n');
958
959 Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
960 Args.push_back(L1);
961 Args.push_back(L2);
962 Args.push_back(Candidate);
963
964 PotentialADROpportunities.remove(L2);
965 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
966 "L1 already involved in LOH.");
967 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
968 "L2 already involved in LOH.");
969 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
970 "Candidate already involved in LOH.");
971 #ifdef DEBUG
972 // get the immediate of the store
973 if (Candidate->getOperand(2).getImm() == 0)
974 if (ImmediateDefOpc == AArch64::ADDXri)
975 ++NumADDToSTR;
976 else
977 ++NumLDRToSTR;
978 else if (ImmediateDefOpc == AArch64::ADDXri)
979 ++NumADDToSTRWithImm;
980 else
981 ++NumLDRToSTRWithImm;
982 #endif // DEBUG
983 }
984 }
985 AArch64FI.addLOHDirective(Kind, Args);
986 }
987
988 // Now, we grabbed all the big patterns, check ADR opportunities.
989 for (const MachineInstr *Candidate : PotentialADROpportunities)
990 registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
991 InvolvedInLOHs, RegToId);
992 }
993
994 /// Look for every register defined by potential LOHs candidates.
995 /// Map these registers with dense id in @p RegToId and vice-versa in
996 /// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
collectInvolvedReg(const MachineFunction & MF,MapRegToId & RegToId,MapIdToReg & IdToReg,const TargetRegisterInfo * TRI)997 static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
998 MapIdToReg &IdToReg,
999 const TargetRegisterInfo *TRI) {
1000 unsigned CurRegId = 0;
1001 if (!PreCollectRegister) {
1002 unsigned NbReg = TRI->getNumRegs();
1003 for (; CurRegId < NbReg; ++CurRegId) {
1004 RegToId[CurRegId] = CurRegId;
1005 DEBUG(IdToReg.push_back(CurRegId));
1006 DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
1007 }
1008 return;
1009 }
1010
1011 DEBUG(dbgs() << "** Collect Involved Register\n");
1012 for (const auto &MBB : MF) {
1013 for (const MachineInstr &MI : MBB) {
1014 if (!canDefBePartOfLOH(&MI) &&
1015 !isCandidateLoad(&MI) && !isCandidateStore(&MI))
1016 continue;
1017
1018 // Process defs
1019 for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
1020 IOEnd = MI.operands_end();
1021 IO != IOEnd; ++IO) {
1022 if (!IO->isReg() || !IO->isDef())
1023 continue;
1024 unsigned CurReg = IO->getReg();
1025 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
1026 if (RegToId.find(*AI) == RegToId.end()) {
1027 DEBUG(IdToReg.push_back(*AI);
1028 assert(IdToReg[CurRegId] == *AI &&
1029 "Reg index mismatches insertion index."));
1030 RegToId[*AI] = CurRegId++;
1031 DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
1032 }
1033 }
1034 }
1035 }
1036 }
1037
runOnMachineFunction(MachineFunction & MF)1038 bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
1039 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1040 const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
1041
1042 MapRegToId RegToId;
1043 MapIdToReg IdToReg;
1044 AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
1045 assert(AArch64FI && "No MachineFunctionInfo for this function!");
1046
1047 DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
1048
1049 collectInvolvedReg(MF, RegToId, IdToReg, TRI);
1050 if (RegToId.empty())
1051 return false;
1052
1053 MachineInstr *DummyOp = nullptr;
1054 if (BasicBlockScopeOnly) {
1055 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
1056 // For local analysis, create a dummy operation to record uses that are not
1057 // local.
1058 DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
1059 }
1060
1061 unsigned NbReg = RegToId.size();
1062 bool Modified = false;
1063
1064 // Start with ADRP.
1065 InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
1066
1067 // Compute the reaching def in ADRP mode, meaning ADRP definitions
1068 // are first considered as uses.
1069 reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
1070 DEBUG(dbgs() << "ADRP reaching defs\n");
1071 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1072
1073 // Translate the definition to uses map into a use to definitions map to ease
1074 // statistic computation.
1075 InstrToInstrs ADRPToReachingDefs;
1076 reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
1077
1078 // Compute LOH for ADRP.
1079 computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
1080 delete[] ColorOpToReachedUses;
1081
1082 // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
1083 ColorOpToReachedUses = new InstrToInstrs[NbReg];
1084
1085 // first perform a regular reaching def analysis.
1086 reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
1087 DEBUG(dbgs() << "All reaching defs\n");
1088 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1089
1090 // Turn that into a use to defs to ease statistic computation.
1091 InstrToInstrs UsesToReachingDefs;
1092 reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
1093
1094 // Compute other than AdrpAdrp LOH.
1095 computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
1096 MDT);
1097 delete[] ColorOpToReachedUses;
1098
1099 if (BasicBlockScopeOnly)
1100 MF.DeleteMachineInstr(DummyOp);
1101
1102 return Modified;
1103 }
1104
1105 /// createAArch64CollectLOHPass - returns an instance of the Statistic for
1106 /// linker optimization pass.
createAArch64CollectLOHPass()1107 FunctionPass *llvm::createAArch64CollectLOHPass() {
1108 return new AArch64CollectLOH();
1109 }
1110