• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "arm-ldst-opt"
16 #include "ARM.h"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMBaseRegisterInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/CodeGen/SelectionDAGNodes.h"
30 #include "llvm/Target/TargetData.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/Statistic.h"
43 using namespace llvm;
44 
45 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
46 STATISTIC(NumSTMGened , "Number of stm instructions generated");
47 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
48 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
49 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
50 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
51 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
52 STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
53 STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
54 STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
55 STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
56 
57 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
58 /// load / store instructions to form ldm / stm instructions.
59 
60 namespace {
61   struct ARMLoadStoreOpt : public MachineFunctionPass {
62     static char ID;
ARMLoadStoreOpt__anona299b52d0111::ARMLoadStoreOpt63     ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
64 
65     const TargetInstrInfo *TII;
66     const TargetRegisterInfo *TRI;
67     const ARMSubtarget *STI;
68     ARMFunctionInfo *AFI;
69     RegScavenger *RS;
70     bool isThumb2;
71 
72     virtual bool runOnMachineFunction(MachineFunction &Fn);
73 
getPassName__anona299b52d0111::ARMLoadStoreOpt74     virtual const char *getPassName() const {
75       return "ARM load / store optimization pass";
76     }
77 
78   private:
79     struct MemOpQueueEntry {
80       int Offset;
81       unsigned Reg;
82       bool isKill;
83       unsigned Position;
84       MachineBasicBlock::iterator MBBI;
85       bool Merged;
MemOpQueueEntry__anona299b52d0111::ARMLoadStoreOpt::MemOpQueueEntry86       MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
87                       MachineBasicBlock::iterator i)
88         : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
89     };
90     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
91     typedef MemOpQueue::iterator MemOpQueueIter;
92 
93     bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
94                   int Offset, unsigned Base, bool BaseKill, int Opcode,
95                   ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
96                   DebugLoc dl,
97                   ArrayRef<std::pair<unsigned, bool> > Regs,
98                   ArrayRef<unsigned> ImpDefs);
99     void MergeOpsUpdate(MachineBasicBlock &MBB,
100                         MemOpQueue &MemOps,
101                         unsigned memOpsBegin,
102                         unsigned memOpsEnd,
103                         unsigned insertAfter,
104                         int Offset,
105                         unsigned Base,
106                         bool BaseKill,
107                         int Opcode,
108                         ARMCC::CondCodes Pred,
109                         unsigned PredReg,
110                         unsigned Scratch,
111                         DebugLoc dl,
112                         SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113     void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
114                       int Opcode, unsigned Size,
115                       ARMCC::CondCodes Pred, unsigned PredReg,
116                       unsigned Scratch, MemOpQueue &MemOps,
117                       SmallVector<MachineBasicBlock::iterator, 4> &Merges);
118 
119     void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
120     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
121                              MachineBasicBlock::iterator &MBBI);
122     bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
123                                   MachineBasicBlock::iterator MBBI,
124                                   const TargetInstrInfo *TII,
125                                   bool &Advance,
126                                   MachineBasicBlock::iterator &I);
127     bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
128                                    MachineBasicBlock::iterator MBBI,
129                                    bool &Advance,
130                                    MachineBasicBlock::iterator &I);
131     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
132     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
133   };
134   char ARMLoadStoreOpt::ID = 0;
135 }
136 
getLoadStoreMultipleOpcode(int Opcode,ARM_AM::AMSubMode Mode)137 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
138   switch (Opcode) {
139   default: llvm_unreachable("Unhandled opcode!");
140   case ARM::LDRi12:
141     ++NumLDMGened;
142     switch (Mode) {
143     default: llvm_unreachable("Unhandled submode!");
144     case ARM_AM::ia: return ARM::LDMIA;
145     case ARM_AM::da: return ARM::LDMDA;
146     case ARM_AM::db: return ARM::LDMDB;
147     case ARM_AM::ib: return ARM::LDMIB;
148     }
149   case ARM::STRi12:
150     ++NumSTMGened;
151     switch (Mode) {
152     default: llvm_unreachable("Unhandled submode!");
153     case ARM_AM::ia: return ARM::STMIA;
154     case ARM_AM::da: return ARM::STMDA;
155     case ARM_AM::db: return ARM::STMDB;
156     case ARM_AM::ib: return ARM::STMIB;
157     }
158   case ARM::t2LDRi8:
159   case ARM::t2LDRi12:
160     ++NumLDMGened;
161     switch (Mode) {
162     default: llvm_unreachable("Unhandled submode!");
163     case ARM_AM::ia: return ARM::t2LDMIA;
164     case ARM_AM::db: return ARM::t2LDMDB;
165     }
166   case ARM::t2STRi8:
167   case ARM::t2STRi12:
168     ++NumSTMGened;
169     switch (Mode) {
170     default: llvm_unreachable("Unhandled submode!");
171     case ARM_AM::ia: return ARM::t2STMIA;
172     case ARM_AM::db: return ARM::t2STMDB;
173     }
174   case ARM::VLDRS:
175     ++NumVLDMGened;
176     switch (Mode) {
177     default: llvm_unreachable("Unhandled submode!");
178     case ARM_AM::ia: return ARM::VLDMSIA;
179     case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
180     }
181   case ARM::VSTRS:
182     ++NumVSTMGened;
183     switch (Mode) {
184     default: llvm_unreachable("Unhandled submode!");
185     case ARM_AM::ia: return ARM::VSTMSIA;
186     case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
187     }
188   case ARM::VLDRD:
189     ++NumVLDMGened;
190     switch (Mode) {
191     default: llvm_unreachable("Unhandled submode!");
192     case ARM_AM::ia: return ARM::VLDMDIA;
193     case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
194     }
195   case ARM::VSTRD:
196     ++NumVSTMGened;
197     switch (Mode) {
198     default: llvm_unreachable("Unhandled submode!");
199     case ARM_AM::ia: return ARM::VSTMDIA;
200     case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
201     }
202   }
203 }
204 
205 namespace llvm {
206   namespace ARM_AM {
207 
getLoadStoreMultipleSubMode(int Opcode)208 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
209   switch (Opcode) {
210   default: llvm_unreachable("Unhandled opcode!");
211   case ARM::LDMIA_RET:
212   case ARM::LDMIA:
213   case ARM::LDMIA_UPD:
214   case ARM::STMIA:
215   case ARM::STMIA_UPD:
216   case ARM::t2LDMIA_RET:
217   case ARM::t2LDMIA:
218   case ARM::t2LDMIA_UPD:
219   case ARM::t2STMIA:
220   case ARM::t2STMIA_UPD:
221   case ARM::VLDMSIA:
222   case ARM::VLDMSIA_UPD:
223   case ARM::VSTMSIA:
224   case ARM::VSTMSIA_UPD:
225   case ARM::VLDMDIA:
226   case ARM::VLDMDIA_UPD:
227   case ARM::VSTMDIA:
228   case ARM::VSTMDIA_UPD:
229     return ARM_AM::ia;
230 
231   case ARM::LDMDA:
232   case ARM::LDMDA_UPD:
233   case ARM::STMDA:
234   case ARM::STMDA_UPD:
235     return ARM_AM::da;
236 
237   case ARM::LDMDB:
238   case ARM::LDMDB_UPD:
239   case ARM::STMDB:
240   case ARM::STMDB_UPD:
241   case ARM::t2LDMDB:
242   case ARM::t2LDMDB_UPD:
243   case ARM::t2STMDB:
244   case ARM::t2STMDB_UPD:
245   case ARM::VLDMSDB_UPD:
246   case ARM::VSTMSDB_UPD:
247   case ARM::VLDMDDB_UPD:
248   case ARM::VSTMDDB_UPD:
249     return ARM_AM::db;
250 
251   case ARM::LDMIB:
252   case ARM::LDMIB_UPD:
253   case ARM::STMIB:
254   case ARM::STMIB_UPD:
255     return ARM_AM::ib;
256   }
257 }
258 
259   } // end namespace ARM_AM
260 } // end namespace llvm
261 
isT2i32Load(unsigned Opc)262 static bool isT2i32Load(unsigned Opc) {
263   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
264 }
265 
isi32Load(unsigned Opc)266 static bool isi32Load(unsigned Opc) {
267   return Opc == ARM::LDRi12 || isT2i32Load(Opc);
268 }
269 
isT2i32Store(unsigned Opc)270 static bool isT2i32Store(unsigned Opc) {
271   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
272 }
273 
isi32Store(unsigned Opc)274 static bool isi32Store(unsigned Opc) {
275   return Opc == ARM::STRi12 || isT2i32Store(Opc);
276 }
277 
278 /// MergeOps - Create and insert a LDM or STM with Base as base register and
279 /// registers in Regs as the register operands that would be loaded / stored.
280 /// It returns true if the transformation is done.
281 bool
MergeOps(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,ArrayRef<std::pair<unsigned,bool>> Regs,ArrayRef<unsigned> ImpDefs)282 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
283                           MachineBasicBlock::iterator MBBI,
284                           int Offset, unsigned Base, bool BaseKill,
285                           int Opcode, ARMCC::CondCodes Pred,
286                           unsigned PredReg, unsigned Scratch, DebugLoc dl,
287                           ArrayRef<std::pair<unsigned, bool> > Regs,
288                           ArrayRef<unsigned> ImpDefs) {
289   // Only a single register to load / store. Don't bother.
290   unsigned NumRegs = Regs.size();
291   if (NumRegs <= 1)
292     return false;
293 
294   ARM_AM::AMSubMode Mode = ARM_AM::ia;
295   // VFP and Thumb2 do not support IB or DA modes.
296   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
297   bool haveIBAndDA = isNotVFP && !isThumb2;
298   if (Offset == 4 && haveIBAndDA)
299     Mode = ARM_AM::ib;
300   else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
301     Mode = ARM_AM::da;
302   else if (Offset == -4 * (int)NumRegs && isNotVFP)
303     // VLDM/VSTM do not support DB mode without also updating the base reg.
304     Mode = ARM_AM::db;
305   else if (Offset != 0) {
306     // Check if this is a supported opcode before we insert instructions to
307     // calculate a new base register.
308     if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
309 
310     // If starting offset isn't zero, insert a MI to materialize a new base.
311     // But only do so if it is cost effective, i.e. merging more than two
312     // loads / stores.
313     if (NumRegs <= 2)
314       return false;
315 
316     unsigned NewBase;
317     if (isi32Load(Opcode))
318       // If it is a load, then just use one of the destination register to
319       // use as the new base.
320       NewBase = Regs[NumRegs-1].first;
321     else {
322       // Use the scratch register to use as a new base.
323       NewBase = Scratch;
324       if (NewBase == 0)
325         return false;
326     }
327     int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
328     if (Offset < 0) {
329       BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
330       Offset = - Offset;
331     }
332     int ImmedOffset = isThumb2
333       ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
334     if (ImmedOffset == -1)
335       // FIXME: Try t2ADDri12 or t2SUBri12?
336       return false;  // Probably not worth it then.
337 
338     BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
339       .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
340       .addImm(Pred).addReg(PredReg).addReg(0);
341     Base = NewBase;
342     BaseKill = true;  // New base is always killed right its use.
343   }
344 
345   bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
346                 Opcode == ARM::VLDRD);
347   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
348   if (!Opcode) return false;
349   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
350     .addReg(Base, getKillRegState(BaseKill))
351     .addImm(Pred).addReg(PredReg);
352   for (unsigned i = 0; i != NumRegs; ++i)
353     MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
354                      | getKillRegState(Regs[i].second));
355 
356   // Add implicit defs for super-registers.
357   for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
358     MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
359 
360   return true;
361 }
362 
363 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
364 // success.
MergeOpsUpdate(MachineBasicBlock & MBB,MemOpQueue & memOps,unsigned memOpsBegin,unsigned memOpsEnd,unsigned insertAfter,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,SmallVector<MachineBasicBlock::iterator,4> & Merges)365 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
366                                      MemOpQueue &memOps,
367                                      unsigned memOpsBegin, unsigned memOpsEnd,
368                                      unsigned insertAfter, int Offset,
369                                      unsigned Base, bool BaseKill,
370                                      int Opcode,
371                                      ARMCC::CondCodes Pred, unsigned PredReg,
372                                      unsigned Scratch,
373                                      DebugLoc dl,
374                           SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
375   // First calculate which of the registers should be killed by the merged
376   // instruction.
377   const unsigned insertPos = memOps[insertAfter].Position;
378   SmallSet<unsigned, 4> KilledRegs;
379   DenseMap<unsigned, unsigned> Killer;
380   for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
381     if (i == memOpsBegin) {
382       i = memOpsEnd;
383       if (i == e)
384         break;
385     }
386     if (memOps[i].Position < insertPos && memOps[i].isKill) {
387       unsigned Reg = memOps[i].Reg;
388       KilledRegs.insert(Reg);
389       Killer[Reg] = i;
390     }
391   }
392 
393   SmallVector<std::pair<unsigned, bool>, 8> Regs;
394   SmallVector<unsigned, 8> ImpDefs;
395   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
396     unsigned Reg = memOps[i].Reg;
397     // If we are inserting the merged operation after an operation that
398     // uses the same register, make sure to transfer any kill flag.
399     bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
400     Regs.push_back(std::make_pair(Reg, isKill));
401 
402     // Collect any implicit defs of super-registers. They must be preserved.
403     for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
404       if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
405         continue;
406       unsigned DefReg = MO->getReg();
407       if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
408         ImpDefs.push_back(DefReg);
409     }
410   }
411 
412   // Try to do the merge.
413   MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
414   ++Loc;
415   if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
416                 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
417     return;
418 
419   // Merge succeeded, update records.
420   Merges.push_back(prior(Loc));
421   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
422     // Remove kill flags from any memops that come before insertPos.
423     if (Regs[i-memOpsBegin].second) {
424       unsigned Reg = Regs[i-memOpsBegin].first;
425       if (KilledRegs.count(Reg)) {
426         unsigned j = Killer[Reg];
427         int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
428         assert(Idx >= 0 && "Cannot find killing operand");
429         memOps[j].MBBI->getOperand(Idx).setIsKill(false);
430         memOps[j].isKill = false;
431       }
432       memOps[i].isKill = true;
433     }
434     MBB.erase(memOps[i].MBBI);
435     // Update this memop to refer to the merged instruction.
436     // We may need to move kill flags again.
437     memOps[i].Merged = true;
438     memOps[i].MBBI = Merges.back();
439     memOps[i].Position = insertPos;
440   }
441 }
442 
443 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
444 /// load / store multiple instructions.
445 void
MergeLDR_STR(MachineBasicBlock & MBB,unsigned SIndex,unsigned Base,int Opcode,unsigned Size,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,MemOpQueue & MemOps,SmallVector<MachineBasicBlock::iterator,4> & Merges)446 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
447                           unsigned Base, int Opcode, unsigned Size,
448                           ARMCC::CondCodes Pred, unsigned PredReg,
449                           unsigned Scratch, MemOpQueue &MemOps,
450                           SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
451   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
452   int Offset = MemOps[SIndex].Offset;
453   int SOffset = Offset;
454   unsigned insertAfter = SIndex;
455   MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
456   DebugLoc dl = Loc->getDebugLoc();
457   const MachineOperand &PMO = Loc->getOperand(0);
458   unsigned PReg = PMO.getReg();
459   unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
460   unsigned Count = 1;
461   unsigned Limit = ~0U;
462 
463   // vldm / vstm limit are 32 for S variants, 16 for D variants.
464 
465   switch (Opcode) {
466   default: break;
467   case ARM::VSTRS:
468     Limit = 32;
469     break;
470   case ARM::VSTRD:
471     Limit = 16;
472     break;
473   case ARM::VLDRD:
474     Limit = 16;
475     break;
476   case ARM::VLDRS:
477     Limit = 32;
478     break;
479   }
480 
481   for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
482     int NewOffset = MemOps[i].Offset;
483     const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
484     unsigned Reg = MO.getReg();
485     unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
486     // Register numbers must be in ascending order. For VFP / NEON load and
487     // store multiples, the registers must also be consecutive and within the
488     // limit on the number of registers per instruction.
489     if (Reg != ARM::SP &&
490         NewOffset == Offset + (int)Size &&
491         ((isNotVFP && RegNum > PRegNum) ||
492          ((Count < Limit) && RegNum == PRegNum+1))) {
493       Offset += Size;
494       PRegNum = RegNum;
495       ++Count;
496     } else {
497       // Can't merge this in. Try merge the earlier ones first.
498       MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
499                      Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
500       MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
501                    MemOps, Merges);
502       return;
503     }
504 
505     if (MemOps[i].Position > MemOps[insertAfter].Position)
506       insertAfter = i;
507   }
508 
509   bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
510   MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
511                  Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
512   return;
513 }
514 
definesCPSR(MachineInstr * MI)515 static bool definesCPSR(MachineInstr *MI) {
516   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
517     const MachineOperand &MO = MI->getOperand(i);
518     if (!MO.isReg())
519       continue;
520     if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
521       // If the instruction has live CPSR def, then it's not safe to fold it
522       // into load / store.
523       return true;
524   }
525 
526   return false;
527 }
528 
isMatchingDecrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)529 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
530                                 unsigned Bytes, unsigned Limit,
531                                 ARMCC::CondCodes Pred, unsigned PredReg) {
532   unsigned MyPredReg = 0;
533   if (!MI)
534     return false;
535 
536   bool CheckCPSRDef = false;
537   switch (MI->getOpcode()) {
538   default: return false;
539   case ARM::t2SUBri:
540   case ARM::SUBri:
541     CheckCPSRDef = true;
542   // fallthrough
543   case ARM::tSUBspi:
544     break;
545   }
546 
547   // Make sure the offset fits in 8 bits.
548   if (Bytes == 0 || (Limit && Bytes >= Limit))
549     return false;
550 
551   unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
552   if (!(MI->getOperand(0).getReg() == Base &&
553         MI->getOperand(1).getReg() == Base &&
554         (MI->getOperand(2).getImm()*Scale) == Bytes &&
555         getInstrPredicate(MI, MyPredReg) == Pred &&
556         MyPredReg == PredReg))
557     return false;
558 
559   return CheckCPSRDef ? !definesCPSR(MI) : true;
560 }
561 
isMatchingIncrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)562 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
563                                 unsigned Bytes, unsigned Limit,
564                                 ARMCC::CondCodes Pred, unsigned PredReg) {
565   unsigned MyPredReg = 0;
566   if (!MI)
567     return false;
568 
569   bool CheckCPSRDef = false;
570   switch (MI->getOpcode()) {
571   default: return false;
572   case ARM::t2ADDri:
573   case ARM::ADDri:
574     CheckCPSRDef = true;
575   // fallthrough
576   case ARM::tADDspi:
577     break;
578   }
579 
580   if (Bytes == 0 || (Limit && Bytes >= Limit))
581     // Make sure the offset fits in 8 bits.
582     return false;
583 
584   unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
585   if (!(MI->getOperand(0).getReg() == Base &&
586         MI->getOperand(1).getReg() == Base &&
587         (MI->getOperand(2).getImm()*Scale) == Bytes &&
588         getInstrPredicate(MI, MyPredReg) == Pred &&
589         MyPredReg == PredReg))
590     return false;
591 
592   return CheckCPSRDef ? !definesCPSR(MI) : true;
593 }
594 
getLSMultipleTransferSize(MachineInstr * MI)595 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
596   switch (MI->getOpcode()) {
597   default: return 0;
598   case ARM::LDRi12:
599   case ARM::STRi12:
600   case ARM::t2LDRi8:
601   case ARM::t2LDRi12:
602   case ARM::t2STRi8:
603   case ARM::t2STRi12:
604   case ARM::VLDRS:
605   case ARM::VSTRS:
606     return 4;
607   case ARM::VLDRD:
608   case ARM::VSTRD:
609     return 8;
610   case ARM::LDMIA:
611   case ARM::LDMDA:
612   case ARM::LDMDB:
613   case ARM::LDMIB:
614   case ARM::STMIA:
615   case ARM::STMDA:
616   case ARM::STMDB:
617   case ARM::STMIB:
618   case ARM::t2LDMIA:
619   case ARM::t2LDMDB:
620   case ARM::t2STMIA:
621   case ARM::t2STMDB:
622   case ARM::VLDMSIA:
623   case ARM::VSTMSIA:
624     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
625   case ARM::VLDMDIA:
626   case ARM::VSTMDIA:
627     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
628   }
629 }
630 
getUpdatingLSMultipleOpcode(unsigned Opc,ARM_AM::AMSubMode Mode)631 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
632                                             ARM_AM::AMSubMode Mode) {
633   switch (Opc) {
634   default: llvm_unreachable("Unhandled opcode!");
635   case ARM::LDMIA:
636   case ARM::LDMDA:
637   case ARM::LDMDB:
638   case ARM::LDMIB:
639     switch (Mode) {
640     default: llvm_unreachable("Unhandled submode!");
641     case ARM_AM::ia: return ARM::LDMIA_UPD;
642     case ARM_AM::ib: return ARM::LDMIB_UPD;
643     case ARM_AM::da: return ARM::LDMDA_UPD;
644     case ARM_AM::db: return ARM::LDMDB_UPD;
645     }
646   case ARM::STMIA:
647   case ARM::STMDA:
648   case ARM::STMDB:
649   case ARM::STMIB:
650     switch (Mode) {
651     default: llvm_unreachable("Unhandled submode!");
652     case ARM_AM::ia: return ARM::STMIA_UPD;
653     case ARM_AM::ib: return ARM::STMIB_UPD;
654     case ARM_AM::da: return ARM::STMDA_UPD;
655     case ARM_AM::db: return ARM::STMDB_UPD;
656     }
657   case ARM::t2LDMIA:
658   case ARM::t2LDMDB:
659     switch (Mode) {
660     default: llvm_unreachable("Unhandled submode!");
661     case ARM_AM::ia: return ARM::t2LDMIA_UPD;
662     case ARM_AM::db: return ARM::t2LDMDB_UPD;
663     }
664   case ARM::t2STMIA:
665   case ARM::t2STMDB:
666     switch (Mode) {
667     default: llvm_unreachable("Unhandled submode!");
668     case ARM_AM::ia: return ARM::t2STMIA_UPD;
669     case ARM_AM::db: return ARM::t2STMDB_UPD;
670     }
671   case ARM::VLDMSIA:
672     switch (Mode) {
673     default: llvm_unreachable("Unhandled submode!");
674     case ARM_AM::ia: return ARM::VLDMSIA_UPD;
675     case ARM_AM::db: return ARM::VLDMSDB_UPD;
676     }
677   case ARM::VLDMDIA:
678     switch (Mode) {
679     default: llvm_unreachable("Unhandled submode!");
680     case ARM_AM::ia: return ARM::VLDMDIA_UPD;
681     case ARM_AM::db: return ARM::VLDMDDB_UPD;
682     }
683   case ARM::VSTMSIA:
684     switch (Mode) {
685     default: llvm_unreachable("Unhandled submode!");
686     case ARM_AM::ia: return ARM::VSTMSIA_UPD;
687     case ARM_AM::db: return ARM::VSTMSDB_UPD;
688     }
689   case ARM::VSTMDIA:
690     switch (Mode) {
691     default: llvm_unreachable("Unhandled submode!");
692     case ARM_AM::ia: return ARM::VSTMDIA_UPD;
693     case ARM_AM::db: return ARM::VSTMDDB_UPD;
694     }
695   }
696 }
697 
698 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
699 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
700 ///
701 /// stmia rn, <ra, rb, rc>
702 /// rn := rn + 4 * 3;
703 /// =>
704 /// stmia rn!, <ra, rb, rc>
705 ///
706 /// rn := rn - 4 * 3;
707 /// ldmia rn, <ra, rb, rc>
708 /// =>
709 /// ldmdb rn!, <ra, rb, rc>
MergeBaseUpdateLSMultiple(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool & Advance,MachineBasicBlock::iterator & I)710 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
711                                                MachineBasicBlock::iterator MBBI,
712                                                bool &Advance,
713                                                MachineBasicBlock::iterator &I) {
714   MachineInstr *MI = MBBI;
715   unsigned Base = MI->getOperand(0).getReg();
716   bool BaseKill = MI->getOperand(0).isKill();
717   unsigned Bytes = getLSMultipleTransferSize(MI);
718   unsigned PredReg = 0;
719   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
720   int Opcode = MI->getOpcode();
721   DebugLoc dl = MI->getDebugLoc();
722 
723   // Can't use an updating ld/st if the base register is also a dest
724   // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
725   for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
726     if (MI->getOperand(i).getReg() == Base)
727       return false;
728 
729   bool DoMerge = false;
730   ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
731 
732   // Try merging with the previous instruction.
733   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
734   if (MBBI != BeginMBBI) {
735     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
736     while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
737       --PrevMBBI;
738     if (Mode == ARM_AM::ia &&
739         isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
740       Mode = ARM_AM::db;
741       DoMerge = true;
742     } else if (Mode == ARM_AM::ib &&
743                isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
744       Mode = ARM_AM::da;
745       DoMerge = true;
746     }
747     if (DoMerge)
748       MBB.erase(PrevMBBI);
749   }
750 
751   // Try merging with the next instruction.
752   MachineBasicBlock::iterator EndMBBI = MBB.end();
753   if (!DoMerge && MBBI != EndMBBI) {
754     MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
755     while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
756       ++NextMBBI;
757     if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
758         isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
759       DoMerge = true;
760     } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
761                isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
762       DoMerge = true;
763     }
764     if (DoMerge) {
765       if (NextMBBI == I) {
766         Advance = true;
767         ++I;
768       }
769       MBB.erase(NextMBBI);
770     }
771   }
772 
773   if (!DoMerge)
774     return false;
775 
776   unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
777   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
778     .addReg(Base, getDefRegState(true)) // WB base register
779     .addReg(Base, getKillRegState(BaseKill))
780     .addImm(Pred).addReg(PredReg);
781 
782   // Transfer the rest of operands.
783   for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
784     MIB.addOperand(MI->getOperand(OpNum));
785 
786   // Transfer memoperands.
787   MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
788 
789   MBB.erase(MBBI);
790   return true;
791 }
792 
getPreIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)793 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
794                                              ARM_AM::AddrOpc Mode) {
795   switch (Opc) {
796   case ARM::LDRi12:
797     return ARM::LDR_PRE_IMM;
798   case ARM::STRi12:
799     return ARM::STR_PRE_IMM;
800   case ARM::VLDRS:
801     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
802   case ARM::VLDRD:
803     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
804   case ARM::VSTRS:
805     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
806   case ARM::VSTRD:
807     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
808   case ARM::t2LDRi8:
809   case ARM::t2LDRi12:
810     return ARM::t2LDR_PRE;
811   case ARM::t2STRi8:
812   case ARM::t2STRi12:
813     return ARM::t2STR_PRE;
814   default: llvm_unreachable("Unhandled opcode!");
815   }
816 }
817 
getPostIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)818 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
819                                               ARM_AM::AddrOpc Mode) {
820   switch (Opc) {
821   case ARM::LDRi12:
822     return ARM::LDR_POST_IMM;
823   case ARM::STRi12:
824     return ARM::STR_POST_IMM;
825   case ARM::VLDRS:
826     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
827   case ARM::VLDRD:
828     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
829   case ARM::VSTRS:
830     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
831   case ARM::VSTRD:
832     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
833   case ARM::t2LDRi8:
834   case ARM::t2LDRi12:
835     return ARM::t2LDR_POST;
836   case ARM::t2STRi8:
837   case ARM::t2STRi12:
838     return ARM::t2STR_POST;
839   default: llvm_unreachable("Unhandled opcode!");
840   }
841 }
842 
843 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
844 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
MergeBaseUpdateLoadStore(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const TargetInstrInfo * TII,bool & Advance,MachineBasicBlock::iterator & I)845 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
846                                                MachineBasicBlock::iterator MBBI,
847                                                const TargetInstrInfo *TII,
848                                                bool &Advance,
849                                                MachineBasicBlock::iterator &I) {
850   MachineInstr *MI = MBBI;
851   unsigned Base = MI->getOperand(1).getReg();
852   bool BaseKill = MI->getOperand(1).isKill();
853   unsigned Bytes = getLSMultipleTransferSize(MI);
854   int Opcode = MI->getOpcode();
855   DebugLoc dl = MI->getDebugLoc();
856   bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
857                 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
858   bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
859   if (isi32Load(Opcode) || isi32Store(Opcode))
860     if (MI->getOperand(2).getImm() != 0)
861       return false;
862   if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
863     return false;
864 
865   bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
866   // Can't do the merge if the destination register is the same as the would-be
867   // writeback register.
868   if (isLd && MI->getOperand(0).getReg() == Base)
869     return false;
870 
871   unsigned PredReg = 0;
872   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
873   bool DoMerge = false;
874   ARM_AM::AddrOpc AddSub = ARM_AM::add;
875   unsigned NewOpc = 0;
876   // AM2 - 12 bits, thumb2 - 8 bits.
877   unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
878 
879   // Try merging with the previous instruction.
880   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
881   if (MBBI != BeginMBBI) {
882     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
883     while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
884       --PrevMBBI;
885     if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
886       DoMerge = true;
887       AddSub = ARM_AM::sub;
888     } else if (!isAM5 &&
889                isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
890       DoMerge = true;
891     }
892     if (DoMerge) {
893       NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
894       MBB.erase(PrevMBBI);
895     }
896   }
897 
898   // Try merging with the next instruction.
899   MachineBasicBlock::iterator EndMBBI = MBB.end();
900   if (!DoMerge && MBBI != EndMBBI) {
901     MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
902     while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
903       ++NextMBBI;
904     if (!isAM5 &&
905         isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
906       DoMerge = true;
907       AddSub = ARM_AM::sub;
908     } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
909       DoMerge = true;
910     }
911     if (DoMerge) {
912       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
913       if (NextMBBI == I) {
914         Advance = true;
915         ++I;
916       }
917       MBB.erase(NextMBBI);
918     }
919   }
920 
921   if (!DoMerge)
922     return false;
923 
924   if (isAM5) {
925     // VLDM[SD}_UPD, VSTM[SD]_UPD
926     // (There are no base-updating versions of VLDR/VSTR instructions, but the
927     // updating load/store-multiple instructions can be used with only one
928     // register.)
929     MachineOperand &MO = MI->getOperand(0);
930     BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
931       .addReg(Base, getDefRegState(true)) // WB base register
932       .addReg(Base, getKillRegState(isLd ? BaseKill : false))
933       .addImm(Pred).addReg(PredReg)
934       .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
935                             getKillRegState(MO.isKill())));
936   } else if (isLd) {
937     if (isAM2) {
938       // LDR_PRE, LDR_POST
939       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
940         int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
941         BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
942           .addReg(Base, RegState::Define)
943           .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
944       } else {
945         int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
946         BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
947           .addReg(Base, RegState::Define)
948           .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
949       }
950     } else {
951       int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
952       // t2LDR_PRE, t2LDR_POST
953       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
954         .addReg(Base, RegState::Define)
955         .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
956     }
957   } else {
958     MachineOperand &MO = MI->getOperand(0);
959     // FIXME: post-indexed stores use am2offset_imm, which still encodes
960     // the vestigal zero-reg offset register. When that's fixed, this clause
961     // can be removed entirely.
962     if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
963       int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
964       // STR_PRE, STR_POST
965       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
966         .addReg(MO.getReg(), getKillRegState(MO.isKill()))
967         .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
968     } else {
969       int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
970       // t2STR_PRE, t2STR_POST
971       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
972         .addReg(MO.getReg(), getKillRegState(MO.isKill()))
973         .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
974     }
975   }
976   MBB.erase(MBBI);
977 
978   return true;
979 }
980 
981 /// isMemoryOp - Returns true if instruction is a memory operation that this
982 /// pass is capable of operating on.
isMemoryOp(const MachineInstr * MI)983 static bool isMemoryOp(const MachineInstr *MI) {
984   // When no memory operands are present, conservatively assume unaligned,
985   // volatile, unfoldable.
986   if (!MI->hasOneMemOperand())
987     return false;
988 
989   const MachineMemOperand *MMO = *MI->memoperands_begin();
990 
991   // Don't touch volatile memory accesses - we may be changing their order.
992   if (MMO->isVolatile())
993     return false;
994 
995   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
996   // not.
997   if (MMO->getAlignment() < 4)
998     return false;
999 
1000   // str <undef> could probably be eliminated entirely, but for now we just want
1001   // to avoid making a mess of it.
1002   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1003   if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1004       MI->getOperand(0).isUndef())
1005     return false;
1006 
1007   // Likewise don't mess with references to undefined addresses.
1008   if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1009       MI->getOperand(1).isUndef())
1010     return false;
1011 
1012   int Opcode = MI->getOpcode();
1013   switch (Opcode) {
1014   default: break;
1015   case ARM::VLDRS:
1016   case ARM::VSTRS:
1017     return MI->getOperand(1).isReg();
1018   case ARM::VLDRD:
1019   case ARM::VSTRD:
1020     return MI->getOperand(1).isReg();
1021   case ARM::LDRi12:
1022   case ARM::STRi12:
1023   case ARM::t2LDRi8:
1024   case ARM::t2LDRi12:
1025   case ARM::t2STRi8:
1026   case ARM::t2STRi12:
1027     return MI->getOperand(1).isReg();
1028   }
1029   return false;
1030 }
1031 
1032 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1033 /// op that is being merged.
AdvanceRS(MachineBasicBlock & MBB,MemOpQueue & MemOps)1034 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1035   MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1036   unsigned Position = MemOps[0].Position;
1037   for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1038     if (MemOps[i].Position < Position) {
1039       Position = MemOps[i].Position;
1040       Loc = MemOps[i].MBBI;
1041     }
1042   }
1043 
1044   if (Loc != MBB.begin())
1045     RS->forward(prior(Loc));
1046 }
1047 
getMemoryOpOffset(const MachineInstr * MI)1048 static int getMemoryOpOffset(const MachineInstr *MI) {
1049   int Opcode = MI->getOpcode();
1050   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1051   unsigned NumOperands = MI->getDesc().getNumOperands();
1052   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1053 
1054   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1055       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1056       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1057       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
1058     return OffField;
1059 
1060   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1061     : ARM_AM::getAM5Offset(OffField) * 4;
1062   if (isAM3) {
1063     if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1064       Offset = -Offset;
1065   } else {
1066     if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1067       Offset = -Offset;
1068   }
1069   return Offset;
1070 }
1071 
InsertLDR_STR(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,int Offset,bool isDef,DebugLoc dl,unsigned NewOpc,unsigned Reg,bool RegDeadKill,bool RegUndef,unsigned BaseReg,bool BaseKill,bool BaseUndef,bool OffKill,bool OffUndef,ARMCC::CondCodes Pred,unsigned PredReg,const TargetInstrInfo * TII,bool isT2)1072 static void InsertLDR_STR(MachineBasicBlock &MBB,
1073                           MachineBasicBlock::iterator &MBBI,
1074                           int Offset, bool isDef,
1075                           DebugLoc dl, unsigned NewOpc,
1076                           unsigned Reg, bool RegDeadKill, bool RegUndef,
1077                           unsigned BaseReg, bool BaseKill, bool BaseUndef,
1078                           bool OffKill, bool OffUndef,
1079                           ARMCC::CondCodes Pred, unsigned PredReg,
1080                           const TargetInstrInfo *TII, bool isT2) {
1081   if (isDef) {
1082     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1083                                       TII->get(NewOpc))
1084       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1085       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1086     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1087   } else {
1088     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1089                                       TII->get(NewOpc))
1090       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1091       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1092     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1093   }
1094 }
1095 
FixInvalidRegPairOp(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI)1096 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1097                                           MachineBasicBlock::iterator &MBBI) {
1098   MachineInstr *MI = &*MBBI;
1099   unsigned Opcode = MI->getOpcode();
1100   if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1101       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1102     const MachineOperand &BaseOp = MI->getOperand(2);
1103     unsigned BaseReg = BaseOp.getReg();
1104     unsigned EvenReg = MI->getOperand(0).getReg();
1105     unsigned OddReg  = MI->getOperand(1).getReg();
1106     unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1107     unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
1108     // ARM errata 602117: LDRD with base in list may result in incorrect base
1109     // register when interrupted or faulted.
1110     bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1111     if (!Errata602117 &&
1112         ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1113       return false;
1114 
1115     MachineBasicBlock::iterator NewBBI = MBBI;
1116     bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1117     bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1118     bool EvenDeadKill = isLd ?
1119       MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1120     bool EvenUndef = MI->getOperand(0).isUndef();
1121     bool OddDeadKill  = isLd ?
1122       MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1123     bool OddUndef = MI->getOperand(1).isUndef();
1124     bool BaseKill = BaseOp.isKill();
1125     bool BaseUndef = BaseOp.isUndef();
1126     bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1127     bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1128     int OffImm = getMemoryOpOffset(MI);
1129     unsigned PredReg = 0;
1130     ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1131 
1132     if (OddRegNum > EvenRegNum && OffImm == 0) {
1133       // Ascending register numbers and no offset. It's safe to change it to a
1134       // ldm or stm.
1135       unsigned NewOpc = (isLd)
1136         ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1137         : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1138       if (isLd) {
1139         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1140           .addReg(BaseReg, getKillRegState(BaseKill))
1141           .addImm(Pred).addReg(PredReg)
1142           .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1143           .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1144         ++NumLDRD2LDM;
1145       } else {
1146         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1147           .addReg(BaseReg, getKillRegState(BaseKill))
1148           .addImm(Pred).addReg(PredReg)
1149           .addReg(EvenReg,
1150                   getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1151           .addReg(OddReg,
1152                   getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
1153         ++NumSTRD2STM;
1154       }
1155       NewBBI = llvm::prior(MBBI);
1156     } else {
1157       // Split into two instructions.
1158       unsigned NewOpc = (isLd)
1159         ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1160         : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1161       // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1162       // so adjust and use t2LDRi12 here for that.
1163       unsigned NewOpc2 = (isLd)
1164         ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1165         : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1166       DebugLoc dl = MBBI->getDebugLoc();
1167       // If this is a load and base register is killed, it may have been
1168       // re-defed by the load, make sure the first load does not clobber it.
1169       if (isLd &&
1170           (BaseKill || OffKill) &&
1171           (TRI->regsOverlap(EvenReg, BaseReg))) {
1172         assert(!TRI->regsOverlap(OddReg, BaseReg));
1173         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1174                       OddReg, OddDeadKill, false,
1175                       BaseReg, false, BaseUndef, false, OffUndef,
1176                       Pred, PredReg, TII, isT2);
1177         NewBBI = llvm::prior(MBBI);
1178         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1179                       EvenReg, EvenDeadKill, false,
1180                       BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1181                       Pred, PredReg, TII, isT2);
1182       } else {
1183         if (OddReg == EvenReg && EvenDeadKill) {
1184           // If the two source operands are the same, the kill marker is
1185           // probably on the first one. e.g.
1186           // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1187           EvenDeadKill = false;
1188           OddDeadKill = true;
1189         }
1190         // Never kill the base register in the first instruction.
1191         // <rdar://problem/11101911>
1192         if (EvenReg == BaseReg)
1193           EvenDeadKill = false;
1194         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1195                       EvenReg, EvenDeadKill, EvenUndef,
1196                       BaseReg, false, BaseUndef, false, OffUndef,
1197                       Pred, PredReg, TII, isT2);
1198         NewBBI = llvm::prior(MBBI);
1199         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1200                       OddReg, OddDeadKill, OddUndef,
1201                       BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1202                       Pred, PredReg, TII, isT2);
1203       }
1204       if (isLd)
1205         ++NumLDRD2LDR;
1206       else
1207         ++NumSTRD2STR;
1208     }
1209 
1210     MBB.erase(MI);
1211     MBBI = NewBBI;
1212     return true;
1213   }
1214   return false;
1215 }
1216 
1217 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1218 /// ops of the same base and incrementing offset into LDM / STM ops.
LoadStoreMultipleOpti(MachineBasicBlock & MBB)1219 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1220   unsigned NumMerges = 0;
1221   unsigned NumMemOps = 0;
1222   MemOpQueue MemOps;
1223   unsigned CurrBase = 0;
1224   int CurrOpc = -1;
1225   unsigned CurrSize = 0;
1226   ARMCC::CondCodes CurrPred = ARMCC::AL;
1227   unsigned CurrPredReg = 0;
1228   unsigned Position = 0;
1229   SmallVector<MachineBasicBlock::iterator,4> Merges;
1230 
1231   RS->enterBasicBlock(&MBB);
1232   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1233   while (MBBI != E) {
1234     if (FixInvalidRegPairOp(MBB, MBBI))
1235       continue;
1236 
1237     bool Advance  = false;
1238     bool TryMerge = false;
1239     bool Clobber  = false;
1240 
1241     bool isMemOp = isMemoryOp(MBBI);
1242     if (isMemOp) {
1243       int Opcode = MBBI->getOpcode();
1244       unsigned Size = getLSMultipleTransferSize(MBBI);
1245       const MachineOperand &MO = MBBI->getOperand(0);
1246       unsigned Reg = MO.getReg();
1247       bool isKill = MO.isDef() ? false : MO.isKill();
1248       unsigned Base = MBBI->getOperand(1).getReg();
1249       unsigned PredReg = 0;
1250       ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1251       int Offset = getMemoryOpOffset(MBBI);
1252       // Watch out for:
1253       // r4 := ldr [r5]
1254       // r5 := ldr [r5, #4]
1255       // r6 := ldr [r5, #8]
1256       //
1257       // The second ldr has effectively broken the chain even though it
1258       // looks like the later ldr(s) use the same base register. Try to
1259       // merge the ldr's so far, including this one. But don't try to
1260       // combine the following ldr(s).
1261       Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1262       if (CurrBase == 0 && !Clobber) {
1263         // Start of a new chain.
1264         CurrBase = Base;
1265         CurrOpc  = Opcode;
1266         CurrSize = Size;
1267         CurrPred = Pred;
1268         CurrPredReg = PredReg;
1269         MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1270         ++NumMemOps;
1271         Advance = true;
1272       } else {
1273         if (Clobber) {
1274           TryMerge = true;
1275           Advance = true;
1276         }
1277 
1278         if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1279           // No need to match PredReg.
1280           // Continue adding to the queue.
1281           if (Offset > MemOps.back().Offset) {
1282             MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1283                                              Position, MBBI));
1284             ++NumMemOps;
1285             Advance = true;
1286           } else {
1287             for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1288                  I != E; ++I) {
1289               if (Offset < I->Offset) {
1290                 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1291                                                  Position, MBBI));
1292                 ++NumMemOps;
1293                 Advance = true;
1294                 break;
1295               } else if (Offset == I->Offset) {
1296                 // Collision! This can't be merged!
1297                 break;
1298               }
1299             }
1300           }
1301         }
1302       }
1303     }
1304 
1305     if (MBBI->isDebugValue()) {
1306       ++MBBI;
1307       if (MBBI == E)
1308         // Reach the end of the block, try merging the memory instructions.
1309         TryMerge = true;
1310     } else if (Advance) {
1311       ++Position;
1312       ++MBBI;
1313       if (MBBI == E)
1314         // Reach the end of the block, try merging the memory instructions.
1315         TryMerge = true;
1316     } else
1317       TryMerge = true;
1318 
1319     if (TryMerge) {
1320       if (NumMemOps > 1) {
1321         // Try to find a free register to use as a new base in case it's needed.
1322         // First advance to the instruction just before the start of the chain.
1323         AdvanceRS(MBB, MemOps);
1324         // Find a scratch register.
1325         unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
1326         // Process the load / store instructions.
1327         RS->forward(prior(MBBI));
1328 
1329         // Merge ops.
1330         Merges.clear();
1331         MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1332                      CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1333 
1334         // Try folding preceding/trailing base inc/dec into the generated
1335         // LDM/STM ops.
1336         for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1337           if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1338             ++NumMerges;
1339         NumMerges += Merges.size();
1340 
1341         // Try folding preceding/trailing base inc/dec into those load/store
1342         // that were not merged to form LDM/STM ops.
1343         for (unsigned i = 0; i != NumMemOps; ++i)
1344           if (!MemOps[i].Merged)
1345             if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1346               ++NumMerges;
1347 
1348         // RS may be pointing to an instruction that's deleted.
1349         RS->skipTo(prior(MBBI));
1350       } else if (NumMemOps == 1) {
1351         // Try folding preceding/trailing base inc/dec into the single
1352         // load/store.
1353         if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1354           ++NumMerges;
1355           RS->forward(prior(MBBI));
1356         }
1357       }
1358 
1359       CurrBase = 0;
1360       CurrOpc = -1;
1361       CurrSize = 0;
1362       CurrPred = ARMCC::AL;
1363       CurrPredReg = 0;
1364       if (NumMemOps) {
1365         MemOps.clear();
1366         NumMemOps = 0;
1367       }
1368 
1369       // If iterator hasn't been advanced and this is not a memory op, skip it.
1370       // It can't start a new chain anyway.
1371       if (!Advance && !isMemOp && MBBI != E) {
1372         ++Position;
1373         ++MBBI;
1374       }
1375     }
1376   }
1377   return NumMerges > 0;
1378 }
1379 
1380 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1381 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1382 /// directly restore the value of LR into pc.
1383 ///   ldmfd sp!, {..., lr}
1384 ///   bx lr
1385 /// or
1386 ///   ldmfd sp!, {..., lr}
1387 ///   mov pc, lr
1388 /// =>
1389 ///   ldmfd sp!, {..., pc}
MergeReturnIntoLDM(MachineBasicBlock & MBB)1390 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1391   if (MBB.empty()) return false;
1392 
1393   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1394   if (MBBI != MBB.begin() &&
1395       (MBBI->getOpcode() == ARM::BX_RET ||
1396        MBBI->getOpcode() == ARM::tBX_RET ||
1397        MBBI->getOpcode() == ARM::MOVPCLR)) {
1398     MachineInstr *PrevMI = prior(MBBI);
1399     unsigned Opcode = PrevMI->getOpcode();
1400     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1401         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1402         Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1403       MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1404       if (MO.getReg() != ARM::LR)
1405         return false;
1406       unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1407       assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1408               Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1409       PrevMI->setDesc(TII->get(NewOpc));
1410       MO.setReg(ARM::PC);
1411       PrevMI->copyImplicitOps(&*MBBI);
1412       MBB.erase(MBBI);
1413       return true;
1414     }
1415   }
1416   return false;
1417 }
1418 
runOnMachineFunction(MachineFunction & Fn)1419 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1420   const TargetMachine &TM = Fn.getTarget();
1421   AFI = Fn.getInfo<ARMFunctionInfo>();
1422   TII = TM.getInstrInfo();
1423   TRI = TM.getRegisterInfo();
1424   STI = &TM.getSubtarget<ARMSubtarget>();
1425   RS = new RegScavenger();
1426   isThumb2 = AFI->isThumb2Function();
1427 
1428   bool Modified = false;
1429   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1430        ++MFI) {
1431     MachineBasicBlock &MBB = *MFI;
1432     Modified |= LoadStoreMultipleOpti(MBB);
1433     if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1434       Modified |= MergeReturnIntoLDM(MBB);
1435   }
1436 
1437   delete RS;
1438   return Modified;
1439 }
1440 
1441 
1442 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1443 /// load / stores from consecutive locations close to make it more
1444 /// likely they will be combined later.
1445 
1446 namespace {
1447   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1448     static char ID;
ARMPreAllocLoadStoreOpt__anona299b52d0211::ARMPreAllocLoadStoreOpt1449     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1450 
1451     const TargetData *TD;
1452     const TargetInstrInfo *TII;
1453     const TargetRegisterInfo *TRI;
1454     const ARMSubtarget *STI;
1455     MachineRegisterInfo *MRI;
1456     MachineFunction *MF;
1457 
1458     virtual bool runOnMachineFunction(MachineFunction &Fn);
1459 
getPassName__anona299b52d0211::ARMPreAllocLoadStoreOpt1460     virtual const char *getPassName() const {
1461       return "ARM pre- register allocation load / store optimization pass";
1462     }
1463 
1464   private:
1465     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1466                           unsigned &NewOpc, unsigned &EvenReg,
1467                           unsigned &OddReg, unsigned &BaseReg,
1468                           int &Offset,
1469                           unsigned &PredReg, ARMCC::CondCodes &Pred,
1470                           bool &isT2);
1471     bool RescheduleOps(MachineBasicBlock *MBB,
1472                        SmallVector<MachineInstr*, 4> &Ops,
1473                        unsigned Base, bool isLd,
1474                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1475     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1476   };
1477   char ARMPreAllocLoadStoreOpt::ID = 0;
1478 }
1479 
runOnMachineFunction(MachineFunction & Fn)1480 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1481   TD  = Fn.getTarget().getTargetData();
1482   TII = Fn.getTarget().getInstrInfo();
1483   TRI = Fn.getTarget().getRegisterInfo();
1484   STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1485   MRI = &Fn.getRegInfo();
1486   MF  = &Fn;
1487 
1488   bool Modified = false;
1489   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1490        ++MFI)
1491     Modified |= RescheduleLoadStoreInstrs(MFI);
1492 
1493   return Modified;
1494 }
1495 
IsSafeAndProfitableToMove(bool isLd,unsigned Base,MachineBasicBlock::iterator I,MachineBasicBlock::iterator E,SmallPtrSet<MachineInstr *,4> & MemOps,SmallSet<unsigned,4> & MemRegs,const TargetRegisterInfo * TRI)1496 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1497                                       MachineBasicBlock::iterator I,
1498                                       MachineBasicBlock::iterator E,
1499                                       SmallPtrSet<MachineInstr*, 4> &MemOps,
1500                                       SmallSet<unsigned, 4> &MemRegs,
1501                                       const TargetRegisterInfo *TRI) {
1502   // Are there stores / loads / calls between them?
1503   // FIXME: This is overly conservative. We should make use of alias information
1504   // some day.
1505   SmallSet<unsigned, 4> AddedRegPressure;
1506   while (++I != E) {
1507     if (I->isDebugValue() || MemOps.count(&*I))
1508       continue;
1509     if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1510       return false;
1511     if (isLd && I->mayStore())
1512       return false;
1513     if (!isLd) {
1514       if (I->mayLoad())
1515         return false;
1516       // It's not safe to move the first 'str' down.
1517       // str r1, [r0]
1518       // strh r5, [r0]
1519       // str r4, [r0, #+4]
1520       if (I->mayStore())
1521         return false;
1522     }
1523     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1524       MachineOperand &MO = I->getOperand(j);
1525       if (!MO.isReg())
1526         continue;
1527       unsigned Reg = MO.getReg();
1528       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1529         return false;
1530       if (Reg != Base && !MemRegs.count(Reg))
1531         AddedRegPressure.insert(Reg);
1532     }
1533   }
1534 
1535   // Estimate register pressure increase due to the transformation.
1536   if (MemRegs.size() <= 4)
1537     // Ok if we are moving small number of instructions.
1538     return true;
1539   return AddedRegPressure.size() <= MemRegs.size() * 2;
1540 }
1541 
1542 
1543 /// Copy Op0 and Op1 operands into a new array assigned to MI.
concatenateMemOperands(MachineInstr * MI,MachineInstr * Op0,MachineInstr * Op1)1544 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1545                                    MachineInstr *Op1) {
1546   assert(MI->memoperands_empty() && "expected a new machineinstr");
1547   size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1548     + (Op1->memoperands_end() - Op1->memoperands_begin());
1549 
1550   MachineFunction *MF = MI->getParent()->getParent();
1551   MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1552   MachineSDNode::mmo_iterator MemEnd =
1553     std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1554   MemEnd =
1555     std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1556   MI->setMemRefs(MemBegin, MemEnd);
1557 }
1558 
1559 bool
CanFormLdStDWord(MachineInstr * Op0,MachineInstr * Op1,DebugLoc & dl,unsigned & NewOpc,unsigned & EvenReg,unsigned & OddReg,unsigned & BaseReg,int & Offset,unsigned & PredReg,ARMCC::CondCodes & Pred,bool & isT2)1560 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1561                                           DebugLoc &dl,
1562                                           unsigned &NewOpc, unsigned &EvenReg,
1563                                           unsigned &OddReg, unsigned &BaseReg,
1564                                           int &Offset, unsigned &PredReg,
1565                                           ARMCC::CondCodes &Pred,
1566                                           bool &isT2) {
1567   // Make sure we're allowed to generate LDRD/STRD.
1568   if (!STI->hasV5TEOps())
1569     return false;
1570 
1571   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1572   unsigned Scale = 1;
1573   unsigned Opcode = Op0->getOpcode();
1574   if (Opcode == ARM::LDRi12)
1575     NewOpc = ARM::LDRD;
1576   else if (Opcode == ARM::STRi12)
1577     NewOpc = ARM::STRD;
1578   else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1579     NewOpc = ARM::t2LDRDi8;
1580     Scale = 4;
1581     isT2 = true;
1582   } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1583     NewOpc = ARM::t2STRDi8;
1584     Scale = 4;
1585     isT2 = true;
1586   } else
1587     return false;
1588 
1589   // Make sure the base address satisfies i64 ld / st alignment requirement.
1590   if (!Op0->hasOneMemOperand() ||
1591       !(*Op0->memoperands_begin())->getValue() ||
1592       (*Op0->memoperands_begin())->isVolatile())
1593     return false;
1594 
1595   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1596   const Function *Func = MF->getFunction();
1597   unsigned ReqAlign = STI->hasV6Ops()
1598     ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1599     : 8;  // Pre-v6 need 8-byte align
1600   if (Align < ReqAlign)
1601     return false;
1602 
1603   // Then make sure the immediate offset fits.
1604   int OffImm = getMemoryOpOffset(Op0);
1605   if (isT2) {
1606     int Limit = (1 << 8) * Scale;
1607     if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1608       return false;
1609     Offset = OffImm;
1610   } else {
1611     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1612     if (OffImm < 0) {
1613       AddSub = ARM_AM::sub;
1614       OffImm = - OffImm;
1615     }
1616     int Limit = (1 << 8) * Scale;
1617     if (OffImm >= Limit || (OffImm & (Scale-1)))
1618       return false;
1619     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1620   }
1621   EvenReg = Op0->getOperand(0).getReg();
1622   OddReg  = Op1->getOperand(0).getReg();
1623   if (EvenReg == OddReg)
1624     return false;
1625   BaseReg = Op0->getOperand(1).getReg();
1626   Pred = getInstrPredicate(Op0, PredReg);
1627   dl = Op0->getDebugLoc();
1628   return true;
1629 }
1630 
1631 namespace {
1632   struct OffsetCompare {
operator ()__anona299b52d0311::OffsetCompare1633     bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1634       int LOffset = getMemoryOpOffset(LHS);
1635       int ROffset = getMemoryOpOffset(RHS);
1636       assert(LHS == RHS || LOffset != ROffset);
1637       return LOffset > ROffset;
1638     }
1639   };
1640 }
1641 
RescheduleOps(MachineBasicBlock * MBB,SmallVector<MachineInstr *,4> & Ops,unsigned Base,bool isLd,DenseMap<MachineInstr *,unsigned> & MI2LocMap)1642 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1643                                  SmallVector<MachineInstr*, 4> &Ops,
1644                                  unsigned Base, bool isLd,
1645                                  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1646   bool RetVal = false;
1647 
1648   // Sort by offset (in reverse order).
1649   std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1650 
1651   // The loads / stores of the same base are in order. Scan them from first to
1652   // last and check for the following:
1653   // 1. Any def of base.
1654   // 2. Any gaps.
1655   while (Ops.size() > 1) {
1656     unsigned FirstLoc = ~0U;
1657     unsigned LastLoc = 0;
1658     MachineInstr *FirstOp = 0;
1659     MachineInstr *LastOp = 0;
1660     int LastOffset = 0;
1661     unsigned LastOpcode = 0;
1662     unsigned LastBytes = 0;
1663     unsigned NumMove = 0;
1664     for (int i = Ops.size() - 1; i >= 0; --i) {
1665       MachineInstr *Op = Ops[i];
1666       unsigned Loc = MI2LocMap[Op];
1667       if (Loc <= FirstLoc) {
1668         FirstLoc = Loc;
1669         FirstOp = Op;
1670       }
1671       if (Loc >= LastLoc) {
1672         LastLoc = Loc;
1673         LastOp = Op;
1674       }
1675 
1676       unsigned LSMOpcode
1677         = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1678       if (LastOpcode && LSMOpcode != LastOpcode)
1679         break;
1680 
1681       int Offset = getMemoryOpOffset(Op);
1682       unsigned Bytes = getLSMultipleTransferSize(Op);
1683       if (LastBytes) {
1684         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1685           break;
1686       }
1687       LastOffset = Offset;
1688       LastBytes = Bytes;
1689       LastOpcode = LSMOpcode;
1690       if (++NumMove == 8) // FIXME: Tune this limit.
1691         break;
1692     }
1693 
1694     if (NumMove <= 1)
1695       Ops.pop_back();
1696     else {
1697       SmallPtrSet<MachineInstr*, 4> MemOps;
1698       SmallSet<unsigned, 4> MemRegs;
1699       for (int i = NumMove-1; i >= 0; --i) {
1700         MemOps.insert(Ops[i]);
1701         MemRegs.insert(Ops[i]->getOperand(0).getReg());
1702       }
1703 
1704       // Be conservative, if the instructions are too far apart, don't
1705       // move them. We want to limit the increase of register pressure.
1706       bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1707       if (DoMove)
1708         DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1709                                            MemOps, MemRegs, TRI);
1710       if (!DoMove) {
1711         for (unsigned i = 0; i != NumMove; ++i)
1712           Ops.pop_back();
1713       } else {
1714         // This is the new location for the loads / stores.
1715         MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1716         while (InsertPos != MBB->end()
1717                && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1718           ++InsertPos;
1719 
1720         // If we are moving a pair of loads / stores, see if it makes sense
1721         // to try to allocate a pair of registers that can form register pairs.
1722         MachineInstr *Op0 = Ops.back();
1723         MachineInstr *Op1 = Ops[Ops.size()-2];
1724         unsigned EvenReg = 0, OddReg = 0;
1725         unsigned BaseReg = 0, PredReg = 0;
1726         ARMCC::CondCodes Pred = ARMCC::AL;
1727         bool isT2 = false;
1728         unsigned NewOpc = 0;
1729         int Offset = 0;
1730         DebugLoc dl;
1731         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1732                                              EvenReg, OddReg, BaseReg,
1733                                              Offset, PredReg, Pred, isT2)) {
1734           Ops.pop_back();
1735           Ops.pop_back();
1736 
1737           const MCInstrDesc &MCID = TII->get(NewOpc);
1738           const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
1739           MRI->constrainRegClass(EvenReg, TRC);
1740           MRI->constrainRegClass(OddReg, TRC);
1741 
1742           // Form the pair instruction.
1743           if (isLd) {
1744             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1745               .addReg(EvenReg, RegState::Define)
1746               .addReg(OddReg, RegState::Define)
1747               .addReg(BaseReg);
1748             // FIXME: We're converting from LDRi12 to an insn that still
1749             // uses addrmode2, so we need an explicit offset reg. It should
1750             // always by reg0 since we're transforming LDRi12s.
1751             if (!isT2)
1752               MIB.addReg(0);
1753             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1754             concatenateMemOperands(MIB, Op0, Op1);
1755             DEBUG(dbgs() << "Formed " << *MIB << "\n");
1756             ++NumLDRDFormed;
1757           } else {
1758             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1759               .addReg(EvenReg)
1760               .addReg(OddReg)
1761               .addReg(BaseReg);
1762             // FIXME: We're converting from LDRi12 to an insn that still
1763             // uses addrmode2, so we need an explicit offset reg. It should
1764             // always by reg0 since we're transforming STRi12s.
1765             if (!isT2)
1766               MIB.addReg(0);
1767             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1768             concatenateMemOperands(MIB, Op0, Op1);
1769             DEBUG(dbgs() << "Formed " << *MIB << "\n");
1770             ++NumSTRDFormed;
1771           }
1772           MBB->erase(Op0);
1773           MBB->erase(Op1);
1774 
1775           // Add register allocation hints to form register pairs.
1776           MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1777           MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
1778         } else {
1779           for (unsigned i = 0; i != NumMove; ++i) {
1780             MachineInstr *Op = Ops.back();
1781             Ops.pop_back();
1782             MBB->splice(InsertPos, MBB, Op);
1783           }
1784         }
1785 
1786         NumLdStMoved += NumMove;
1787         RetVal = true;
1788       }
1789     }
1790   }
1791 
1792   return RetVal;
1793 }
1794 
1795 bool
RescheduleLoadStoreInstrs(MachineBasicBlock * MBB)1796 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1797   bool RetVal = false;
1798 
1799   DenseMap<MachineInstr*, unsigned> MI2LocMap;
1800   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1801   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1802   SmallVector<unsigned, 4> LdBases;
1803   SmallVector<unsigned, 4> StBases;
1804 
1805   unsigned Loc = 0;
1806   MachineBasicBlock::iterator MBBI = MBB->begin();
1807   MachineBasicBlock::iterator E = MBB->end();
1808   while (MBBI != E) {
1809     for (; MBBI != E; ++MBBI) {
1810       MachineInstr *MI = MBBI;
1811       if (MI->isCall() || MI->isTerminator()) {
1812         // Stop at barriers.
1813         ++MBBI;
1814         break;
1815       }
1816 
1817       if (!MI->isDebugValue())
1818         MI2LocMap[MI] = ++Loc;
1819 
1820       if (!isMemoryOp(MI))
1821         continue;
1822       unsigned PredReg = 0;
1823       if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1824         continue;
1825 
1826       int Opc = MI->getOpcode();
1827       bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1828       unsigned Base = MI->getOperand(1).getReg();
1829       int Offset = getMemoryOpOffset(MI);
1830 
1831       bool StopHere = false;
1832       if (isLd) {
1833         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1834           Base2LdsMap.find(Base);
1835         if (BI != Base2LdsMap.end()) {
1836           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1837             if (Offset == getMemoryOpOffset(BI->second[i])) {
1838               StopHere = true;
1839               break;
1840             }
1841           }
1842           if (!StopHere)
1843             BI->second.push_back(MI);
1844         } else {
1845           SmallVector<MachineInstr*, 4> MIs;
1846           MIs.push_back(MI);
1847           Base2LdsMap[Base] = MIs;
1848           LdBases.push_back(Base);
1849         }
1850       } else {
1851         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1852           Base2StsMap.find(Base);
1853         if (BI != Base2StsMap.end()) {
1854           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1855             if (Offset == getMemoryOpOffset(BI->second[i])) {
1856               StopHere = true;
1857               break;
1858             }
1859           }
1860           if (!StopHere)
1861             BI->second.push_back(MI);
1862         } else {
1863           SmallVector<MachineInstr*, 4> MIs;
1864           MIs.push_back(MI);
1865           Base2StsMap[Base] = MIs;
1866           StBases.push_back(Base);
1867         }
1868       }
1869 
1870       if (StopHere) {
1871         // Found a duplicate (a base+offset combination that's seen earlier).
1872         // Backtrack.
1873         --Loc;
1874         break;
1875       }
1876     }
1877 
1878     // Re-schedule loads.
1879     for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1880       unsigned Base = LdBases[i];
1881       SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1882       if (Lds.size() > 1)
1883         RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1884     }
1885 
1886     // Re-schedule stores.
1887     for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1888       unsigned Base = StBases[i];
1889       SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1890       if (Sts.size() > 1)
1891         RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1892     }
1893 
1894     if (MBBI != E) {
1895       Base2LdsMap.clear();
1896       Base2StsMap.clear();
1897       LdBases.clear();
1898       StBases.clear();
1899     }
1900   }
1901 
1902   return RetVal;
1903 }
1904 
1905 
1906 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1907 /// optimization pass.
createARMLoadStoreOptimizationPass(bool PreAlloc)1908 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1909   if (PreAlloc)
1910     return new ARMPreAllocLoadStoreOpt();
1911   return new ARMLoadStoreOpt();
1912 }
1913