• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "arm-ldst-opt"
16 #include "ARM.h"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMRegisterInfo.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/CodeGen/SelectionDAGNodes.h"
30 #include "llvm/Target/TargetData.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/SmallPtrSet.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/Statistic.h"
41 using namespace llvm;
42 
43 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
44 STATISTIC(NumSTMGened , "Number of stm instructions generated");
45 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
46 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
47 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
48 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
49 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
50 STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
51 STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
52 STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
53 STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
54 
55 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
56 /// load / store instructions to form ldm / stm instructions.
57 
58 namespace {
59   struct ARMLoadStoreOpt : public MachineFunctionPass {
60     static char ID;
ARMLoadStoreOpt__anon16ab0f520111::ARMLoadStoreOpt61     ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
62 
63     const TargetInstrInfo *TII;
64     const TargetRegisterInfo *TRI;
65     ARMFunctionInfo *AFI;
66     RegScavenger *RS;
67     bool isThumb2;
68 
69     virtual bool runOnMachineFunction(MachineFunction &Fn);
70 
getPassName__anon16ab0f520111::ARMLoadStoreOpt71     virtual const char *getPassName() const {
72       return "ARM load / store optimization pass";
73     }
74 
75   private:
76     struct MemOpQueueEntry {
77       int Offset;
78       unsigned Reg;
79       bool isKill;
80       unsigned Position;
81       MachineBasicBlock::iterator MBBI;
82       bool Merged;
MemOpQueueEntry__anon16ab0f520111::ARMLoadStoreOpt::MemOpQueueEntry83       MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
84                       MachineBasicBlock::iterator i)
85         : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86     };
87     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
88     typedef MemOpQueue::iterator MemOpQueueIter;
89 
90     bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
91                   int Offset, unsigned Base, bool BaseKill, int Opcode,
92                   ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
93                   DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
94     void MergeOpsUpdate(MachineBasicBlock &MBB,
95                         MemOpQueue &MemOps,
96                         unsigned memOpsBegin,
97                         unsigned memOpsEnd,
98                         unsigned insertAfter,
99                         int Offset,
100                         unsigned Base,
101                         bool BaseKill,
102                         int Opcode,
103                         ARMCC::CondCodes Pred,
104                         unsigned PredReg,
105                         unsigned Scratch,
106                         DebugLoc dl,
107                         SmallVector<MachineBasicBlock::iterator, 4> &Merges);
108     void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
109                       int Opcode, unsigned Size,
110                       ARMCC::CondCodes Pred, unsigned PredReg,
111                       unsigned Scratch, MemOpQueue &MemOps,
112                       SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 
114     void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
115     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
116                              MachineBasicBlock::iterator &MBBI);
117     bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
118                                   MachineBasicBlock::iterator MBBI,
119                                   const TargetInstrInfo *TII,
120                                   bool &Advance,
121                                   MachineBasicBlock::iterator &I);
122     bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
123                                    MachineBasicBlock::iterator MBBI,
124                                    bool &Advance,
125                                    MachineBasicBlock::iterator &I);
126     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
127     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
128   };
129   char ARMLoadStoreOpt::ID = 0;
130 }
131 
getLoadStoreMultipleOpcode(int Opcode,ARM_AM::AMSubMode Mode)132 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133   switch (Opcode) {
134   default: llvm_unreachable("Unhandled opcode!");
135   case ARM::LDRi12:
136     ++NumLDMGened;
137     switch (Mode) {
138     default: llvm_unreachable("Unhandled submode!");
139     case ARM_AM::ia: return ARM::LDMIA;
140     case ARM_AM::da: return ARM::LDMDA;
141     case ARM_AM::db: return ARM::LDMDB;
142     case ARM_AM::ib: return ARM::LDMIB;
143     }
144     break;
145   case ARM::STRi12:
146     ++NumSTMGened;
147     switch (Mode) {
148     default: llvm_unreachable("Unhandled submode!");
149     case ARM_AM::ia: return ARM::STMIA;
150     case ARM_AM::da: return ARM::STMDA;
151     case ARM_AM::db: return ARM::STMDB;
152     case ARM_AM::ib: return ARM::STMIB;
153     }
154     break;
155   case ARM::t2LDRi8:
156   case ARM::t2LDRi12:
157     ++NumLDMGened;
158     switch (Mode) {
159     default: llvm_unreachable("Unhandled submode!");
160     case ARM_AM::ia: return ARM::t2LDMIA;
161     case ARM_AM::db: return ARM::t2LDMDB;
162     }
163     break;
164   case ARM::t2STRi8:
165   case ARM::t2STRi12:
166     ++NumSTMGened;
167     switch (Mode) {
168     default: llvm_unreachable("Unhandled submode!");
169     case ARM_AM::ia: return ARM::t2STMIA;
170     case ARM_AM::db: return ARM::t2STMDB;
171     }
172     break;
173   case ARM::VLDRS:
174     ++NumVLDMGened;
175     switch (Mode) {
176     default: llvm_unreachable("Unhandled submode!");
177     case ARM_AM::ia: return ARM::VLDMSIA;
178     case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
179     }
180     break;
181   case ARM::VSTRS:
182     ++NumVSTMGened;
183     switch (Mode) {
184     default: llvm_unreachable("Unhandled submode!");
185     case ARM_AM::ia: return ARM::VSTMSIA;
186     case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
187     }
188     break;
189   case ARM::VLDRD:
190     ++NumVLDMGened;
191     switch (Mode) {
192     default: llvm_unreachable("Unhandled submode!");
193     case ARM_AM::ia: return ARM::VLDMDIA;
194     case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
195     }
196     break;
197   case ARM::VSTRD:
198     ++NumVSTMGened;
199     switch (Mode) {
200     default: llvm_unreachable("Unhandled submode!");
201     case ARM_AM::ia: return ARM::VSTMDIA;
202     case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
203     }
204     break;
205   }
206 
207   return 0;
208 }
209 
210 namespace llvm {
211   namespace ARM_AM {
212 
getLoadStoreMultipleSubMode(int Opcode)213 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214   switch (Opcode) {
215   default: llvm_unreachable("Unhandled opcode!");
216   case ARM::LDMIA_RET:
217   case ARM::LDMIA:
218   case ARM::LDMIA_UPD:
219   case ARM::STMIA:
220   case ARM::STMIA_UPD:
221   case ARM::t2LDMIA_RET:
222   case ARM::t2LDMIA:
223   case ARM::t2LDMIA_UPD:
224   case ARM::t2STMIA:
225   case ARM::t2STMIA_UPD:
226   case ARM::VLDMSIA:
227   case ARM::VLDMSIA_UPD:
228   case ARM::VSTMSIA:
229   case ARM::VSTMSIA_UPD:
230   case ARM::VLDMDIA:
231   case ARM::VLDMDIA_UPD:
232   case ARM::VSTMDIA:
233   case ARM::VSTMDIA_UPD:
234     return ARM_AM::ia;
235 
236   case ARM::LDMDA:
237   case ARM::LDMDA_UPD:
238   case ARM::STMDA:
239   case ARM::STMDA_UPD:
240     return ARM_AM::da;
241 
242   case ARM::LDMDB:
243   case ARM::LDMDB_UPD:
244   case ARM::STMDB:
245   case ARM::STMDB_UPD:
246   case ARM::t2LDMDB:
247   case ARM::t2LDMDB_UPD:
248   case ARM::t2STMDB:
249   case ARM::t2STMDB_UPD:
250   case ARM::VLDMSDB_UPD:
251   case ARM::VSTMSDB_UPD:
252   case ARM::VLDMDDB_UPD:
253   case ARM::VSTMDDB_UPD:
254     return ARM_AM::db;
255 
256   case ARM::LDMIB:
257   case ARM::LDMIB_UPD:
258   case ARM::STMIB:
259   case ARM::STMIB_UPD:
260     return ARM_AM::ib;
261   }
262 
263   return ARM_AM::bad_am_submode;
264 }
265 
266   } // end namespace ARM_AM
267 } // end namespace llvm
268 
isT2i32Load(unsigned Opc)269 static bool isT2i32Load(unsigned Opc) {
270   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
271 }
272 
isi32Load(unsigned Opc)273 static bool isi32Load(unsigned Opc) {
274   return Opc == ARM::LDRi12 || isT2i32Load(Opc);
275 }
276 
isT2i32Store(unsigned Opc)277 static bool isT2i32Store(unsigned Opc) {
278   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
279 }
280 
isi32Store(unsigned Opc)281 static bool isi32Store(unsigned Opc) {
282   return Opc == ARM::STRi12 || isT2i32Store(Opc);
283 }
284 
285 /// MergeOps - Create and insert a LDM or STM with Base as base register and
286 /// registers in Regs as the register operands that would be loaded / stored.
287 /// It returns true if the transformation is done.
288 bool
MergeOps(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,SmallVector<std::pair<unsigned,bool>,8> & Regs)289 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
290                           MachineBasicBlock::iterator MBBI,
291                           int Offset, unsigned Base, bool BaseKill,
292                           int Opcode, ARMCC::CondCodes Pred,
293                           unsigned PredReg, unsigned Scratch, DebugLoc dl,
294                           SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
295   // Only a single register to load / store. Don't bother.
296   unsigned NumRegs = Regs.size();
297   if (NumRegs <= 1)
298     return false;
299 
300   ARM_AM::AMSubMode Mode = ARM_AM::ia;
301   // VFP and Thumb2 do not support IB or DA modes.
302   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
303   bool haveIBAndDA = isNotVFP && !isThumb2;
304   if (Offset == 4 && haveIBAndDA)
305     Mode = ARM_AM::ib;
306   else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
307     Mode = ARM_AM::da;
308   else if (Offset == -4 * (int)NumRegs && isNotVFP)
309     // VLDM/VSTM do not support DB mode without also updating the base reg.
310     Mode = ARM_AM::db;
311   else if (Offset != 0) {
312     // Check if this is a supported opcode before we insert instructions to
313     // calculate a new base register.
314     if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
315 
316     // If starting offset isn't zero, insert a MI to materialize a new base.
317     // But only do so if it is cost effective, i.e. merging more than two
318     // loads / stores.
319     if (NumRegs <= 2)
320       return false;
321 
322     unsigned NewBase;
323     if (isi32Load(Opcode))
324       // If it is a load, then just use one of the destination register to
325       // use as the new base.
326       NewBase = Regs[NumRegs-1].first;
327     else {
328       // Use the scratch register to use as a new base.
329       NewBase = Scratch;
330       if (NewBase == 0)
331         return false;
332     }
333     int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
334     if (Offset < 0) {
335       BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
336       Offset = - Offset;
337     }
338     int ImmedOffset = isThumb2
339       ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
340     if (ImmedOffset == -1)
341       // FIXME: Try t2ADDri12 or t2SUBri12?
342       return false;  // Probably not worth it then.
343 
344     BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
345       .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
346       .addImm(Pred).addReg(PredReg).addReg(0);
347     Base = NewBase;
348     BaseKill = true;  // New base is always killed right its use.
349   }
350 
351   bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
352                 Opcode == ARM::VLDRD);
353   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
354   if (!Opcode) return false;
355   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
356     .addReg(Base, getKillRegState(BaseKill))
357     .addImm(Pred).addReg(PredReg);
358   for (unsigned i = 0; i != NumRegs; ++i)
359     MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
360                      | getKillRegState(Regs[i].second));
361 
362   return true;
363 }
364 
365 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
366 // success.
MergeOpsUpdate(MachineBasicBlock & MBB,MemOpQueue & memOps,unsigned memOpsBegin,unsigned memOpsEnd,unsigned insertAfter,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,SmallVector<MachineBasicBlock::iterator,4> & Merges)367 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
368                                      MemOpQueue &memOps,
369                                      unsigned memOpsBegin, unsigned memOpsEnd,
370                                      unsigned insertAfter, int Offset,
371                                      unsigned Base, bool BaseKill,
372                                      int Opcode,
373                                      ARMCC::CondCodes Pred, unsigned PredReg,
374                                      unsigned Scratch,
375                                      DebugLoc dl,
376                           SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
377   // First calculate which of the registers should be killed by the merged
378   // instruction.
379   const unsigned insertPos = memOps[insertAfter].Position;
380   SmallSet<unsigned, 4> KilledRegs;
381   DenseMap<unsigned, unsigned> Killer;
382   for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
383     if (i == memOpsBegin) {
384       i = memOpsEnd;
385       if (i == e)
386         break;
387     }
388     if (memOps[i].Position < insertPos && memOps[i].isKill) {
389       unsigned Reg = memOps[i].Reg;
390       KilledRegs.insert(Reg);
391       Killer[Reg] = i;
392     }
393   }
394 
395   SmallVector<std::pair<unsigned, bool>, 8> Regs;
396   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
397     unsigned Reg = memOps[i].Reg;
398     // If we are inserting the merged operation after an operation that
399     // uses the same register, make sure to transfer any kill flag.
400     bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
401     Regs.push_back(std::make_pair(Reg, isKill));
402   }
403 
404   // Try to do the merge.
405   MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
406   ++Loc;
407   if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
408                 Pred, PredReg, Scratch, dl, Regs))
409     return;
410 
411   // Merge succeeded, update records.
412   Merges.push_back(prior(Loc));
413   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
414     // Remove kill flags from any memops that come before insertPos.
415     if (Regs[i-memOpsBegin].second) {
416       unsigned Reg = Regs[i-memOpsBegin].first;
417       if (KilledRegs.count(Reg)) {
418         unsigned j = Killer[Reg];
419         int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
420         assert(Idx >= 0 && "Cannot find killing operand");
421         memOps[j].MBBI->getOperand(Idx).setIsKill(false);
422         memOps[j].isKill = false;
423       }
424       memOps[i].isKill = true;
425     }
426     MBB.erase(memOps[i].MBBI);
427     // Update this memop to refer to the merged instruction.
428     // We may need to move kill flags again.
429     memOps[i].Merged = true;
430     memOps[i].MBBI = Merges.back();
431     memOps[i].Position = insertPos;
432   }
433 }
434 
435 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
436 /// load / store multiple instructions.
437 void
MergeLDR_STR(MachineBasicBlock & MBB,unsigned SIndex,unsigned Base,int Opcode,unsigned Size,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,MemOpQueue & MemOps,SmallVector<MachineBasicBlock::iterator,4> & Merges)438 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
439                           unsigned Base, int Opcode, unsigned Size,
440                           ARMCC::CondCodes Pred, unsigned PredReg,
441                           unsigned Scratch, MemOpQueue &MemOps,
442                           SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
443   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
444   int Offset = MemOps[SIndex].Offset;
445   int SOffset = Offset;
446   unsigned insertAfter = SIndex;
447   MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
448   DebugLoc dl = Loc->getDebugLoc();
449   const MachineOperand &PMO = Loc->getOperand(0);
450   unsigned PReg = PMO.getReg();
451   unsigned PRegNum = PMO.isUndef() ? UINT_MAX
452     : getARMRegisterNumbering(PReg);
453   unsigned Count = 1;
454   unsigned Limit = ~0U;
455 
456   // vldm / vstm limit are 32 for S variants, 16 for D variants.
457 
458   switch (Opcode) {
459   default: break;
460   case ARM::VSTRS:
461     Limit = 32;
462     break;
463   case ARM::VSTRD:
464     Limit = 16;
465     break;
466   case ARM::VLDRD:
467     Limit = 16;
468     break;
469   case ARM::VLDRS:
470     Limit = 32;
471     break;
472   }
473 
474   for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
475     int NewOffset = MemOps[i].Offset;
476     const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
477     unsigned Reg = MO.getReg();
478     unsigned RegNum = MO.isUndef() ? UINT_MAX
479       : getARMRegisterNumbering(Reg);
480     // Register numbers must be in ascending order. For VFP / NEON load and
481     // store multiples, the registers must also be consecutive and within the
482     // limit on the number of registers per instruction.
483     if (Reg != ARM::SP &&
484         NewOffset == Offset + (int)Size &&
485         ((isNotVFP && RegNum > PRegNum) ||
486          ((Count < Limit) && RegNum == PRegNum+1))) {
487       Offset += Size;
488       PRegNum = RegNum;
489       ++Count;
490     } else {
491       // Can't merge this in. Try merge the earlier ones first.
492       MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
493                      Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
494       MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
495                    MemOps, Merges);
496       return;
497     }
498 
499     if (MemOps[i].Position > MemOps[insertAfter].Position)
500       insertAfter = i;
501   }
502 
503   bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
504   MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
505                  Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
506   return;
507 }
508 
isMatchingDecrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)509 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
510                                        unsigned Bytes, unsigned Limit,
511                                        ARMCC::CondCodes Pred, unsigned PredReg){
512   unsigned MyPredReg = 0;
513   if (!MI)
514     return false;
515   if (MI->getOpcode() != ARM::t2SUBri &&
516       MI->getOpcode() != ARM::tSUBspi &&
517       MI->getOpcode() != ARM::SUBri)
518     return false;
519 
520   // Make sure the offset fits in 8 bits.
521   if (Bytes == 0 || (Limit && Bytes >= Limit))
522     return false;
523 
524   unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
525   return (MI->getOperand(0).getReg() == Base &&
526           MI->getOperand(1).getReg() == Base &&
527           (MI->getOperand(2).getImm()*Scale) == Bytes &&
528           llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
529           MyPredReg == PredReg);
530 }
531 
isMatchingIncrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)532 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
533                                        unsigned Bytes, unsigned Limit,
534                                        ARMCC::CondCodes Pred, unsigned PredReg){
535   unsigned MyPredReg = 0;
536   if (!MI)
537     return false;
538   if (MI->getOpcode() != ARM::t2ADDri &&
539       MI->getOpcode() != ARM::tADDspi &&
540       MI->getOpcode() != ARM::ADDri)
541     return false;
542 
543   if (Bytes == 0 || (Limit && Bytes >= Limit))
544     // Make sure the offset fits in 8 bits.
545     return false;
546 
547   unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
548   return (MI->getOperand(0).getReg() == Base &&
549           MI->getOperand(1).getReg() == Base &&
550           (MI->getOperand(2).getImm()*Scale) == Bytes &&
551           llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
552           MyPredReg == PredReg);
553 }
554 
getLSMultipleTransferSize(MachineInstr * MI)555 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
556   switch (MI->getOpcode()) {
557   default: return 0;
558   case ARM::LDRi12:
559   case ARM::STRi12:
560   case ARM::t2LDRi8:
561   case ARM::t2LDRi12:
562   case ARM::t2STRi8:
563   case ARM::t2STRi12:
564   case ARM::VLDRS:
565   case ARM::VSTRS:
566     return 4;
567   case ARM::VLDRD:
568   case ARM::VSTRD:
569     return 8;
570   case ARM::LDMIA:
571   case ARM::LDMDA:
572   case ARM::LDMDB:
573   case ARM::LDMIB:
574   case ARM::STMIA:
575   case ARM::STMDA:
576   case ARM::STMDB:
577   case ARM::STMIB:
578   case ARM::t2LDMIA:
579   case ARM::t2LDMDB:
580   case ARM::t2STMIA:
581   case ARM::t2STMDB:
582   case ARM::VLDMSIA:
583   case ARM::VSTMSIA:
584     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
585   case ARM::VLDMDIA:
586   case ARM::VSTMDIA:
587     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
588   }
589 }
590 
getUpdatingLSMultipleOpcode(unsigned Opc,ARM_AM::AMSubMode Mode)591 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
592                                             ARM_AM::AMSubMode Mode) {
593   switch (Opc) {
594   default: llvm_unreachable("Unhandled opcode!");
595   case ARM::LDMIA:
596   case ARM::LDMDA:
597   case ARM::LDMDB:
598   case ARM::LDMIB:
599     switch (Mode) {
600     default: llvm_unreachable("Unhandled submode!");
601     case ARM_AM::ia: return ARM::LDMIA_UPD;
602     case ARM_AM::ib: return ARM::LDMIB_UPD;
603     case ARM_AM::da: return ARM::LDMDA_UPD;
604     case ARM_AM::db: return ARM::LDMDB_UPD;
605     }
606     break;
607   case ARM::STMIA:
608   case ARM::STMDA:
609   case ARM::STMDB:
610   case ARM::STMIB:
611     switch (Mode) {
612     default: llvm_unreachable("Unhandled submode!");
613     case ARM_AM::ia: return ARM::STMIA_UPD;
614     case ARM_AM::ib: return ARM::STMIB_UPD;
615     case ARM_AM::da: return ARM::STMDA_UPD;
616     case ARM_AM::db: return ARM::STMDB_UPD;
617     }
618     break;
619   case ARM::t2LDMIA:
620   case ARM::t2LDMDB:
621     switch (Mode) {
622     default: llvm_unreachable("Unhandled submode!");
623     case ARM_AM::ia: return ARM::t2LDMIA_UPD;
624     case ARM_AM::db: return ARM::t2LDMDB_UPD;
625     }
626     break;
627   case ARM::t2STMIA:
628   case ARM::t2STMDB:
629     switch (Mode) {
630     default: llvm_unreachable("Unhandled submode!");
631     case ARM_AM::ia: return ARM::t2STMIA_UPD;
632     case ARM_AM::db: return ARM::t2STMDB_UPD;
633     }
634     break;
635   case ARM::VLDMSIA:
636     switch (Mode) {
637     default: llvm_unreachable("Unhandled submode!");
638     case ARM_AM::ia: return ARM::VLDMSIA_UPD;
639     case ARM_AM::db: return ARM::VLDMSDB_UPD;
640     }
641     break;
642   case ARM::VLDMDIA:
643     switch (Mode) {
644     default: llvm_unreachable("Unhandled submode!");
645     case ARM_AM::ia: return ARM::VLDMDIA_UPD;
646     case ARM_AM::db: return ARM::VLDMDDB_UPD;
647     }
648     break;
649   case ARM::VSTMSIA:
650     switch (Mode) {
651     default: llvm_unreachable("Unhandled submode!");
652     case ARM_AM::ia: return ARM::VSTMSIA_UPD;
653     case ARM_AM::db: return ARM::VSTMSDB_UPD;
654     }
655     break;
656   case ARM::VSTMDIA:
657     switch (Mode) {
658     default: llvm_unreachable("Unhandled submode!");
659     case ARM_AM::ia: return ARM::VSTMDIA_UPD;
660     case ARM_AM::db: return ARM::VSTMDDB_UPD;
661     }
662     break;
663   }
664 
665   return 0;
666 }
667 
668 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
669 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
670 ///
671 /// stmia rn, <ra, rb, rc>
672 /// rn := rn + 4 * 3;
673 /// =>
674 /// stmia rn!, <ra, rb, rc>
675 ///
676 /// rn := rn - 4 * 3;
677 /// ldmia rn, <ra, rb, rc>
678 /// =>
679 /// ldmdb rn!, <ra, rb, rc>
MergeBaseUpdateLSMultiple(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool & Advance,MachineBasicBlock::iterator & I)680 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
681                                                MachineBasicBlock::iterator MBBI,
682                                                bool &Advance,
683                                                MachineBasicBlock::iterator &I) {
684   MachineInstr *MI = MBBI;
685   unsigned Base = MI->getOperand(0).getReg();
686   bool BaseKill = MI->getOperand(0).isKill();
687   unsigned Bytes = getLSMultipleTransferSize(MI);
688   unsigned PredReg = 0;
689   ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
690   int Opcode = MI->getOpcode();
691   DebugLoc dl = MI->getDebugLoc();
692 
693   // Can't use an updating ld/st if the base register is also a dest
694   // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
695   for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
696     if (MI->getOperand(i).getReg() == Base)
697       return false;
698 
699   bool DoMerge = false;
700   ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
701 
702   // Try merging with the previous instruction.
703   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
704   if (MBBI != BeginMBBI) {
705     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
706     while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
707       --PrevMBBI;
708     if (Mode == ARM_AM::ia &&
709         isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
710       Mode = ARM_AM::db;
711       DoMerge = true;
712     } else if (Mode == ARM_AM::ib &&
713                isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
714       Mode = ARM_AM::da;
715       DoMerge = true;
716     }
717     if (DoMerge)
718       MBB.erase(PrevMBBI);
719   }
720 
721   // Try merging with the next instruction.
722   MachineBasicBlock::iterator EndMBBI = MBB.end();
723   if (!DoMerge && MBBI != EndMBBI) {
724     MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
725     while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
726       ++NextMBBI;
727     if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
728         isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
729       DoMerge = true;
730     } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
731                isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
732       DoMerge = true;
733     }
734     if (DoMerge) {
735       if (NextMBBI == I) {
736         Advance = true;
737         ++I;
738       }
739       MBB.erase(NextMBBI);
740     }
741   }
742 
743   if (!DoMerge)
744     return false;
745 
746   unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
747   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
748     .addReg(Base, getDefRegState(true)) // WB base register
749     .addReg(Base, getKillRegState(BaseKill))
750     .addImm(Pred).addReg(PredReg);
751 
752   // Transfer the rest of operands.
753   for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
754     MIB.addOperand(MI->getOperand(OpNum));
755 
756   // Transfer memoperands.
757   MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
758 
759   MBB.erase(MBBI);
760   return true;
761 }
762 
getPreIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)763 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
764                                              ARM_AM::AddrOpc Mode) {
765   switch (Opc) {
766   case ARM::LDRi12:
767     return ARM::LDR_PRE_IMM;
768   case ARM::STRi12:
769     return ARM::STR_PRE_IMM;
770   case ARM::VLDRS:
771     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
772   case ARM::VLDRD:
773     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
774   case ARM::VSTRS:
775     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
776   case ARM::VSTRD:
777     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
778   case ARM::t2LDRi8:
779   case ARM::t2LDRi12:
780     return ARM::t2LDR_PRE;
781   case ARM::t2STRi8:
782   case ARM::t2STRi12:
783     return ARM::t2STR_PRE;
784   default: llvm_unreachable("Unhandled opcode!");
785   }
786   return 0;
787 }
788 
getPostIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)789 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
790                                               ARM_AM::AddrOpc Mode) {
791   switch (Opc) {
792   case ARM::LDRi12:
793     return ARM::LDR_POST_IMM;
794   case ARM::STRi12:
795     return ARM::STR_POST_IMM;
796   case ARM::VLDRS:
797     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
798   case ARM::VLDRD:
799     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
800   case ARM::VSTRS:
801     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
802   case ARM::VSTRD:
803     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
804   case ARM::t2LDRi8:
805   case ARM::t2LDRi12:
806     return ARM::t2LDR_POST;
807   case ARM::t2STRi8:
808   case ARM::t2STRi12:
809     return ARM::t2STR_POST;
810   default: llvm_unreachable("Unhandled opcode!");
811   }
812   return 0;
813 }
814 
815 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
816 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
MergeBaseUpdateLoadStore(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const TargetInstrInfo * TII,bool & Advance,MachineBasicBlock::iterator & I)817 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
818                                                MachineBasicBlock::iterator MBBI,
819                                                const TargetInstrInfo *TII,
820                                                bool &Advance,
821                                                MachineBasicBlock::iterator &I) {
822   MachineInstr *MI = MBBI;
823   unsigned Base = MI->getOperand(1).getReg();
824   bool BaseKill = MI->getOperand(1).isKill();
825   unsigned Bytes = getLSMultipleTransferSize(MI);
826   int Opcode = MI->getOpcode();
827   DebugLoc dl = MI->getDebugLoc();
828   bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
829                 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
830   bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
831   if (isi32Load(Opcode) || isi32Store(Opcode))
832     if (MI->getOperand(2).getImm() != 0)
833       return false;
834   if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
835     return false;
836 
837   bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
838   // Can't do the merge if the destination register is the same as the would-be
839   // writeback register.
840   if (isLd && MI->getOperand(0).getReg() == Base)
841     return false;
842 
843   unsigned PredReg = 0;
844   ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
845   bool DoMerge = false;
846   ARM_AM::AddrOpc AddSub = ARM_AM::add;
847   unsigned NewOpc = 0;
848   // AM2 - 12 bits, thumb2 - 8 bits.
849   unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
850 
851   // Try merging with the previous instruction.
852   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
853   if (MBBI != BeginMBBI) {
854     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
855     while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
856       --PrevMBBI;
857     if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
858       DoMerge = true;
859       AddSub = ARM_AM::sub;
860     } else if (!isAM5 &&
861                isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
862       DoMerge = true;
863     }
864     if (DoMerge) {
865       NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
866       MBB.erase(PrevMBBI);
867     }
868   }
869 
870   // Try merging with the next instruction.
871   MachineBasicBlock::iterator EndMBBI = MBB.end();
872   if (!DoMerge && MBBI != EndMBBI) {
873     MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
874     while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
875       ++NextMBBI;
876     if (!isAM5 &&
877         isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
878       DoMerge = true;
879       AddSub = ARM_AM::sub;
880     } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
881       DoMerge = true;
882     }
883     if (DoMerge) {
884       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
885       if (NextMBBI == I) {
886         Advance = true;
887         ++I;
888       }
889       MBB.erase(NextMBBI);
890     }
891   }
892 
893   if (!DoMerge)
894     return false;
895 
896   if (isAM5) {
897     // VLDM[SD}_UPD, VSTM[SD]_UPD
898     // (There are no base-updating versions of VLDR/VSTR instructions, but the
899     // updating load/store-multiple instructions can be used with only one
900     // register.)
901     MachineOperand &MO = MI->getOperand(0);
902     BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
903       .addReg(Base, getDefRegState(true)) // WB base register
904       .addReg(Base, getKillRegState(isLd ? BaseKill : false))
905       .addImm(Pred).addReg(PredReg)
906       .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
907                             getKillRegState(MO.isKill())));
908   } else if (isLd) {
909     if (isAM2) {
910       // LDR_PRE, LDR_POST
911       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
912         int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
913         BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
914           .addReg(Base, RegState::Define)
915           .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
916       } else {
917         int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
918         BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
919           .addReg(Base, RegState::Define)
920           .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
921       }
922     } else {
923       int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
924       // t2LDR_PRE, t2LDR_POST
925       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
926         .addReg(Base, RegState::Define)
927         .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
928     }
929   } else {
930     MachineOperand &MO = MI->getOperand(0);
931     // FIXME: post-indexed stores use am2offset_imm, which still encodes
932     // the vestigal zero-reg offset register. When that's fixed, this clause
933     // can be removed entirely.
934     if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
935       int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
936       // STR_PRE, STR_POST
937       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
938         .addReg(MO.getReg(), getKillRegState(MO.isKill()))
939         .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
940     } else {
941       int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
942       // t2STR_PRE, t2STR_POST
943       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
944         .addReg(MO.getReg(), getKillRegState(MO.isKill()))
945         .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
946     }
947   }
948   MBB.erase(MBBI);
949 
950   return true;
951 }
952 
953 /// isMemoryOp - Returns true if instruction is a memory operation that this
954 /// pass is capable of operating on.
isMemoryOp(const MachineInstr * MI)955 static bool isMemoryOp(const MachineInstr *MI) {
956   // When no memory operands are present, conservatively assume unaligned,
957   // volatile, unfoldable.
958   if (!MI->hasOneMemOperand())
959     return false;
960 
961   const MachineMemOperand *MMO = *MI->memoperands_begin();
962 
963   // Don't touch volatile memory accesses - we may be changing their order.
964   if (MMO->isVolatile())
965     return false;
966 
967   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
968   // not.
969   if (MMO->getAlignment() < 4)
970     return false;
971 
972   // str <undef> could probably be eliminated entirely, but for now we just want
973   // to avoid making a mess of it.
974   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
975   if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
976       MI->getOperand(0).isUndef())
977     return false;
978 
979   // Likewise don't mess with references to undefined addresses.
980   if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
981       MI->getOperand(1).isUndef())
982     return false;
983 
984   int Opcode = MI->getOpcode();
985   switch (Opcode) {
986   default: break;
987   case ARM::VLDRS:
988   case ARM::VSTRS:
989     return MI->getOperand(1).isReg();
990   case ARM::VLDRD:
991   case ARM::VSTRD:
992     return MI->getOperand(1).isReg();
993   case ARM::LDRi12:
994   case ARM::STRi12:
995   case ARM::t2LDRi8:
996   case ARM::t2LDRi12:
997   case ARM::t2STRi8:
998   case ARM::t2STRi12:
999     return MI->getOperand(1).isReg();
1000   }
1001   return false;
1002 }
1003 
1004 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1005 /// op that is being merged.
AdvanceRS(MachineBasicBlock & MBB,MemOpQueue & MemOps)1006 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1007   MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1008   unsigned Position = MemOps[0].Position;
1009   for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1010     if (MemOps[i].Position < Position) {
1011       Position = MemOps[i].Position;
1012       Loc = MemOps[i].MBBI;
1013     }
1014   }
1015 
1016   if (Loc != MBB.begin())
1017     RS->forward(prior(Loc));
1018 }
1019 
getMemoryOpOffset(const MachineInstr * MI)1020 static int getMemoryOpOffset(const MachineInstr *MI) {
1021   int Opcode = MI->getOpcode();
1022   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1023   unsigned NumOperands = MI->getDesc().getNumOperands();
1024   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1025 
1026   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1027       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1028       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1029       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
1030     return OffField;
1031 
1032   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1033     : ARM_AM::getAM5Offset(OffField) * 4;
1034   if (isAM3) {
1035     if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1036       Offset = -Offset;
1037   } else {
1038     if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1039       Offset = -Offset;
1040   }
1041   return Offset;
1042 }
1043 
InsertLDR_STR(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,int Offset,bool isDef,DebugLoc dl,unsigned NewOpc,unsigned Reg,bool RegDeadKill,bool RegUndef,unsigned BaseReg,bool BaseKill,bool BaseUndef,bool OffKill,bool OffUndef,ARMCC::CondCodes Pred,unsigned PredReg,const TargetInstrInfo * TII,bool isT2)1044 static void InsertLDR_STR(MachineBasicBlock &MBB,
1045                           MachineBasicBlock::iterator &MBBI,
1046                           int Offset, bool isDef,
1047                           DebugLoc dl, unsigned NewOpc,
1048                           unsigned Reg, bool RegDeadKill, bool RegUndef,
1049                           unsigned BaseReg, bool BaseKill, bool BaseUndef,
1050                           bool OffKill, bool OffUndef,
1051                           ARMCC::CondCodes Pred, unsigned PredReg,
1052                           const TargetInstrInfo *TII, bool isT2) {
1053   if (isDef) {
1054     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1055                                       TII->get(NewOpc))
1056       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1057       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1058     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1059   } else {
1060     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1061                                       TII->get(NewOpc))
1062       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1063       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1064     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1065   }
1066 }
1067 
FixInvalidRegPairOp(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI)1068 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1069                                           MachineBasicBlock::iterator &MBBI) {
1070   MachineInstr *MI = &*MBBI;
1071   unsigned Opcode = MI->getOpcode();
1072   if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1073       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1074     unsigned EvenReg = MI->getOperand(0).getReg();
1075     unsigned OddReg  = MI->getOperand(1).getReg();
1076     unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1077     unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
1078     if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1079       return false;
1080 
1081     MachineBasicBlock::iterator NewBBI = MBBI;
1082     bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1083     bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1084     bool EvenDeadKill = isLd ?
1085       MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1086     bool EvenUndef = MI->getOperand(0).isUndef();
1087     bool OddDeadKill  = isLd ?
1088       MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1089     bool OddUndef = MI->getOperand(1).isUndef();
1090     const MachineOperand &BaseOp = MI->getOperand(2);
1091     unsigned BaseReg = BaseOp.getReg();
1092     bool BaseKill = BaseOp.isKill();
1093     bool BaseUndef = BaseOp.isUndef();
1094     bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1095     bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1096     int OffImm = getMemoryOpOffset(MI);
1097     unsigned PredReg = 0;
1098     ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1099 
1100     if (OddRegNum > EvenRegNum && OffImm == 0) {
1101       // Ascending register numbers and no offset. It's safe to change it to a
1102       // ldm or stm.
1103       unsigned NewOpc = (isLd)
1104         ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1105         : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1106       if (isLd) {
1107         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1108           .addReg(BaseReg, getKillRegState(BaseKill))
1109           .addImm(Pred).addReg(PredReg)
1110           .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1111           .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1112         ++NumLDRD2LDM;
1113       } else {
1114         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1115           .addReg(BaseReg, getKillRegState(BaseKill))
1116           .addImm(Pred).addReg(PredReg)
1117           .addReg(EvenReg,
1118                   getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1119           .addReg(OddReg,
1120                   getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
1121         ++NumSTRD2STM;
1122       }
1123       NewBBI = llvm::prior(MBBI);
1124     } else {
1125       // Split into two instructions.
1126       unsigned NewOpc = (isLd)
1127         ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1128         : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1129       DebugLoc dl = MBBI->getDebugLoc();
1130       // If this is a load and base register is killed, it may have been
1131       // re-defed by the load, make sure the first load does not clobber it.
1132       if (isLd &&
1133           (BaseKill || OffKill) &&
1134           (TRI->regsOverlap(EvenReg, BaseReg))) {
1135         assert(!TRI->regsOverlap(OddReg, BaseReg));
1136         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1137                       OddReg, OddDeadKill, false,
1138                       BaseReg, false, BaseUndef, false, OffUndef,
1139                       Pred, PredReg, TII, isT2);
1140         NewBBI = llvm::prior(MBBI);
1141         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1142                       EvenReg, EvenDeadKill, false,
1143                       BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1144                       Pred, PredReg, TII, isT2);
1145       } else {
1146         if (OddReg == EvenReg && EvenDeadKill) {
1147           // If the two source operands are the same, the kill marker is
1148           // probably on the first one. e.g.
1149           // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1150           EvenDeadKill = false;
1151           OddDeadKill = true;
1152         }
1153         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1154                       EvenReg, EvenDeadKill, EvenUndef,
1155                       BaseReg, false, BaseUndef, false, OffUndef,
1156                       Pred, PredReg, TII, isT2);
1157         NewBBI = llvm::prior(MBBI);
1158         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1159                       OddReg, OddDeadKill, OddUndef,
1160                       BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1161                       Pred, PredReg, TII, isT2);
1162       }
1163       if (isLd)
1164         ++NumLDRD2LDR;
1165       else
1166         ++NumSTRD2STR;
1167     }
1168 
1169     MBB.erase(MI);
1170     MBBI = NewBBI;
1171     return true;
1172   }
1173   return false;
1174 }
1175 
1176 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1177 /// ops of the same base and incrementing offset into LDM / STM ops.
LoadStoreMultipleOpti(MachineBasicBlock & MBB)1178 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1179   unsigned NumMerges = 0;
1180   unsigned NumMemOps = 0;
1181   MemOpQueue MemOps;
1182   unsigned CurrBase = 0;
1183   int CurrOpc = -1;
1184   unsigned CurrSize = 0;
1185   ARMCC::CondCodes CurrPred = ARMCC::AL;
1186   unsigned CurrPredReg = 0;
1187   unsigned Position = 0;
1188   SmallVector<MachineBasicBlock::iterator,4> Merges;
1189 
1190   RS->enterBasicBlock(&MBB);
1191   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1192   while (MBBI != E) {
1193     if (FixInvalidRegPairOp(MBB, MBBI))
1194       continue;
1195 
1196     bool Advance  = false;
1197     bool TryMerge = false;
1198     bool Clobber  = false;
1199 
1200     bool isMemOp = isMemoryOp(MBBI);
1201     if (isMemOp) {
1202       int Opcode = MBBI->getOpcode();
1203       unsigned Size = getLSMultipleTransferSize(MBBI);
1204       const MachineOperand &MO = MBBI->getOperand(0);
1205       unsigned Reg = MO.getReg();
1206       bool isKill = MO.isDef() ? false : MO.isKill();
1207       unsigned Base = MBBI->getOperand(1).getReg();
1208       unsigned PredReg = 0;
1209       ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1210       int Offset = getMemoryOpOffset(MBBI);
1211       // Watch out for:
1212       // r4 := ldr [r5]
1213       // r5 := ldr [r5, #4]
1214       // r6 := ldr [r5, #8]
1215       //
1216       // The second ldr has effectively broken the chain even though it
1217       // looks like the later ldr(s) use the same base register. Try to
1218       // merge the ldr's so far, including this one. But don't try to
1219       // combine the following ldr(s).
1220       Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1221       if (CurrBase == 0 && !Clobber) {
1222         // Start of a new chain.
1223         CurrBase = Base;
1224         CurrOpc  = Opcode;
1225         CurrSize = Size;
1226         CurrPred = Pred;
1227         CurrPredReg = PredReg;
1228         MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1229         ++NumMemOps;
1230         Advance = true;
1231       } else {
1232         if (Clobber) {
1233           TryMerge = true;
1234           Advance = true;
1235         }
1236 
1237         if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1238           // No need to match PredReg.
1239           // Continue adding to the queue.
1240           if (Offset > MemOps.back().Offset) {
1241             MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1242                                              Position, MBBI));
1243             ++NumMemOps;
1244             Advance = true;
1245           } else {
1246             for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1247                  I != E; ++I) {
1248               if (Offset < I->Offset) {
1249                 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1250                                                  Position, MBBI));
1251                 ++NumMemOps;
1252                 Advance = true;
1253                 break;
1254               } else if (Offset == I->Offset) {
1255                 // Collision! This can't be merged!
1256                 break;
1257               }
1258             }
1259           }
1260         }
1261       }
1262     }
1263 
1264     if (MBBI->isDebugValue()) {
1265       ++MBBI;
1266       if (MBBI == E)
1267         // Reach the end of the block, try merging the memory instructions.
1268         TryMerge = true;
1269     } else if (Advance) {
1270       ++Position;
1271       ++MBBI;
1272       if (MBBI == E)
1273         // Reach the end of the block, try merging the memory instructions.
1274         TryMerge = true;
1275     } else
1276       TryMerge = true;
1277 
1278     if (TryMerge) {
1279       if (NumMemOps > 1) {
1280         // Try to find a free register to use as a new base in case it's needed.
1281         // First advance to the instruction just before the start of the chain.
1282         AdvanceRS(MBB, MemOps);
1283         // Find a scratch register.
1284         unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1285         // Process the load / store instructions.
1286         RS->forward(prior(MBBI));
1287 
1288         // Merge ops.
1289         Merges.clear();
1290         MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1291                      CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1292 
1293         // Try folding preceding/trailing base inc/dec into the generated
1294         // LDM/STM ops.
1295         for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1296           if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1297             ++NumMerges;
1298         NumMerges += Merges.size();
1299 
1300         // Try folding preceding/trailing base inc/dec into those load/store
1301         // that were not merged to form LDM/STM ops.
1302         for (unsigned i = 0; i != NumMemOps; ++i)
1303           if (!MemOps[i].Merged)
1304             if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1305               ++NumMerges;
1306 
1307         // RS may be pointing to an instruction that's deleted.
1308         RS->skipTo(prior(MBBI));
1309       } else if (NumMemOps == 1) {
1310         // Try folding preceding/trailing base inc/dec into the single
1311         // load/store.
1312         if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1313           ++NumMerges;
1314           RS->forward(prior(MBBI));
1315         }
1316       }
1317 
1318       CurrBase = 0;
1319       CurrOpc = -1;
1320       CurrSize = 0;
1321       CurrPred = ARMCC::AL;
1322       CurrPredReg = 0;
1323       if (NumMemOps) {
1324         MemOps.clear();
1325         NumMemOps = 0;
1326       }
1327 
1328       // If iterator hasn't been advanced and this is not a memory op, skip it.
1329       // It can't start a new chain anyway.
1330       if (!Advance && !isMemOp && MBBI != E) {
1331         ++Position;
1332         ++MBBI;
1333       }
1334     }
1335   }
1336   return NumMerges > 0;
1337 }
1338 
1339 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1340 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1341 /// directly restore the value of LR into pc.
1342 ///   ldmfd sp!, {..., lr}
1343 ///   bx lr
1344 /// or
1345 ///   ldmfd sp!, {..., lr}
1346 ///   mov pc, lr
1347 /// =>
1348 ///   ldmfd sp!, {..., pc}
MergeReturnIntoLDM(MachineBasicBlock & MBB)1349 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1350   if (MBB.empty()) return false;
1351 
1352   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1353   if (MBBI != MBB.begin() &&
1354       (MBBI->getOpcode() == ARM::BX_RET ||
1355        MBBI->getOpcode() == ARM::tBX_RET ||
1356        MBBI->getOpcode() == ARM::MOVPCLR)) {
1357     MachineInstr *PrevMI = prior(MBBI);
1358     unsigned Opcode = PrevMI->getOpcode();
1359     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1360         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1361         Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1362       MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1363       if (MO.getReg() != ARM::LR)
1364         return false;
1365       unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1366       assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1367               Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1368       PrevMI->setDesc(TII->get(NewOpc));
1369       MO.setReg(ARM::PC);
1370       PrevMI->copyImplicitOps(&*MBBI);
1371       MBB.erase(MBBI);
1372       return true;
1373     }
1374   }
1375   return false;
1376 }
1377 
runOnMachineFunction(MachineFunction & Fn)1378 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1379   const TargetMachine &TM = Fn.getTarget();
1380   AFI = Fn.getInfo<ARMFunctionInfo>();
1381   TII = TM.getInstrInfo();
1382   TRI = TM.getRegisterInfo();
1383   RS = new RegScavenger();
1384   isThumb2 = AFI->isThumb2Function();
1385 
1386   bool Modified = false;
1387   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1388        ++MFI) {
1389     MachineBasicBlock &MBB = *MFI;
1390     Modified |= LoadStoreMultipleOpti(MBB);
1391     if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1392       Modified |= MergeReturnIntoLDM(MBB);
1393   }
1394 
1395   delete RS;
1396   return Modified;
1397 }
1398 
1399 
1400 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1401 /// load / stores from consecutive locations close to make it more
1402 /// likely they will be combined later.
1403 
1404 namespace {
1405   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1406     static char ID;
ARMPreAllocLoadStoreOpt__anon16ab0f520211::ARMPreAllocLoadStoreOpt1407     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1408 
1409     const TargetData *TD;
1410     const TargetInstrInfo *TII;
1411     const TargetRegisterInfo *TRI;
1412     const ARMSubtarget *STI;
1413     MachineRegisterInfo *MRI;
1414     MachineFunction *MF;
1415 
1416     virtual bool runOnMachineFunction(MachineFunction &Fn);
1417 
getPassName__anon16ab0f520211::ARMPreAllocLoadStoreOpt1418     virtual const char *getPassName() const {
1419       return "ARM pre- register allocation load / store optimization pass";
1420     }
1421 
1422   private:
1423     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1424                           unsigned &NewOpc, unsigned &EvenReg,
1425                           unsigned &OddReg, unsigned &BaseReg,
1426                           int &Offset,
1427                           unsigned &PredReg, ARMCC::CondCodes &Pred,
1428                           bool &isT2);
1429     bool RescheduleOps(MachineBasicBlock *MBB,
1430                        SmallVector<MachineInstr*, 4> &Ops,
1431                        unsigned Base, bool isLd,
1432                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1433     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1434   };
1435   char ARMPreAllocLoadStoreOpt::ID = 0;
1436 }
1437 
runOnMachineFunction(MachineFunction & Fn)1438 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1439   TD  = Fn.getTarget().getTargetData();
1440   TII = Fn.getTarget().getInstrInfo();
1441   TRI = Fn.getTarget().getRegisterInfo();
1442   STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1443   MRI = &Fn.getRegInfo();
1444   MF  = &Fn;
1445 
1446   bool Modified = false;
1447   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1448        ++MFI)
1449     Modified |= RescheduleLoadStoreInstrs(MFI);
1450 
1451   return Modified;
1452 }
1453 
IsSafeAndProfitableToMove(bool isLd,unsigned Base,MachineBasicBlock::iterator I,MachineBasicBlock::iterator E,SmallPtrSet<MachineInstr *,4> & MemOps,SmallSet<unsigned,4> & MemRegs,const TargetRegisterInfo * TRI)1454 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1455                                       MachineBasicBlock::iterator I,
1456                                       MachineBasicBlock::iterator E,
1457                                       SmallPtrSet<MachineInstr*, 4> &MemOps,
1458                                       SmallSet<unsigned, 4> &MemRegs,
1459                                       const TargetRegisterInfo *TRI) {
1460   // Are there stores / loads / calls between them?
1461   // FIXME: This is overly conservative. We should make use of alias information
1462   // some day.
1463   SmallSet<unsigned, 4> AddedRegPressure;
1464   while (++I != E) {
1465     if (I->isDebugValue() || MemOps.count(&*I))
1466       continue;
1467     const MCInstrDesc &MCID = I->getDesc();
1468     if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
1469       return false;
1470     if (isLd && MCID.mayStore())
1471       return false;
1472     if (!isLd) {
1473       if (MCID.mayLoad())
1474         return false;
1475       // It's not safe to move the first 'str' down.
1476       // str r1, [r0]
1477       // strh r5, [r0]
1478       // str r4, [r0, #+4]
1479       if (MCID.mayStore())
1480         return false;
1481     }
1482     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1483       MachineOperand &MO = I->getOperand(j);
1484       if (!MO.isReg())
1485         continue;
1486       unsigned Reg = MO.getReg();
1487       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1488         return false;
1489       if (Reg != Base && !MemRegs.count(Reg))
1490         AddedRegPressure.insert(Reg);
1491     }
1492   }
1493 
1494   // Estimate register pressure increase due to the transformation.
1495   if (MemRegs.size() <= 4)
1496     // Ok if we are moving small number of instructions.
1497     return true;
1498   return AddedRegPressure.size() <= MemRegs.size() * 2;
1499 }
1500 
1501 bool
CanFormLdStDWord(MachineInstr * Op0,MachineInstr * Op1,DebugLoc & dl,unsigned & NewOpc,unsigned & EvenReg,unsigned & OddReg,unsigned & BaseReg,int & Offset,unsigned & PredReg,ARMCC::CondCodes & Pred,bool & isT2)1502 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1503                                           DebugLoc &dl,
1504                                           unsigned &NewOpc, unsigned &EvenReg,
1505                                           unsigned &OddReg, unsigned &BaseReg,
1506                                           int &Offset, unsigned &PredReg,
1507                                           ARMCC::CondCodes &Pred,
1508                                           bool &isT2) {
1509   // Make sure we're allowed to generate LDRD/STRD.
1510   if (!STI->hasV5TEOps())
1511     return false;
1512 
1513   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1514   unsigned Scale = 1;
1515   unsigned Opcode = Op0->getOpcode();
1516   if (Opcode == ARM::LDRi12)
1517     NewOpc = ARM::LDRD;
1518   else if (Opcode == ARM::STRi12)
1519     NewOpc = ARM::STRD;
1520   else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1521     NewOpc = ARM::t2LDRDi8;
1522     Scale = 4;
1523     isT2 = true;
1524   } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1525     NewOpc = ARM::t2STRDi8;
1526     Scale = 4;
1527     isT2 = true;
1528   } else
1529     return false;
1530 
1531   // Make sure the base address satisfies i64 ld / st alignment requirement.
1532   if (!Op0->hasOneMemOperand() ||
1533       !(*Op0->memoperands_begin())->getValue() ||
1534       (*Op0->memoperands_begin())->isVolatile())
1535     return false;
1536 
1537   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1538   const Function *Func = MF->getFunction();
1539   unsigned ReqAlign = STI->hasV6Ops()
1540     ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1541     : 8;  // Pre-v6 need 8-byte align
1542   if (Align < ReqAlign)
1543     return false;
1544 
1545   // Then make sure the immediate offset fits.
1546   int OffImm = getMemoryOpOffset(Op0);
1547   if (isT2) {
1548     int Limit = (1 << 8) * Scale;
1549     if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1550       return false;
1551     Offset = OffImm;
1552   } else {
1553     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1554     if (OffImm < 0) {
1555       AddSub = ARM_AM::sub;
1556       OffImm = - OffImm;
1557     }
1558     int Limit = (1 << 8) * Scale;
1559     if (OffImm >= Limit || (OffImm & (Scale-1)))
1560       return false;
1561     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1562   }
1563   EvenReg = Op0->getOperand(0).getReg();
1564   OddReg  = Op1->getOperand(0).getReg();
1565   if (EvenReg == OddReg)
1566     return false;
1567   BaseReg = Op0->getOperand(1).getReg();
1568   Pred = llvm::getInstrPredicate(Op0, PredReg);
1569   dl = Op0->getDebugLoc();
1570   return true;
1571 }
1572 
1573 namespace {
1574   struct OffsetCompare {
operator ()__anon16ab0f520311::OffsetCompare1575     bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1576       int LOffset = getMemoryOpOffset(LHS);
1577       int ROffset = getMemoryOpOffset(RHS);
1578       assert(LHS == RHS || LOffset != ROffset);
1579       return LOffset > ROffset;
1580     }
1581   };
1582 }
1583 
RescheduleOps(MachineBasicBlock * MBB,SmallVector<MachineInstr *,4> & Ops,unsigned Base,bool isLd,DenseMap<MachineInstr *,unsigned> & MI2LocMap)1584 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1585                                  SmallVector<MachineInstr*, 4> &Ops,
1586                                  unsigned Base, bool isLd,
1587                                  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1588   bool RetVal = false;
1589 
1590   // Sort by offset (in reverse order).
1591   std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1592 
1593   // The loads / stores of the same base are in order. Scan them from first to
1594   // last and check for the following:
1595   // 1. Any def of base.
1596   // 2. Any gaps.
1597   while (Ops.size() > 1) {
1598     unsigned FirstLoc = ~0U;
1599     unsigned LastLoc = 0;
1600     MachineInstr *FirstOp = 0;
1601     MachineInstr *LastOp = 0;
1602     int LastOffset = 0;
1603     unsigned LastOpcode = 0;
1604     unsigned LastBytes = 0;
1605     unsigned NumMove = 0;
1606     for (int i = Ops.size() - 1; i >= 0; --i) {
1607       MachineInstr *Op = Ops[i];
1608       unsigned Loc = MI2LocMap[Op];
1609       if (Loc <= FirstLoc) {
1610         FirstLoc = Loc;
1611         FirstOp = Op;
1612       }
1613       if (Loc >= LastLoc) {
1614         LastLoc = Loc;
1615         LastOp = Op;
1616       }
1617 
1618       unsigned Opcode = Op->getOpcode();
1619       if (LastOpcode && Opcode != LastOpcode)
1620         break;
1621 
1622       int Offset = getMemoryOpOffset(Op);
1623       unsigned Bytes = getLSMultipleTransferSize(Op);
1624       if (LastBytes) {
1625         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1626           break;
1627       }
1628       LastOffset = Offset;
1629       LastBytes = Bytes;
1630       LastOpcode = Opcode;
1631       if (++NumMove == 8) // FIXME: Tune this limit.
1632         break;
1633     }
1634 
1635     if (NumMove <= 1)
1636       Ops.pop_back();
1637     else {
1638       SmallPtrSet<MachineInstr*, 4> MemOps;
1639       SmallSet<unsigned, 4> MemRegs;
1640       for (int i = NumMove-1; i >= 0; --i) {
1641         MemOps.insert(Ops[i]);
1642         MemRegs.insert(Ops[i]->getOperand(0).getReg());
1643       }
1644 
1645       // Be conservative, if the instructions are too far apart, don't
1646       // move them. We want to limit the increase of register pressure.
1647       bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1648       if (DoMove)
1649         DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1650                                            MemOps, MemRegs, TRI);
1651       if (!DoMove) {
1652         for (unsigned i = 0; i != NumMove; ++i)
1653           Ops.pop_back();
1654       } else {
1655         // This is the new location for the loads / stores.
1656         MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1657         while (InsertPos != MBB->end()
1658                && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1659           ++InsertPos;
1660 
1661         // If we are moving a pair of loads / stores, see if it makes sense
1662         // to try to allocate a pair of registers that can form register pairs.
1663         MachineInstr *Op0 = Ops.back();
1664         MachineInstr *Op1 = Ops[Ops.size()-2];
1665         unsigned EvenReg = 0, OddReg = 0;
1666         unsigned BaseReg = 0, PredReg = 0;
1667         ARMCC::CondCodes Pred = ARMCC::AL;
1668         bool isT2 = false;
1669         unsigned NewOpc = 0;
1670         int Offset = 0;
1671         DebugLoc dl;
1672         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1673                                              EvenReg, OddReg, BaseReg,
1674                                              Offset, PredReg, Pred, isT2)) {
1675           Ops.pop_back();
1676           Ops.pop_back();
1677 
1678           const MCInstrDesc &MCID = TII->get(NewOpc);
1679           const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1680           MRI->constrainRegClass(EvenReg, TRC);
1681           MRI->constrainRegClass(OddReg, TRC);
1682 
1683           // Form the pair instruction.
1684           if (isLd) {
1685             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1686               .addReg(EvenReg, RegState::Define)
1687               .addReg(OddReg, RegState::Define)
1688               .addReg(BaseReg);
1689             // FIXME: We're converting from LDRi12 to an insn that still
1690             // uses addrmode2, so we need an explicit offset reg. It should
1691             // always by reg0 since we're transforming LDRi12s.
1692             if (!isT2)
1693               MIB.addReg(0);
1694             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1695             ++NumLDRDFormed;
1696           } else {
1697             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1698               .addReg(EvenReg)
1699               .addReg(OddReg)
1700               .addReg(BaseReg);
1701             // FIXME: We're converting from LDRi12 to an insn that still
1702             // uses addrmode2, so we need an explicit offset reg. It should
1703             // always by reg0 since we're transforming STRi12s.
1704             if (!isT2)
1705               MIB.addReg(0);
1706             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1707             ++NumSTRDFormed;
1708           }
1709           MBB->erase(Op0);
1710           MBB->erase(Op1);
1711 
1712           // Add register allocation hints to form register pairs.
1713           MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1714           MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
1715         } else {
1716           for (unsigned i = 0; i != NumMove; ++i) {
1717             MachineInstr *Op = Ops.back();
1718             Ops.pop_back();
1719             MBB->splice(InsertPos, MBB, Op);
1720           }
1721         }
1722 
1723         NumLdStMoved += NumMove;
1724         RetVal = true;
1725       }
1726     }
1727   }
1728 
1729   return RetVal;
1730 }
1731 
1732 bool
RescheduleLoadStoreInstrs(MachineBasicBlock * MBB)1733 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1734   bool RetVal = false;
1735 
1736   DenseMap<MachineInstr*, unsigned> MI2LocMap;
1737   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1738   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1739   SmallVector<unsigned, 4> LdBases;
1740   SmallVector<unsigned, 4> StBases;
1741 
1742   unsigned Loc = 0;
1743   MachineBasicBlock::iterator MBBI = MBB->begin();
1744   MachineBasicBlock::iterator E = MBB->end();
1745   while (MBBI != E) {
1746     for (; MBBI != E; ++MBBI) {
1747       MachineInstr *MI = MBBI;
1748       const MCInstrDesc &MCID = MI->getDesc();
1749       if (MCID.isCall() || MCID.isTerminator()) {
1750         // Stop at barriers.
1751         ++MBBI;
1752         break;
1753       }
1754 
1755       if (!MI->isDebugValue())
1756         MI2LocMap[MI] = ++Loc;
1757 
1758       if (!isMemoryOp(MI))
1759         continue;
1760       unsigned PredReg = 0;
1761       if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1762         continue;
1763 
1764       int Opc = MI->getOpcode();
1765       bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1766       unsigned Base = MI->getOperand(1).getReg();
1767       int Offset = getMemoryOpOffset(MI);
1768 
1769       bool StopHere = false;
1770       if (isLd) {
1771         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1772           Base2LdsMap.find(Base);
1773         if (BI != Base2LdsMap.end()) {
1774           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1775             if (Offset == getMemoryOpOffset(BI->second[i])) {
1776               StopHere = true;
1777               break;
1778             }
1779           }
1780           if (!StopHere)
1781             BI->second.push_back(MI);
1782         } else {
1783           SmallVector<MachineInstr*, 4> MIs;
1784           MIs.push_back(MI);
1785           Base2LdsMap[Base] = MIs;
1786           LdBases.push_back(Base);
1787         }
1788       } else {
1789         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1790           Base2StsMap.find(Base);
1791         if (BI != Base2StsMap.end()) {
1792           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1793             if (Offset == getMemoryOpOffset(BI->second[i])) {
1794               StopHere = true;
1795               break;
1796             }
1797           }
1798           if (!StopHere)
1799             BI->second.push_back(MI);
1800         } else {
1801           SmallVector<MachineInstr*, 4> MIs;
1802           MIs.push_back(MI);
1803           Base2StsMap[Base] = MIs;
1804           StBases.push_back(Base);
1805         }
1806       }
1807 
1808       if (StopHere) {
1809         // Found a duplicate (a base+offset combination that's seen earlier).
1810         // Backtrack.
1811         --Loc;
1812         break;
1813       }
1814     }
1815 
1816     // Re-schedule loads.
1817     for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1818       unsigned Base = LdBases[i];
1819       SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1820       if (Lds.size() > 1)
1821         RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1822     }
1823 
1824     // Re-schedule stores.
1825     for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1826       unsigned Base = StBases[i];
1827       SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1828       if (Sts.size() > 1)
1829         RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1830     }
1831 
1832     if (MBBI != E) {
1833       Base2LdsMap.clear();
1834       Base2StsMap.clear();
1835       LdBases.clear();
1836       StBases.clear();
1837     }
1838   }
1839 
1840   return RetVal;
1841 }
1842 
1843 
1844 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1845 /// optimization pass.
createARMLoadStoreOptimizationPass(bool PreAlloc)1846 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1847   if (PreAlloc)
1848     return new ARMPreAllocLoadStoreOpt();
1849   return new ARMLoadStoreOpt();
1850 }
1851