1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #define DEBUG_TYPE "arm-ldst-opt"
16 #include "ARM.h"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMRegisterInfo.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/CodeGen/SelectionDAGNodes.h"
30 #include "llvm/Target/TargetData.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/SmallPtrSet.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/Statistic.h"
41 using namespace llvm;
42
43 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
44 STATISTIC(NumSTMGened , "Number of stm instructions generated");
45 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
46 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
47 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
48 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
49 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
50 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
51 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
52 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
53 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54
55 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
56 /// load / store instructions to form ldm / stm instructions.
57
58 namespace {
59 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 static char ID;
ARMLoadStoreOpt__anon16ab0f520111::ARMLoadStoreOpt61 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
62
63 const TargetInstrInfo *TII;
64 const TargetRegisterInfo *TRI;
65 ARMFunctionInfo *AFI;
66 RegScavenger *RS;
67 bool isThumb2;
68
69 virtual bool runOnMachineFunction(MachineFunction &Fn);
70
getPassName__anon16ab0f520111::ARMLoadStoreOpt71 virtual const char *getPassName() const {
72 return "ARM load / store optimization pass";
73 }
74
75 private:
76 struct MemOpQueueEntry {
77 int Offset;
78 unsigned Reg;
79 bool isKill;
80 unsigned Position;
81 MachineBasicBlock::iterator MBBI;
82 bool Merged;
MemOpQueueEntry__anon16ab0f520111::ARMLoadStoreOpt::MemOpQueueEntry83 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
84 MachineBasicBlock::iterator i)
85 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 };
87 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
88 typedef MemOpQueue::iterator MemOpQueueIter;
89
90 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
91 int Offset, unsigned Base, bool BaseKill, int Opcode,
92 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
93 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
94 void MergeOpsUpdate(MachineBasicBlock &MBB,
95 MemOpQueue &MemOps,
96 unsigned memOpsBegin,
97 unsigned memOpsEnd,
98 unsigned insertAfter,
99 int Offset,
100 unsigned Base,
101 bool BaseKill,
102 int Opcode,
103 ARMCC::CondCodes Pred,
104 unsigned PredReg,
105 unsigned Scratch,
106 DebugLoc dl,
107 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
108 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
109 int Opcode, unsigned Size,
110 ARMCC::CondCodes Pred, unsigned PredReg,
111 unsigned Scratch, MemOpQueue &MemOps,
112 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113
114 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
115 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
116 MachineBasicBlock::iterator &MBBI);
117 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
118 MachineBasicBlock::iterator MBBI,
119 const TargetInstrInfo *TII,
120 bool &Advance,
121 MachineBasicBlock::iterator &I);
122 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
123 MachineBasicBlock::iterator MBBI,
124 bool &Advance,
125 MachineBasicBlock::iterator &I);
126 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
127 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
128 };
129 char ARMLoadStoreOpt::ID = 0;
130 }
131
getLoadStoreMultipleOpcode(int Opcode,ARM_AM::AMSubMode Mode)132 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133 switch (Opcode) {
134 default: llvm_unreachable("Unhandled opcode!");
135 case ARM::LDRi12:
136 ++NumLDMGened;
137 switch (Mode) {
138 default: llvm_unreachable("Unhandled submode!");
139 case ARM_AM::ia: return ARM::LDMIA;
140 case ARM_AM::da: return ARM::LDMDA;
141 case ARM_AM::db: return ARM::LDMDB;
142 case ARM_AM::ib: return ARM::LDMIB;
143 }
144 break;
145 case ARM::STRi12:
146 ++NumSTMGened;
147 switch (Mode) {
148 default: llvm_unreachable("Unhandled submode!");
149 case ARM_AM::ia: return ARM::STMIA;
150 case ARM_AM::da: return ARM::STMDA;
151 case ARM_AM::db: return ARM::STMDB;
152 case ARM_AM::ib: return ARM::STMIB;
153 }
154 break;
155 case ARM::t2LDRi8:
156 case ARM::t2LDRi12:
157 ++NumLDMGened;
158 switch (Mode) {
159 default: llvm_unreachable("Unhandled submode!");
160 case ARM_AM::ia: return ARM::t2LDMIA;
161 case ARM_AM::db: return ARM::t2LDMDB;
162 }
163 break;
164 case ARM::t2STRi8:
165 case ARM::t2STRi12:
166 ++NumSTMGened;
167 switch (Mode) {
168 default: llvm_unreachable("Unhandled submode!");
169 case ARM_AM::ia: return ARM::t2STMIA;
170 case ARM_AM::db: return ARM::t2STMDB;
171 }
172 break;
173 case ARM::VLDRS:
174 ++NumVLDMGened;
175 switch (Mode) {
176 default: llvm_unreachable("Unhandled submode!");
177 case ARM_AM::ia: return ARM::VLDMSIA;
178 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
179 }
180 break;
181 case ARM::VSTRS:
182 ++NumVSTMGened;
183 switch (Mode) {
184 default: llvm_unreachable("Unhandled submode!");
185 case ARM_AM::ia: return ARM::VSTMSIA;
186 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
187 }
188 break;
189 case ARM::VLDRD:
190 ++NumVLDMGened;
191 switch (Mode) {
192 default: llvm_unreachable("Unhandled submode!");
193 case ARM_AM::ia: return ARM::VLDMDIA;
194 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
195 }
196 break;
197 case ARM::VSTRD:
198 ++NumVSTMGened;
199 switch (Mode) {
200 default: llvm_unreachable("Unhandled submode!");
201 case ARM_AM::ia: return ARM::VSTMDIA;
202 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
203 }
204 break;
205 }
206
207 return 0;
208 }
209
210 namespace llvm {
211 namespace ARM_AM {
212
getLoadStoreMultipleSubMode(int Opcode)213 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214 switch (Opcode) {
215 default: llvm_unreachable("Unhandled opcode!");
216 case ARM::LDMIA_RET:
217 case ARM::LDMIA:
218 case ARM::LDMIA_UPD:
219 case ARM::STMIA:
220 case ARM::STMIA_UPD:
221 case ARM::t2LDMIA_RET:
222 case ARM::t2LDMIA:
223 case ARM::t2LDMIA_UPD:
224 case ARM::t2STMIA:
225 case ARM::t2STMIA_UPD:
226 case ARM::VLDMSIA:
227 case ARM::VLDMSIA_UPD:
228 case ARM::VSTMSIA:
229 case ARM::VSTMSIA_UPD:
230 case ARM::VLDMDIA:
231 case ARM::VLDMDIA_UPD:
232 case ARM::VSTMDIA:
233 case ARM::VSTMDIA_UPD:
234 return ARM_AM::ia;
235
236 case ARM::LDMDA:
237 case ARM::LDMDA_UPD:
238 case ARM::STMDA:
239 case ARM::STMDA_UPD:
240 return ARM_AM::da;
241
242 case ARM::LDMDB:
243 case ARM::LDMDB_UPD:
244 case ARM::STMDB:
245 case ARM::STMDB_UPD:
246 case ARM::t2LDMDB:
247 case ARM::t2LDMDB_UPD:
248 case ARM::t2STMDB:
249 case ARM::t2STMDB_UPD:
250 case ARM::VLDMSDB_UPD:
251 case ARM::VSTMSDB_UPD:
252 case ARM::VLDMDDB_UPD:
253 case ARM::VSTMDDB_UPD:
254 return ARM_AM::db;
255
256 case ARM::LDMIB:
257 case ARM::LDMIB_UPD:
258 case ARM::STMIB:
259 case ARM::STMIB_UPD:
260 return ARM_AM::ib;
261 }
262
263 return ARM_AM::bad_am_submode;
264 }
265
266 } // end namespace ARM_AM
267 } // end namespace llvm
268
isT2i32Load(unsigned Opc)269 static bool isT2i32Load(unsigned Opc) {
270 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
271 }
272
isi32Load(unsigned Opc)273 static bool isi32Load(unsigned Opc) {
274 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
275 }
276
isT2i32Store(unsigned Opc)277 static bool isT2i32Store(unsigned Opc) {
278 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
279 }
280
isi32Store(unsigned Opc)281 static bool isi32Store(unsigned Opc) {
282 return Opc == ARM::STRi12 || isT2i32Store(Opc);
283 }
284
285 /// MergeOps - Create and insert a LDM or STM with Base as base register and
286 /// registers in Regs as the register operands that would be loaded / stored.
287 /// It returns true if the transformation is done.
288 bool
MergeOps(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,SmallVector<std::pair<unsigned,bool>,8> & Regs)289 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
290 MachineBasicBlock::iterator MBBI,
291 int Offset, unsigned Base, bool BaseKill,
292 int Opcode, ARMCC::CondCodes Pred,
293 unsigned PredReg, unsigned Scratch, DebugLoc dl,
294 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
295 // Only a single register to load / store. Don't bother.
296 unsigned NumRegs = Regs.size();
297 if (NumRegs <= 1)
298 return false;
299
300 ARM_AM::AMSubMode Mode = ARM_AM::ia;
301 // VFP and Thumb2 do not support IB or DA modes.
302 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
303 bool haveIBAndDA = isNotVFP && !isThumb2;
304 if (Offset == 4 && haveIBAndDA)
305 Mode = ARM_AM::ib;
306 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
307 Mode = ARM_AM::da;
308 else if (Offset == -4 * (int)NumRegs && isNotVFP)
309 // VLDM/VSTM do not support DB mode without also updating the base reg.
310 Mode = ARM_AM::db;
311 else if (Offset != 0) {
312 // Check if this is a supported opcode before we insert instructions to
313 // calculate a new base register.
314 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
315
316 // If starting offset isn't zero, insert a MI to materialize a new base.
317 // But only do so if it is cost effective, i.e. merging more than two
318 // loads / stores.
319 if (NumRegs <= 2)
320 return false;
321
322 unsigned NewBase;
323 if (isi32Load(Opcode))
324 // If it is a load, then just use one of the destination register to
325 // use as the new base.
326 NewBase = Regs[NumRegs-1].first;
327 else {
328 // Use the scratch register to use as a new base.
329 NewBase = Scratch;
330 if (NewBase == 0)
331 return false;
332 }
333 int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
334 if (Offset < 0) {
335 BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
336 Offset = - Offset;
337 }
338 int ImmedOffset = isThumb2
339 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
340 if (ImmedOffset == -1)
341 // FIXME: Try t2ADDri12 or t2SUBri12?
342 return false; // Probably not worth it then.
343
344 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
345 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
346 .addImm(Pred).addReg(PredReg).addReg(0);
347 Base = NewBase;
348 BaseKill = true; // New base is always killed right its use.
349 }
350
351 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
352 Opcode == ARM::VLDRD);
353 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
354 if (!Opcode) return false;
355 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
356 .addReg(Base, getKillRegState(BaseKill))
357 .addImm(Pred).addReg(PredReg);
358 for (unsigned i = 0; i != NumRegs; ++i)
359 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
360 | getKillRegState(Regs[i].second));
361
362 return true;
363 }
364
365 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
366 // success.
MergeOpsUpdate(MachineBasicBlock & MBB,MemOpQueue & memOps,unsigned memOpsBegin,unsigned memOpsEnd,unsigned insertAfter,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,SmallVector<MachineBasicBlock::iterator,4> & Merges)367 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
368 MemOpQueue &memOps,
369 unsigned memOpsBegin, unsigned memOpsEnd,
370 unsigned insertAfter, int Offset,
371 unsigned Base, bool BaseKill,
372 int Opcode,
373 ARMCC::CondCodes Pred, unsigned PredReg,
374 unsigned Scratch,
375 DebugLoc dl,
376 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
377 // First calculate which of the registers should be killed by the merged
378 // instruction.
379 const unsigned insertPos = memOps[insertAfter].Position;
380 SmallSet<unsigned, 4> KilledRegs;
381 DenseMap<unsigned, unsigned> Killer;
382 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
383 if (i == memOpsBegin) {
384 i = memOpsEnd;
385 if (i == e)
386 break;
387 }
388 if (memOps[i].Position < insertPos && memOps[i].isKill) {
389 unsigned Reg = memOps[i].Reg;
390 KilledRegs.insert(Reg);
391 Killer[Reg] = i;
392 }
393 }
394
395 SmallVector<std::pair<unsigned, bool>, 8> Regs;
396 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
397 unsigned Reg = memOps[i].Reg;
398 // If we are inserting the merged operation after an operation that
399 // uses the same register, make sure to transfer any kill flag.
400 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
401 Regs.push_back(std::make_pair(Reg, isKill));
402 }
403
404 // Try to do the merge.
405 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
406 ++Loc;
407 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
408 Pred, PredReg, Scratch, dl, Regs))
409 return;
410
411 // Merge succeeded, update records.
412 Merges.push_back(prior(Loc));
413 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
414 // Remove kill flags from any memops that come before insertPos.
415 if (Regs[i-memOpsBegin].second) {
416 unsigned Reg = Regs[i-memOpsBegin].first;
417 if (KilledRegs.count(Reg)) {
418 unsigned j = Killer[Reg];
419 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
420 assert(Idx >= 0 && "Cannot find killing operand");
421 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
422 memOps[j].isKill = false;
423 }
424 memOps[i].isKill = true;
425 }
426 MBB.erase(memOps[i].MBBI);
427 // Update this memop to refer to the merged instruction.
428 // We may need to move kill flags again.
429 memOps[i].Merged = true;
430 memOps[i].MBBI = Merges.back();
431 memOps[i].Position = insertPos;
432 }
433 }
434
435 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
436 /// load / store multiple instructions.
437 void
MergeLDR_STR(MachineBasicBlock & MBB,unsigned SIndex,unsigned Base,int Opcode,unsigned Size,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,MemOpQueue & MemOps,SmallVector<MachineBasicBlock::iterator,4> & Merges)438 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
439 unsigned Base, int Opcode, unsigned Size,
440 ARMCC::CondCodes Pred, unsigned PredReg,
441 unsigned Scratch, MemOpQueue &MemOps,
442 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
443 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
444 int Offset = MemOps[SIndex].Offset;
445 int SOffset = Offset;
446 unsigned insertAfter = SIndex;
447 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
448 DebugLoc dl = Loc->getDebugLoc();
449 const MachineOperand &PMO = Loc->getOperand(0);
450 unsigned PReg = PMO.getReg();
451 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
452 : getARMRegisterNumbering(PReg);
453 unsigned Count = 1;
454 unsigned Limit = ~0U;
455
456 // vldm / vstm limit are 32 for S variants, 16 for D variants.
457
458 switch (Opcode) {
459 default: break;
460 case ARM::VSTRS:
461 Limit = 32;
462 break;
463 case ARM::VSTRD:
464 Limit = 16;
465 break;
466 case ARM::VLDRD:
467 Limit = 16;
468 break;
469 case ARM::VLDRS:
470 Limit = 32;
471 break;
472 }
473
474 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
475 int NewOffset = MemOps[i].Offset;
476 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
477 unsigned Reg = MO.getReg();
478 unsigned RegNum = MO.isUndef() ? UINT_MAX
479 : getARMRegisterNumbering(Reg);
480 // Register numbers must be in ascending order. For VFP / NEON load and
481 // store multiples, the registers must also be consecutive and within the
482 // limit on the number of registers per instruction.
483 if (Reg != ARM::SP &&
484 NewOffset == Offset + (int)Size &&
485 ((isNotVFP && RegNum > PRegNum) ||
486 ((Count < Limit) && RegNum == PRegNum+1))) {
487 Offset += Size;
488 PRegNum = RegNum;
489 ++Count;
490 } else {
491 // Can't merge this in. Try merge the earlier ones first.
492 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
493 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
494 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
495 MemOps, Merges);
496 return;
497 }
498
499 if (MemOps[i].Position > MemOps[insertAfter].Position)
500 insertAfter = i;
501 }
502
503 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
504 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
505 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
506 return;
507 }
508
isMatchingDecrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)509 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
510 unsigned Bytes, unsigned Limit,
511 ARMCC::CondCodes Pred, unsigned PredReg){
512 unsigned MyPredReg = 0;
513 if (!MI)
514 return false;
515 if (MI->getOpcode() != ARM::t2SUBri &&
516 MI->getOpcode() != ARM::tSUBspi &&
517 MI->getOpcode() != ARM::SUBri)
518 return false;
519
520 // Make sure the offset fits in 8 bits.
521 if (Bytes == 0 || (Limit && Bytes >= Limit))
522 return false;
523
524 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
525 return (MI->getOperand(0).getReg() == Base &&
526 MI->getOperand(1).getReg() == Base &&
527 (MI->getOperand(2).getImm()*Scale) == Bytes &&
528 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
529 MyPredReg == PredReg);
530 }
531
isMatchingIncrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)532 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
533 unsigned Bytes, unsigned Limit,
534 ARMCC::CondCodes Pred, unsigned PredReg){
535 unsigned MyPredReg = 0;
536 if (!MI)
537 return false;
538 if (MI->getOpcode() != ARM::t2ADDri &&
539 MI->getOpcode() != ARM::tADDspi &&
540 MI->getOpcode() != ARM::ADDri)
541 return false;
542
543 if (Bytes == 0 || (Limit && Bytes >= Limit))
544 // Make sure the offset fits in 8 bits.
545 return false;
546
547 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
548 return (MI->getOperand(0).getReg() == Base &&
549 MI->getOperand(1).getReg() == Base &&
550 (MI->getOperand(2).getImm()*Scale) == Bytes &&
551 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
552 MyPredReg == PredReg);
553 }
554
getLSMultipleTransferSize(MachineInstr * MI)555 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
556 switch (MI->getOpcode()) {
557 default: return 0;
558 case ARM::LDRi12:
559 case ARM::STRi12:
560 case ARM::t2LDRi8:
561 case ARM::t2LDRi12:
562 case ARM::t2STRi8:
563 case ARM::t2STRi12:
564 case ARM::VLDRS:
565 case ARM::VSTRS:
566 return 4;
567 case ARM::VLDRD:
568 case ARM::VSTRD:
569 return 8;
570 case ARM::LDMIA:
571 case ARM::LDMDA:
572 case ARM::LDMDB:
573 case ARM::LDMIB:
574 case ARM::STMIA:
575 case ARM::STMDA:
576 case ARM::STMDB:
577 case ARM::STMIB:
578 case ARM::t2LDMIA:
579 case ARM::t2LDMDB:
580 case ARM::t2STMIA:
581 case ARM::t2STMDB:
582 case ARM::VLDMSIA:
583 case ARM::VSTMSIA:
584 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
585 case ARM::VLDMDIA:
586 case ARM::VSTMDIA:
587 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
588 }
589 }
590
getUpdatingLSMultipleOpcode(unsigned Opc,ARM_AM::AMSubMode Mode)591 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
592 ARM_AM::AMSubMode Mode) {
593 switch (Opc) {
594 default: llvm_unreachable("Unhandled opcode!");
595 case ARM::LDMIA:
596 case ARM::LDMDA:
597 case ARM::LDMDB:
598 case ARM::LDMIB:
599 switch (Mode) {
600 default: llvm_unreachable("Unhandled submode!");
601 case ARM_AM::ia: return ARM::LDMIA_UPD;
602 case ARM_AM::ib: return ARM::LDMIB_UPD;
603 case ARM_AM::da: return ARM::LDMDA_UPD;
604 case ARM_AM::db: return ARM::LDMDB_UPD;
605 }
606 break;
607 case ARM::STMIA:
608 case ARM::STMDA:
609 case ARM::STMDB:
610 case ARM::STMIB:
611 switch (Mode) {
612 default: llvm_unreachable("Unhandled submode!");
613 case ARM_AM::ia: return ARM::STMIA_UPD;
614 case ARM_AM::ib: return ARM::STMIB_UPD;
615 case ARM_AM::da: return ARM::STMDA_UPD;
616 case ARM_AM::db: return ARM::STMDB_UPD;
617 }
618 break;
619 case ARM::t2LDMIA:
620 case ARM::t2LDMDB:
621 switch (Mode) {
622 default: llvm_unreachable("Unhandled submode!");
623 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
624 case ARM_AM::db: return ARM::t2LDMDB_UPD;
625 }
626 break;
627 case ARM::t2STMIA:
628 case ARM::t2STMDB:
629 switch (Mode) {
630 default: llvm_unreachable("Unhandled submode!");
631 case ARM_AM::ia: return ARM::t2STMIA_UPD;
632 case ARM_AM::db: return ARM::t2STMDB_UPD;
633 }
634 break;
635 case ARM::VLDMSIA:
636 switch (Mode) {
637 default: llvm_unreachable("Unhandled submode!");
638 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
639 case ARM_AM::db: return ARM::VLDMSDB_UPD;
640 }
641 break;
642 case ARM::VLDMDIA:
643 switch (Mode) {
644 default: llvm_unreachable("Unhandled submode!");
645 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
646 case ARM_AM::db: return ARM::VLDMDDB_UPD;
647 }
648 break;
649 case ARM::VSTMSIA:
650 switch (Mode) {
651 default: llvm_unreachable("Unhandled submode!");
652 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
653 case ARM_AM::db: return ARM::VSTMSDB_UPD;
654 }
655 break;
656 case ARM::VSTMDIA:
657 switch (Mode) {
658 default: llvm_unreachable("Unhandled submode!");
659 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
660 case ARM_AM::db: return ARM::VSTMDDB_UPD;
661 }
662 break;
663 }
664
665 return 0;
666 }
667
668 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
669 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
670 ///
671 /// stmia rn, <ra, rb, rc>
672 /// rn := rn + 4 * 3;
673 /// =>
674 /// stmia rn!, <ra, rb, rc>
675 ///
676 /// rn := rn - 4 * 3;
677 /// ldmia rn, <ra, rb, rc>
678 /// =>
679 /// ldmdb rn!, <ra, rb, rc>
MergeBaseUpdateLSMultiple(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool & Advance,MachineBasicBlock::iterator & I)680 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
681 MachineBasicBlock::iterator MBBI,
682 bool &Advance,
683 MachineBasicBlock::iterator &I) {
684 MachineInstr *MI = MBBI;
685 unsigned Base = MI->getOperand(0).getReg();
686 bool BaseKill = MI->getOperand(0).isKill();
687 unsigned Bytes = getLSMultipleTransferSize(MI);
688 unsigned PredReg = 0;
689 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
690 int Opcode = MI->getOpcode();
691 DebugLoc dl = MI->getDebugLoc();
692
693 // Can't use an updating ld/st if the base register is also a dest
694 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
695 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
696 if (MI->getOperand(i).getReg() == Base)
697 return false;
698
699 bool DoMerge = false;
700 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
701
702 // Try merging with the previous instruction.
703 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
704 if (MBBI != BeginMBBI) {
705 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
706 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
707 --PrevMBBI;
708 if (Mode == ARM_AM::ia &&
709 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
710 Mode = ARM_AM::db;
711 DoMerge = true;
712 } else if (Mode == ARM_AM::ib &&
713 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
714 Mode = ARM_AM::da;
715 DoMerge = true;
716 }
717 if (DoMerge)
718 MBB.erase(PrevMBBI);
719 }
720
721 // Try merging with the next instruction.
722 MachineBasicBlock::iterator EndMBBI = MBB.end();
723 if (!DoMerge && MBBI != EndMBBI) {
724 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
725 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
726 ++NextMBBI;
727 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
728 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
729 DoMerge = true;
730 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
731 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
732 DoMerge = true;
733 }
734 if (DoMerge) {
735 if (NextMBBI == I) {
736 Advance = true;
737 ++I;
738 }
739 MBB.erase(NextMBBI);
740 }
741 }
742
743 if (!DoMerge)
744 return false;
745
746 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
747 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
748 .addReg(Base, getDefRegState(true)) // WB base register
749 .addReg(Base, getKillRegState(BaseKill))
750 .addImm(Pred).addReg(PredReg);
751
752 // Transfer the rest of operands.
753 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
754 MIB.addOperand(MI->getOperand(OpNum));
755
756 // Transfer memoperands.
757 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
758
759 MBB.erase(MBBI);
760 return true;
761 }
762
getPreIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)763 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
764 ARM_AM::AddrOpc Mode) {
765 switch (Opc) {
766 case ARM::LDRi12:
767 return ARM::LDR_PRE_IMM;
768 case ARM::STRi12:
769 return ARM::STR_PRE_IMM;
770 case ARM::VLDRS:
771 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
772 case ARM::VLDRD:
773 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
774 case ARM::VSTRS:
775 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
776 case ARM::VSTRD:
777 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
778 case ARM::t2LDRi8:
779 case ARM::t2LDRi12:
780 return ARM::t2LDR_PRE;
781 case ARM::t2STRi8:
782 case ARM::t2STRi12:
783 return ARM::t2STR_PRE;
784 default: llvm_unreachable("Unhandled opcode!");
785 }
786 return 0;
787 }
788
getPostIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)789 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
790 ARM_AM::AddrOpc Mode) {
791 switch (Opc) {
792 case ARM::LDRi12:
793 return ARM::LDR_POST_IMM;
794 case ARM::STRi12:
795 return ARM::STR_POST_IMM;
796 case ARM::VLDRS:
797 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
798 case ARM::VLDRD:
799 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
800 case ARM::VSTRS:
801 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
802 case ARM::VSTRD:
803 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
804 case ARM::t2LDRi8:
805 case ARM::t2LDRi12:
806 return ARM::t2LDR_POST;
807 case ARM::t2STRi8:
808 case ARM::t2STRi12:
809 return ARM::t2STR_POST;
810 default: llvm_unreachable("Unhandled opcode!");
811 }
812 return 0;
813 }
814
815 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
816 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
MergeBaseUpdateLoadStore(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const TargetInstrInfo * TII,bool & Advance,MachineBasicBlock::iterator & I)817 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
818 MachineBasicBlock::iterator MBBI,
819 const TargetInstrInfo *TII,
820 bool &Advance,
821 MachineBasicBlock::iterator &I) {
822 MachineInstr *MI = MBBI;
823 unsigned Base = MI->getOperand(1).getReg();
824 bool BaseKill = MI->getOperand(1).isKill();
825 unsigned Bytes = getLSMultipleTransferSize(MI);
826 int Opcode = MI->getOpcode();
827 DebugLoc dl = MI->getDebugLoc();
828 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
829 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
830 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
831 if (isi32Load(Opcode) || isi32Store(Opcode))
832 if (MI->getOperand(2).getImm() != 0)
833 return false;
834 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
835 return false;
836
837 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
838 // Can't do the merge if the destination register is the same as the would-be
839 // writeback register.
840 if (isLd && MI->getOperand(0).getReg() == Base)
841 return false;
842
843 unsigned PredReg = 0;
844 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
845 bool DoMerge = false;
846 ARM_AM::AddrOpc AddSub = ARM_AM::add;
847 unsigned NewOpc = 0;
848 // AM2 - 12 bits, thumb2 - 8 bits.
849 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
850
851 // Try merging with the previous instruction.
852 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
853 if (MBBI != BeginMBBI) {
854 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
855 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
856 --PrevMBBI;
857 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
858 DoMerge = true;
859 AddSub = ARM_AM::sub;
860 } else if (!isAM5 &&
861 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
862 DoMerge = true;
863 }
864 if (DoMerge) {
865 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
866 MBB.erase(PrevMBBI);
867 }
868 }
869
870 // Try merging with the next instruction.
871 MachineBasicBlock::iterator EndMBBI = MBB.end();
872 if (!DoMerge && MBBI != EndMBBI) {
873 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
874 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
875 ++NextMBBI;
876 if (!isAM5 &&
877 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
878 DoMerge = true;
879 AddSub = ARM_AM::sub;
880 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
881 DoMerge = true;
882 }
883 if (DoMerge) {
884 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
885 if (NextMBBI == I) {
886 Advance = true;
887 ++I;
888 }
889 MBB.erase(NextMBBI);
890 }
891 }
892
893 if (!DoMerge)
894 return false;
895
896 if (isAM5) {
897 // VLDM[SD}_UPD, VSTM[SD]_UPD
898 // (There are no base-updating versions of VLDR/VSTR instructions, but the
899 // updating load/store-multiple instructions can be used with only one
900 // register.)
901 MachineOperand &MO = MI->getOperand(0);
902 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
903 .addReg(Base, getDefRegState(true)) // WB base register
904 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
905 .addImm(Pred).addReg(PredReg)
906 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
907 getKillRegState(MO.isKill())));
908 } else if (isLd) {
909 if (isAM2) {
910 // LDR_PRE, LDR_POST
911 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
912 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
913 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
914 .addReg(Base, RegState::Define)
915 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
916 } else {
917 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
918 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
919 .addReg(Base, RegState::Define)
920 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
921 }
922 } else {
923 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
924 // t2LDR_PRE, t2LDR_POST
925 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
926 .addReg(Base, RegState::Define)
927 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
928 }
929 } else {
930 MachineOperand &MO = MI->getOperand(0);
931 // FIXME: post-indexed stores use am2offset_imm, which still encodes
932 // the vestigal zero-reg offset register. When that's fixed, this clause
933 // can be removed entirely.
934 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
935 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
936 // STR_PRE, STR_POST
937 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
938 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
939 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
940 } else {
941 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
942 // t2STR_PRE, t2STR_POST
943 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
944 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
945 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
946 }
947 }
948 MBB.erase(MBBI);
949
950 return true;
951 }
952
953 /// isMemoryOp - Returns true if instruction is a memory operation that this
954 /// pass is capable of operating on.
isMemoryOp(const MachineInstr * MI)955 static bool isMemoryOp(const MachineInstr *MI) {
956 // When no memory operands are present, conservatively assume unaligned,
957 // volatile, unfoldable.
958 if (!MI->hasOneMemOperand())
959 return false;
960
961 const MachineMemOperand *MMO = *MI->memoperands_begin();
962
963 // Don't touch volatile memory accesses - we may be changing their order.
964 if (MMO->isVolatile())
965 return false;
966
967 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
968 // not.
969 if (MMO->getAlignment() < 4)
970 return false;
971
972 // str <undef> could probably be eliminated entirely, but for now we just want
973 // to avoid making a mess of it.
974 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
975 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
976 MI->getOperand(0).isUndef())
977 return false;
978
979 // Likewise don't mess with references to undefined addresses.
980 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
981 MI->getOperand(1).isUndef())
982 return false;
983
984 int Opcode = MI->getOpcode();
985 switch (Opcode) {
986 default: break;
987 case ARM::VLDRS:
988 case ARM::VSTRS:
989 return MI->getOperand(1).isReg();
990 case ARM::VLDRD:
991 case ARM::VSTRD:
992 return MI->getOperand(1).isReg();
993 case ARM::LDRi12:
994 case ARM::STRi12:
995 case ARM::t2LDRi8:
996 case ARM::t2LDRi12:
997 case ARM::t2STRi8:
998 case ARM::t2STRi12:
999 return MI->getOperand(1).isReg();
1000 }
1001 return false;
1002 }
1003
1004 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1005 /// op that is being merged.
AdvanceRS(MachineBasicBlock & MBB,MemOpQueue & MemOps)1006 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1007 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1008 unsigned Position = MemOps[0].Position;
1009 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1010 if (MemOps[i].Position < Position) {
1011 Position = MemOps[i].Position;
1012 Loc = MemOps[i].MBBI;
1013 }
1014 }
1015
1016 if (Loc != MBB.begin())
1017 RS->forward(prior(Loc));
1018 }
1019
getMemoryOpOffset(const MachineInstr * MI)1020 static int getMemoryOpOffset(const MachineInstr *MI) {
1021 int Opcode = MI->getOpcode();
1022 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1023 unsigned NumOperands = MI->getDesc().getNumOperands();
1024 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1025
1026 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1027 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1028 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1029 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1030 return OffField;
1031
1032 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1033 : ARM_AM::getAM5Offset(OffField) * 4;
1034 if (isAM3) {
1035 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1036 Offset = -Offset;
1037 } else {
1038 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1039 Offset = -Offset;
1040 }
1041 return Offset;
1042 }
1043
InsertLDR_STR(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,int Offset,bool isDef,DebugLoc dl,unsigned NewOpc,unsigned Reg,bool RegDeadKill,bool RegUndef,unsigned BaseReg,bool BaseKill,bool BaseUndef,bool OffKill,bool OffUndef,ARMCC::CondCodes Pred,unsigned PredReg,const TargetInstrInfo * TII,bool isT2)1044 static void InsertLDR_STR(MachineBasicBlock &MBB,
1045 MachineBasicBlock::iterator &MBBI,
1046 int Offset, bool isDef,
1047 DebugLoc dl, unsigned NewOpc,
1048 unsigned Reg, bool RegDeadKill, bool RegUndef,
1049 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1050 bool OffKill, bool OffUndef,
1051 ARMCC::CondCodes Pred, unsigned PredReg,
1052 const TargetInstrInfo *TII, bool isT2) {
1053 if (isDef) {
1054 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1055 TII->get(NewOpc))
1056 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1057 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1058 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1059 } else {
1060 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1061 TII->get(NewOpc))
1062 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1063 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1064 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1065 }
1066 }
1067
/// FixInvalidRegPairOp - Rewrite an LDRD / STRD (or t2LDRDi8 / t2STRDi8)
/// whose two transfer registers do not form an even / odd consecutive pair,
/// as judged by their DWARF register numbers.
///
/// The instruction becomes a two-register LDMIA / STMIA when the register
/// numbers ascend and the offset is zero; otherwise it is split into two
/// single-word loads / stores. Returns true if the instruction at MBBI was
/// replaced, in which case MBBI is updated to point at the first replacement
/// instruction. Returns false (leaving everything untouched) for any other
/// opcode or for an already valid pair.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // Already an even register followed by the next odd one: nothing to fix.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the interesting flag on a transfer register is <dead>, for
    // stores it is <kill>; both are carried in the same variable.
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Operand 3 is only inspected for the non-Thumb2 opcodes.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      // The new ldm / stm was inserted right before MBBI.
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      // For Thumb2 a negative offset selects the i8 form, otherwise i12.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Load the odd half first; it does not overlap the base, so the base
        // is still intact for the second load. The kill flags go on the
        // second instruction only.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Even half first at OffImm, odd half second at OffImm+4; only the
        // second instruction carries the base / offset kill flags.
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Drop the original pair instruction and resume at the first replacement.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
1175
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
///
/// The block is walked once. Memory ops that share opcode, base register and
/// predicate are collected in an offset-sorted queue (a "chain"). When the
/// chain is broken, or the block ends, the queued ops are merged and base
/// register increments / decrements are folded into the results. Returns
/// true if at least one merge or fold happened.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // State describing the chain currently being collected; CurrBase == 0
  // means that no chain is open.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  // Running index of the instructions stepped over so far (debug values are
  // not counted); recorded in each queue entry.
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // An invalid register pair op is rewritten in place; MBBI then points at
    // the replacement, which is examined on the next iteration.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            // Larger than every queued offset: append.
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Otherwise insert in front of the first entry with a larger
            // offset so that the queue stays sorted by offset.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      // Debug values are stepped over without bumping Position.
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      // The current instruction could not join the chain: flush the chain.
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Close the chain; the next memory op starts a fresh one.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
1338
1339 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1340 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1341 /// directly restore the value of LR into pc.
1342 /// ldmfd sp!, {..., lr}
1343 /// bx lr
1344 /// or
1345 /// ldmfd sp!, {..., lr}
1346 /// mov pc, lr
1347 /// =>
1348 /// ldmfd sp!, {..., pc}
MergeReturnIntoLDM(MachineBasicBlock & MBB)1349 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1350 if (MBB.empty()) return false;
1351
1352 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1353 if (MBBI != MBB.begin() &&
1354 (MBBI->getOpcode() == ARM::BX_RET ||
1355 MBBI->getOpcode() == ARM::tBX_RET ||
1356 MBBI->getOpcode() == ARM::MOVPCLR)) {
1357 MachineInstr *PrevMI = prior(MBBI);
1358 unsigned Opcode = PrevMI->getOpcode();
1359 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1360 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1361 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1362 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1363 if (MO.getReg() != ARM::LR)
1364 return false;
1365 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1366 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1367 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1368 PrevMI->setDesc(TII->get(NewOpc));
1369 MO.setReg(ARM::PC);
1370 PrevMI->copyImplicitOps(&*MBBI);
1371 MBB.erase(MBBI);
1372 return true;
1373 }
1374 }
1375 return false;
1376 }
1377
runOnMachineFunction(MachineFunction & Fn)1378 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1379 const TargetMachine &TM = Fn.getTarget();
1380 AFI = Fn.getInfo<ARMFunctionInfo>();
1381 TII = TM.getInstrInfo();
1382 TRI = TM.getRegisterInfo();
1383 RS = new RegScavenger();
1384 isThumb2 = AFI->isThumb2Function();
1385
1386 bool Modified = false;
1387 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1388 ++MFI) {
1389 MachineBasicBlock &MBB = *MFI;
1390 Modified |= LoadStoreMultipleOpti(MBB);
1391 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1392 Modified |= MergeReturnIntoLDM(MBB);
1393 }
1394
1395 delete RS;
1396 return Modified;
1397 }
1398
1399
1400 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1401 /// load / stores from consecutive locations close to make it more
1402 /// likely they will be combined later.
1403
1404 namespace {
1405 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1406 static char ID;
ARMPreAllocLoadStoreOpt__anon16ab0f520211::ARMPreAllocLoadStoreOpt1407 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1408
1409 const TargetData *TD;
1410 const TargetInstrInfo *TII;
1411 const TargetRegisterInfo *TRI;
1412 const ARMSubtarget *STI;
1413 MachineRegisterInfo *MRI;
1414 MachineFunction *MF;
1415
1416 virtual bool runOnMachineFunction(MachineFunction &Fn);
1417
getPassName__anon16ab0f520211::ARMPreAllocLoadStoreOpt1418 virtual const char *getPassName() const {
1419 return "ARM pre- register allocation load / store optimization pass";
1420 }
1421
1422 private:
1423 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1424 unsigned &NewOpc, unsigned &EvenReg,
1425 unsigned &OddReg, unsigned &BaseReg,
1426 int &Offset,
1427 unsigned &PredReg, ARMCC::CondCodes &Pred,
1428 bool &isT2);
1429 bool RescheduleOps(MachineBasicBlock *MBB,
1430 SmallVector<MachineInstr*, 4> &Ops,
1431 unsigned Base, bool isLd,
1432 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1433 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1434 };
1435 char ARMPreAllocLoadStoreOpt::ID = 0;
1436 }
1437
runOnMachineFunction(MachineFunction & Fn)1438 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1439 TD = Fn.getTarget().getTargetData();
1440 TII = Fn.getTarget().getInstrInfo();
1441 TRI = Fn.getTarget().getRegisterInfo();
1442 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1443 MRI = &Fn.getRegInfo();
1444 MF = &Fn;
1445
1446 bool Modified = false;
1447 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1448 ++MFI)
1449 Modified |= RescheduleLoadStoreInstrs(MFI);
1450
1451 return Modified;
1452 }
1453
IsSafeAndProfitableToMove(bool isLd,unsigned Base,MachineBasicBlock::iterator I,MachineBasicBlock::iterator E,SmallPtrSet<MachineInstr *,4> & MemOps,SmallSet<unsigned,4> & MemRegs,const TargetRegisterInfo * TRI)1454 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1455 MachineBasicBlock::iterator I,
1456 MachineBasicBlock::iterator E,
1457 SmallPtrSet<MachineInstr*, 4> &MemOps,
1458 SmallSet<unsigned, 4> &MemRegs,
1459 const TargetRegisterInfo *TRI) {
1460 // Are there stores / loads / calls between them?
1461 // FIXME: This is overly conservative. We should make use of alias information
1462 // some day.
1463 SmallSet<unsigned, 4> AddedRegPressure;
1464 while (++I != E) {
1465 if (I->isDebugValue() || MemOps.count(&*I))
1466 continue;
1467 const MCInstrDesc &MCID = I->getDesc();
1468 if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
1469 return false;
1470 if (isLd && MCID.mayStore())
1471 return false;
1472 if (!isLd) {
1473 if (MCID.mayLoad())
1474 return false;
1475 // It's not safe to move the first 'str' down.
1476 // str r1, [r0]
1477 // strh r5, [r0]
1478 // str r4, [r0, #+4]
1479 if (MCID.mayStore())
1480 return false;
1481 }
1482 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1483 MachineOperand &MO = I->getOperand(j);
1484 if (!MO.isReg())
1485 continue;
1486 unsigned Reg = MO.getReg();
1487 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1488 return false;
1489 if (Reg != Base && !MemRegs.count(Reg))
1490 AddedRegPressure.insert(Reg);
1491 }
1492 }
1493
1494 // Estimate register pressure increase due to the transformation.
1495 if (MemRegs.size() <= 4)
1496 // Ok if we are moving small number of instructions.
1497 return true;
1498 return AddedRegPressure.size() <= MemRegs.size() * 2;
1499 }
1500
1501 bool
CanFormLdStDWord(MachineInstr * Op0,MachineInstr * Op1,DebugLoc & dl,unsigned & NewOpc,unsigned & EvenReg,unsigned & OddReg,unsigned & BaseReg,int & Offset,unsigned & PredReg,ARMCC::CondCodes & Pred,bool & isT2)1502 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1503 DebugLoc &dl,
1504 unsigned &NewOpc, unsigned &EvenReg,
1505 unsigned &OddReg, unsigned &BaseReg,
1506 int &Offset, unsigned &PredReg,
1507 ARMCC::CondCodes &Pred,
1508 bool &isT2) {
1509 // Make sure we're allowed to generate LDRD/STRD.
1510 if (!STI->hasV5TEOps())
1511 return false;
1512
1513 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1514 unsigned Scale = 1;
1515 unsigned Opcode = Op0->getOpcode();
1516 if (Opcode == ARM::LDRi12)
1517 NewOpc = ARM::LDRD;
1518 else if (Opcode == ARM::STRi12)
1519 NewOpc = ARM::STRD;
1520 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1521 NewOpc = ARM::t2LDRDi8;
1522 Scale = 4;
1523 isT2 = true;
1524 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1525 NewOpc = ARM::t2STRDi8;
1526 Scale = 4;
1527 isT2 = true;
1528 } else
1529 return false;
1530
1531 // Make sure the base address satisfies i64 ld / st alignment requirement.
1532 if (!Op0->hasOneMemOperand() ||
1533 !(*Op0->memoperands_begin())->getValue() ||
1534 (*Op0->memoperands_begin())->isVolatile())
1535 return false;
1536
1537 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1538 const Function *Func = MF->getFunction();
1539 unsigned ReqAlign = STI->hasV6Ops()
1540 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1541 : 8; // Pre-v6 need 8-byte align
1542 if (Align < ReqAlign)
1543 return false;
1544
1545 // Then make sure the immediate offset fits.
1546 int OffImm = getMemoryOpOffset(Op0);
1547 if (isT2) {
1548 int Limit = (1 << 8) * Scale;
1549 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1550 return false;
1551 Offset = OffImm;
1552 } else {
1553 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1554 if (OffImm < 0) {
1555 AddSub = ARM_AM::sub;
1556 OffImm = - OffImm;
1557 }
1558 int Limit = (1 << 8) * Scale;
1559 if (OffImm >= Limit || (OffImm & (Scale-1)))
1560 return false;
1561 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1562 }
1563 EvenReg = Op0->getOperand(0).getReg();
1564 OddReg = Op1->getOperand(0).getReg();
1565 if (EvenReg == OddReg)
1566 return false;
1567 BaseReg = Op0->getOperand(1).getReg();
1568 Pred = llvm::getInstrPredicate(Op0, PredReg);
1569 dl = Op0->getDebugLoc();
1570 return true;
1571 }
1572
1573 namespace {
1574 struct OffsetCompare {
operator ()__anon16ab0f520311::OffsetCompare1575 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1576 int LOffset = getMemoryOpOffset(LHS);
1577 int ROffset = getMemoryOpOffset(RHS);
1578 assert(LHS == RHS || LOffset != ROffset);
1579 return LOffset > ROffset;
1580 }
1581 };
1582 }
1583
RescheduleOps(MachineBasicBlock * MBB,SmallVector<MachineInstr *,4> & Ops,unsigned Base,bool isLd,DenseMap<MachineInstr *,unsigned> & MI2LocMap)1584 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1585 SmallVector<MachineInstr*, 4> &Ops,
1586 unsigned Base, bool isLd,
1587 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1588 bool RetVal = false;
1589
1590 // Sort by offset (in reverse order).
1591 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1592
1593 // The loads / stores of the same base are in order. Scan them from first to
1594 // last and check for the following:
1595 // 1. Any def of base.
1596 // 2. Any gaps.
1597 while (Ops.size() > 1) {
1598 unsigned FirstLoc = ~0U;
1599 unsigned LastLoc = 0;
1600 MachineInstr *FirstOp = 0;
1601 MachineInstr *LastOp = 0;
1602 int LastOffset = 0;
1603 unsigned LastOpcode = 0;
1604 unsigned LastBytes = 0;
1605 unsigned NumMove = 0;
1606 for (int i = Ops.size() - 1; i >= 0; --i) {
1607 MachineInstr *Op = Ops[i];
1608 unsigned Loc = MI2LocMap[Op];
1609 if (Loc <= FirstLoc) {
1610 FirstLoc = Loc;
1611 FirstOp = Op;
1612 }
1613 if (Loc >= LastLoc) {
1614 LastLoc = Loc;
1615 LastOp = Op;
1616 }
1617
1618 unsigned Opcode = Op->getOpcode();
1619 if (LastOpcode && Opcode != LastOpcode)
1620 break;
1621
1622 int Offset = getMemoryOpOffset(Op);
1623 unsigned Bytes = getLSMultipleTransferSize(Op);
1624 if (LastBytes) {
1625 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1626 break;
1627 }
1628 LastOffset = Offset;
1629 LastBytes = Bytes;
1630 LastOpcode = Opcode;
1631 if (++NumMove == 8) // FIXME: Tune this limit.
1632 break;
1633 }
1634
1635 if (NumMove <= 1)
1636 Ops.pop_back();
1637 else {
1638 SmallPtrSet<MachineInstr*, 4> MemOps;
1639 SmallSet<unsigned, 4> MemRegs;
1640 for (int i = NumMove-1; i >= 0; --i) {
1641 MemOps.insert(Ops[i]);
1642 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1643 }
1644
1645 // Be conservative, if the instructions are too far apart, don't
1646 // move them. We want to limit the increase of register pressure.
1647 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1648 if (DoMove)
1649 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1650 MemOps, MemRegs, TRI);
1651 if (!DoMove) {
1652 for (unsigned i = 0; i != NumMove; ++i)
1653 Ops.pop_back();
1654 } else {
1655 // This is the new location for the loads / stores.
1656 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1657 while (InsertPos != MBB->end()
1658 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1659 ++InsertPos;
1660
1661 // If we are moving a pair of loads / stores, see if it makes sense
1662 // to try to allocate a pair of registers that can form register pairs.
1663 MachineInstr *Op0 = Ops.back();
1664 MachineInstr *Op1 = Ops[Ops.size()-2];
1665 unsigned EvenReg = 0, OddReg = 0;
1666 unsigned BaseReg = 0, PredReg = 0;
1667 ARMCC::CondCodes Pred = ARMCC::AL;
1668 bool isT2 = false;
1669 unsigned NewOpc = 0;
1670 int Offset = 0;
1671 DebugLoc dl;
1672 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1673 EvenReg, OddReg, BaseReg,
1674 Offset, PredReg, Pred, isT2)) {
1675 Ops.pop_back();
1676 Ops.pop_back();
1677
1678 const MCInstrDesc &MCID = TII->get(NewOpc);
1679 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1680 MRI->constrainRegClass(EvenReg, TRC);
1681 MRI->constrainRegClass(OddReg, TRC);
1682
1683 // Form the pair instruction.
1684 if (isLd) {
1685 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1686 .addReg(EvenReg, RegState::Define)
1687 .addReg(OddReg, RegState::Define)
1688 .addReg(BaseReg);
1689 // FIXME: We're converting from LDRi12 to an insn that still
1690 // uses addrmode2, so we need an explicit offset reg. It should
1691 // always by reg0 since we're transforming LDRi12s.
1692 if (!isT2)
1693 MIB.addReg(0);
1694 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1695 ++NumLDRDFormed;
1696 } else {
1697 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1698 .addReg(EvenReg)
1699 .addReg(OddReg)
1700 .addReg(BaseReg);
1701 // FIXME: We're converting from LDRi12 to an insn that still
1702 // uses addrmode2, so we need an explicit offset reg. It should
1703 // always by reg0 since we're transforming STRi12s.
1704 if (!isT2)
1705 MIB.addReg(0);
1706 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1707 ++NumSTRDFormed;
1708 }
1709 MBB->erase(Op0);
1710 MBB->erase(Op1);
1711
1712 // Add register allocation hints to form register pairs.
1713 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1714 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
1715 } else {
1716 for (unsigned i = 0; i != NumMove; ++i) {
1717 MachineInstr *Op = Ops.back();
1718 Ops.pop_back();
1719 MBB->splice(InsertPos, MBB, Op);
1720 }
1721 }
1722
1723 NumLdStMoved += NumMove;
1724 RetVal = true;
1725 }
1726 }
1727 }
1728
1729 return RetVal;
1730 }
1731
1732 bool
RescheduleLoadStoreInstrs(MachineBasicBlock * MBB)1733 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1734 bool RetVal = false;
1735
1736 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1737 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1738 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1739 SmallVector<unsigned, 4> LdBases;
1740 SmallVector<unsigned, 4> StBases;
1741
1742 unsigned Loc = 0;
1743 MachineBasicBlock::iterator MBBI = MBB->begin();
1744 MachineBasicBlock::iterator E = MBB->end();
1745 while (MBBI != E) {
1746 for (; MBBI != E; ++MBBI) {
1747 MachineInstr *MI = MBBI;
1748 const MCInstrDesc &MCID = MI->getDesc();
1749 if (MCID.isCall() || MCID.isTerminator()) {
1750 // Stop at barriers.
1751 ++MBBI;
1752 break;
1753 }
1754
1755 if (!MI->isDebugValue())
1756 MI2LocMap[MI] = ++Loc;
1757
1758 if (!isMemoryOp(MI))
1759 continue;
1760 unsigned PredReg = 0;
1761 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1762 continue;
1763
1764 int Opc = MI->getOpcode();
1765 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1766 unsigned Base = MI->getOperand(1).getReg();
1767 int Offset = getMemoryOpOffset(MI);
1768
1769 bool StopHere = false;
1770 if (isLd) {
1771 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1772 Base2LdsMap.find(Base);
1773 if (BI != Base2LdsMap.end()) {
1774 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1775 if (Offset == getMemoryOpOffset(BI->second[i])) {
1776 StopHere = true;
1777 break;
1778 }
1779 }
1780 if (!StopHere)
1781 BI->second.push_back(MI);
1782 } else {
1783 SmallVector<MachineInstr*, 4> MIs;
1784 MIs.push_back(MI);
1785 Base2LdsMap[Base] = MIs;
1786 LdBases.push_back(Base);
1787 }
1788 } else {
1789 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1790 Base2StsMap.find(Base);
1791 if (BI != Base2StsMap.end()) {
1792 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1793 if (Offset == getMemoryOpOffset(BI->second[i])) {
1794 StopHere = true;
1795 break;
1796 }
1797 }
1798 if (!StopHere)
1799 BI->second.push_back(MI);
1800 } else {
1801 SmallVector<MachineInstr*, 4> MIs;
1802 MIs.push_back(MI);
1803 Base2StsMap[Base] = MIs;
1804 StBases.push_back(Base);
1805 }
1806 }
1807
1808 if (StopHere) {
1809 // Found a duplicate (a base+offset combination that's seen earlier).
1810 // Backtrack.
1811 --Loc;
1812 break;
1813 }
1814 }
1815
1816 // Re-schedule loads.
1817 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1818 unsigned Base = LdBases[i];
1819 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1820 if (Lds.size() > 1)
1821 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1822 }
1823
1824 // Re-schedule stores.
1825 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1826 unsigned Base = StBases[i];
1827 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1828 if (Sts.size() > 1)
1829 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1830 }
1831
1832 if (MBBI != E) {
1833 Base2LdsMap.clear();
1834 Base2StsMap.clear();
1835 LdBases.clear();
1836 StBases.clear();
1837 }
1838 }
1839
1840 return RetVal;
1841 }
1842
1843
1844 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1845 /// optimization pass.
createARMLoadStoreOptimizationPass(bool PreAlloc)1846 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1847 if (PreAlloc)
1848 return new ARMPreAllocLoadStoreOpt();
1849 return new ARMLoadStoreOpt();
1850 }
1851