//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
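///
/// For example (illustrative):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
/// =>
///   ldmia r0, {r1, r2}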

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

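  // Pick the addressing submode from the starting offset. For example
  // (illustrative), with 3 registers: offset 0 selects IA, offset 4 selects
  // IB, offset -8 selects DA, and offset -12 selects DB.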
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
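  // Compare registers by hardware encoding value: LDM/STM register lists
  // must be in ascending encoded order, and VFP load/store multiples must
  // additionally use consecutive registers. Undef registers get UINT_MAX as
  // a sentinel.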
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this one in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

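  // tSUBspi encodes its immediate in words, so scale it by 4 to get bytes
  // (e.g., illustratively, "sub sp, #8" is encoded with an immediate of 2);
  // SUBri/t2SUBri immediates are already in bytes.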
  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
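  // A Limit of 0 (the AM5 case) disables the range check in
  // isMatchingIncrement/isMatchingDecrement entirely; the inc/dec still has
  // to match the transfer size exactly for the merge to happen.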

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

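/// getMemoryOpOffset - Return the signed byte offset of a memory operation.
/// Immediate-offset forms (e.g. LDRi12) store the offset directly; AM3
/// (LDRD/STRD) and AM5 (VLDR/VSTR) forms encode an unsigned magnitude plus
/// an add/sub bit, with AM5 offsets counted in words (illustratively, an
/// encoded AM5 offset of 2 means #8 bytes).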
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defined by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        // <rdar://problem/11101911>
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}

/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
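///
/// For example (illustrative):
///   ldr r2, [r0, #4]
///   ...
///   ldr r1, [r0]
/// is rescheduled so the two loads are adjacent (and, when the registers and
/// alignment allow, combined into an ldrd by this pass).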

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
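  /// OffsetCompare - Order memory operations by decreasing memory offset.
  /// Distinct instructions off the same base must have distinct offsets.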
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
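/// RescheduleOps - Given the loads (or stores) in Ops, which all access memory
/// off the same base register, try to move runs of them with contiguous
/// offsets next to one another. When exactly two operations are moved and
/// their offsets and registers permit it, emit an LDRD / STRD in their place.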
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any redefinition of the base register.
  // 2. Any gaps between the memory offsets.
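  //
  // For example, with Base == r0 and 4-byte transfers, the run
  //   ldr r1, [r0]
  //   ldr r2, [r0, #4]
  //   ldr r3, [r0, #8]
  // has contiguous offsets and is a candidate for rescheduling, whereas a
  // missing [r0, #4] access would end the run after the first load.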
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase in register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form a register
        // pair.
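        // For example:
        //   ldr r1, [r0]
        //   ldr r2, [r0, #4]
        // can be replaced by a single ldrd, with allocation hints added
        // below to encourage an even / odd physical register pair.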
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
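/// RescheduleLoadStoreInstrs - Scan the basic block in regions bounded by
/// calls and terminators, bucketing the unpredicated memory operations by
/// base register, and run RescheduleOps on every base register with more
/// than one load or store.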
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // End the current region here; this instruction will be rescanned at
        // the start of the next region, so undo its location increment.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
