1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64InstrInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "MCTargetDesc/AArch64AddressingModes.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineInstrBuilder.h"
19 #include "llvm/CodeGen/MachineMemOperand.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/PseudoSourceValue.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/TargetRegistry.h"
25 #include <algorithm>
26
27 using namespace llvm;
28
29 #define GET_INSTRINFO_CTOR_DTOR
30 #include "AArch64GenInstrInfo.inc"
31
32 static LLVM_CONSTEXPR MachineMemOperand::Flags MOSuppressPair =
33 MachineMemOperand::MOTargetFlag1;
34
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}
38
39 /// GetInstSize - Return the number of bytes of code the specified
40 /// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
42 const MachineBasicBlock &MBB = *MI.getParent();
43 const MachineFunction *MF = MBB.getParent();
44 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
45
46 if (MI.getOpcode() == AArch64::INLINEASM)
47 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
48
49 const MCInstrDesc &Desc = MI.getDesc();
50 switch (Desc.getOpcode()) {
51 default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
53 return 4;
54 case TargetOpcode::DBG_VALUE:
55 case TargetOpcode::EH_LABEL:
56 case TargetOpcode::IMPLICIT_DEF:
57 case TargetOpcode::KILL:
58 return 0;
59 }
60
  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
62 }
63
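// Decompose a conditional terminator into its target block and the Cond
// vector used elsewhere in this file:
//   Bcc:       Cond = { condition code }
//   CBZ/CBNZ:  Cond = { -1, opcode, source register }
//   TBZ/TBNZ:  Cond = { -1, opcode, source register, bit number }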
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
66 // Block ends with fall-through condbranch.
67 switch (LastInst->getOpcode()) {
68 default:
69 llvm_unreachable("Unknown branch instruction?");
70 case AArch64::Bcc:
71 Target = LastInst->getOperand(1).getMBB();
72 Cond.push_back(LastInst->getOperand(0));
73 break;
74 case AArch64::CBZW:
75 case AArch64::CBZX:
76 case AArch64::CBNZW:
77 case AArch64::CBNZX:
78 Target = LastInst->getOperand(1).getMBB();
79 Cond.push_back(MachineOperand::CreateImm(-1));
80 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
81 Cond.push_back(LastInst->getOperand(0));
82 break;
83 case AArch64::TBZW:
84 case AArch64::TBZX:
85 case AArch64::TBNZW:
86 case AArch64::TBNZX:
87 Target = LastInst->getOperand(2).getMBB();
88 Cond.push_back(MachineOperand::CreateImm(-1));
89 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
90 Cond.push_back(LastInst->getOperand(0));
91 Cond.push_back(LastInst->getOperand(1));
92 }
93 }
94
95 // Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
101 // If the block has no terminators, it just falls into the block after it.
102 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
103 if (I == MBB.end())
104 return false;
105
106 if (!isUnpredicatedTerminator(*I))
107 return false;
108
109 // Get the last instruction in the block.
110 MachineInstr *LastInst = &*I;
111
112 // If there is only one terminator instruction, process it.
113 unsigned LastOpc = LastInst->getOpcode();
114 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
115 if (isUncondBranchOpcode(LastOpc)) {
116 TBB = LastInst->getOperand(0).getMBB();
117 return false;
118 }
119 if (isCondBranchOpcode(LastOpc)) {
120 // Block ends with fall-through condbranch.
121 parseCondBranch(LastInst, TBB, Cond);
122 return false;
123 }
124 return true; // Can't handle indirect branch.
125 }
126
127 // Get the instruction before it if it is a terminator.
128 MachineInstr *SecondLastInst = &*I;
129 unsigned SecondLastOpc = SecondLastInst->getOpcode();
130
131 // If AllowModify is true and the block ends with two or more unconditional
132 // branches, delete all but the first unconditional branch.
133 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
134 while (isUncondBranchOpcode(SecondLastOpc)) {
135 LastInst->eraseFromParent();
136 LastInst = SecondLastInst;
137 LastOpc = LastInst->getOpcode();
138 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
140 TBB = LastInst->getOperand(0).getMBB();
141 return false;
142 } else {
143 SecondLastInst = &*I;
144 SecondLastOpc = SecondLastInst->getOpcode();
145 }
146 }
147 }
148
149 // If there are three terminators, we don't know what sort of block this is.
150 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
151 return true;
152
153 // If the block ends with a B and a Bcc, handle it.
154 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
155 parseCondBranch(SecondLastInst, TBB, Cond);
156 FBB = LastInst->getOperand(0).getMBB();
157 return false;
158 }
159
160 // If the block ends with two unconditional branches, handle it. The second
161 // one is not executed, so remove it.
162 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
163 TBB = SecondLastInst->getOperand(0).getMBB();
164 I = LastInst;
165 if (AllowModify)
166 I->eraseFromParent();
167 return false;
168 }
169
170 // ...likewise if it ends with an indirect branch followed by an unconditional
171 // branch.
172 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
173 I = LastInst;
174 if (AllowModify)
175 I->eraseFromParent();
176 return true;
177 }
178
179 // Otherwise, can't handle this.
180 return true;
181 }
182
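// Invert a condition produced by parseCondBranch(): flip the condition code
// of a Bcc, or swap CBZ<->CBNZ / TBZ<->TBNZ for a folded compare-and-branch.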
bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
185 if (Cond[0].getImm() != -1) {
186 // Regular Bcc
187 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
188 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
189 } else {
190 // Folded compare-and-branch
191 switch (Cond[1].getImm()) {
192 default:
193 llvm_unreachable("Unknown conditional branch!");
194 case AArch64::CBZW:
195 Cond[1].setImm(AArch64::CBNZW);
196 break;
197 case AArch64::CBNZW:
198 Cond[1].setImm(AArch64::CBZW);
199 break;
200 case AArch64::CBZX:
201 Cond[1].setImm(AArch64::CBNZX);
202 break;
203 case AArch64::CBNZX:
204 Cond[1].setImm(AArch64::CBZX);
205 break;
206 case AArch64::TBZW:
207 Cond[1].setImm(AArch64::TBNZW);
208 break;
209 case AArch64::TBNZW:
210 Cond[1].setImm(AArch64::TBZW);
211 break;
212 case AArch64::TBZX:
213 Cond[1].setImm(AArch64::TBNZX);
214 break;
215 case AArch64::TBNZX:
216 Cond[1].setImm(AArch64::TBZX);
217 break;
218 }
219 }
220
221 return false;
222 }
223
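// Erase the terminating branches (at most one unconditional and one
// conditional branch) from MBB and return how many instructions were removed.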
unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
225 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
226 if (I == MBB.end())
227 return 0;
228
229 if (!isUncondBranchOpcode(I->getOpcode()) &&
230 !isCondBranchOpcode(I->getOpcode()))
231 return 0;
232
233 // Remove the branch.
234 I->eraseFromParent();
235
236 I = MBB.end();
237
238 if (I == MBB.begin())
239 return 1;
240 --I;
241 if (!isCondBranchOpcode(I->getOpcode()))
242 return 1;
243
244 // Remove the branch.
245 I->eraseFromParent();
246 return 2;
247 }
248
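// Materialize the conditional branch described by Cond (see parseCondBranch)
// at the end of MBB, branching to TBB.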
void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
252 if (Cond[0].getImm() != -1) {
253 // Regular Bcc
254 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
255 } else {
256 // Folded compare-and-branch
257 // Note that we use addOperand instead of addReg to keep the flags.
258 const MachineInstrBuilder MIB =
259 BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
260 if (Cond.size() > 3)
261 MIB.addImm(Cond[3].getImm());
262 MIB.addMBB(TBB);
263 }
264 }
265
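// Insert an unconditional branch, a conditional branch, or both (for a
// two-way conditional branch) and return the number of instructions added.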
unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL) const {
271 // Shouldn't be a fall through.
272 assert(TBB && "InsertBranch must not be told to insert a fallthrough");
273
274 if (!FBB) {
275 if (Cond.empty()) // Unconditional branch?
276 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
277 else
278 instantiateCondBranch(MBB, DL, TBB, Cond);
279 return 1;
280 }
281
282 // Two-way conditional branch.
283 instantiateCondBranch(MBB, DL, TBB, Cond);
284 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
285 return 2;
286 }
287
288 // Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
290 while (TargetRegisterInfo::isVirtualRegister(VReg)) {
291 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
292 if (!DefMI->isFullCopy())
293 return VReg;
294 VReg = DefMI->getOperand(1).getReg();
295 }
296 return VReg;
297 }
298
299 // Determine if VReg is defined by an instruction that can be folded into a
300 // csel instruction. If so, return the folded opcode, and the replacement
301 // register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
304 VReg = removeCopies(MRI, VReg);
305 if (!TargetRegisterInfo::isVirtualRegister(VReg))
306 return 0;
307
308 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
309 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
310 unsigned Opc = 0;
311 unsigned SrcOpNum = 0;
312 switch (DefMI->getOpcode()) {
313 case AArch64::ADDSXri:
314 case AArch64::ADDSWri:
315 // if NZCV is used, do not fold.
316 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
317 return 0;
318 // fall-through to ADDXri and ADDWri.
319 case AArch64::ADDXri:
320 case AArch64::ADDWri:
321 // add x, 1 -> csinc.
322 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
323 DefMI->getOperand(3).getImm() != 0)
324 return 0;
325 SrcOpNum = 1;
326 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
327 break;
328
329 case AArch64::ORNXrr:
330 case AArch64::ORNWrr: {
331 // not x -> csinv, represented as orn dst, xzr, src.
332 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
333 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
334 return 0;
335 SrcOpNum = 2;
336 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
337 break;
338 }
339
340 case AArch64::SUBSXrr:
341 case AArch64::SUBSWrr:
342 // if NZCV is used, do not fold.
343 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
344 return 0;
345 // fall-through to SUBXrr and SUBWrr.
346 case AArch64::SUBXrr:
347 case AArch64::SUBWrr: {
348 // neg x -> csneg, represented as sub dst, xzr, src.
349 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
350 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
351 return 0;
352 SrcOpNum = 2;
353 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
354 break;
355 }
356 default:
357 return 0;
358 }
359 assert(Opc && SrcOpNum && "Missing parameters");
360
361 if (NewVReg)
362 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
363 return Opc;
364 }
365
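// Decide whether a select of TrueReg/FalseReg under Cond can be lowered to a
// csel/fcsel sequence and report rough latencies for the condition and the
// two operands.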
bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
370 // Check register classes.
371 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
372 const TargetRegisterClass *RC =
373 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
374 if (!RC)
375 return false;
376
377 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
378 unsigned ExtraCondLat = Cond.size() != 1;
379
380 // GPRs are handled by csel.
381 // FIXME: Fold in x+1, -x, and ~x when applicable.
382 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
383 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
384 // Single-cycle csel, csinc, csinv, and csneg.
385 CondCycles = 1 + ExtraCondLat;
386 TrueCycles = FalseCycles = 1;
387 if (canFoldIntoCSel(MRI, TrueReg))
388 TrueCycles = 0;
389 else if (canFoldIntoCSel(MRI, FalseReg))
390 FalseCycles = 0;
391 return true;
392 }
393
394 // Scalar floating point is handled by fcsel.
395 // FIXME: Form fabs, fmin, and fmax when applicable.
396 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
397 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
398 CondCycles = 5 + ExtraCondLat;
399 TrueCycles = FalseCycles = 2;
400 return true;
401 }
402
403 // Can't do vectors.
404 return false;
405 }
406
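// Lower a select into csel/fcsel: if Cond came from cbz/cbnz or tbz/tbnz,
// first materialize NZCV with a subs-against-zero or a single-bit ands, then
// emit the conditional select into DstReg.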
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
412 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
413
414 // Parse the condition code, see parseCondBranch() above.
415 AArch64CC::CondCode CC;
416 switch (Cond.size()) {
417 default:
418 llvm_unreachable("Unknown condition opcode in Cond");
419 case 1: // b.cc
420 CC = AArch64CC::CondCode(Cond[0].getImm());
421 break;
422 case 3: { // cbz/cbnz
423 // We must insert a compare against 0.
424 bool Is64Bit;
425 switch (Cond[1].getImm()) {
426 default:
427 llvm_unreachable("Unknown branch opcode in Cond");
428 case AArch64::CBZW:
429 Is64Bit = 0;
430 CC = AArch64CC::EQ;
431 break;
432 case AArch64::CBZX:
433 Is64Bit = 1;
434 CC = AArch64CC::EQ;
435 break;
436 case AArch64::CBNZW:
437 Is64Bit = 0;
438 CC = AArch64CC::NE;
439 break;
440 case AArch64::CBNZX:
441 Is64Bit = 1;
442 CC = AArch64CC::NE;
443 break;
444 }
445 unsigned SrcReg = Cond[2].getReg();
446 if (Is64Bit) {
447 // cmp reg, #0 is actually subs xzr, reg, #0.
448 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
449 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
450 .addReg(SrcReg)
451 .addImm(0)
452 .addImm(0);
453 } else {
454 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
455 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
456 .addReg(SrcReg)
457 .addImm(0)
458 .addImm(0);
459 }
460 break;
461 }
462 case 4: { // tbz/tbnz
463 // We must insert a tst instruction.
464 switch (Cond[1].getImm()) {
465 default:
466 llvm_unreachable("Unknown branch opcode in Cond");
467 case AArch64::TBZW:
468 case AArch64::TBZX:
469 CC = AArch64CC::EQ;
470 break;
471 case AArch64::TBNZW:
472 case AArch64::TBNZX:
473 CC = AArch64CC::NE;
474 break;
475 }
476 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
477 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
478 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
479 .addReg(Cond[2].getReg())
480 .addImm(
481 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
482 else
483 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
484 .addReg(Cond[2].getReg())
485 .addImm(
486 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
487 break;
488 }
489 }
490
491 unsigned Opc = 0;
492 const TargetRegisterClass *RC = nullptr;
493 bool TryFold = false;
494 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
495 RC = &AArch64::GPR64RegClass;
496 Opc = AArch64::CSELXr;
497 TryFold = true;
498 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
499 RC = &AArch64::GPR32RegClass;
500 Opc = AArch64::CSELWr;
501 TryFold = true;
502 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
503 RC = &AArch64::FPR64RegClass;
504 Opc = AArch64::FCSELDrrr;
505 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
506 RC = &AArch64::FPR32RegClass;
507 Opc = AArch64::FCSELSrrr;
508 }
509 assert(RC && "Unsupported regclass");
510
511 // Try folding simple instructions into the csel.
512 if (TryFold) {
513 unsigned NewVReg = 0;
514 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
515 if (FoldedOpc) {
      // The folded opcodes csinc, csinv, and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
518 CC = AArch64CC::getInvertedCondCode(CC);
519 TrueReg = FalseReg;
520 } else
521 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
522
523 // Fold the operation. Leave any dead instructions for DCE to clean up.
524 if (FoldedOpc) {
525 FalseReg = NewVReg;
526 Opc = FoldedOpc;
      // This extends the live range of NewVReg.
528 MRI.clearKillFlags(NewVReg);
529 }
530 }
531
  // Pull all virtual registers into the appropriate class.
533 MRI.constrainRegClass(TrueReg, RC);
534 MRI.constrainRegClass(FalseReg, RC);
535
536 // Insert the csel.
537 BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
538 CC);
539 }
540
541 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
543 uint64_t Imm = MI.getOperand(1).getImm();
544 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
545 uint64_t Encoding;
546 return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
547 }
548
549 // FIXME: this implementation should be micro-architecture dependent, so a
550 // micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
552 if (!Subtarget.hasCustomCheapAsMoveHandling())
553 return MI.isAsCheapAsAMove();
554
555 unsigned Imm;
556
557 switch (MI.getOpcode()) {
558 default:
559 return false;
560
561 // add/sub on register without shift
562 case AArch64::ADDWri:
563 case AArch64::ADDXri:
564 case AArch64::SUBWri:
565 case AArch64::SUBXri:
566 return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
567 MI.getOperand(3).getImm() == 0);
568
569 // add/sub on register with shift
570 case AArch64::ADDWrs:
571 case AArch64::ADDXrs:
572 case AArch64::SUBWrs:
573 case AArch64::SUBXrs:
574 Imm = MI.getOperand(3).getImm();
575 return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
576 AArch64_AM::getArithShiftValue(Imm) < 4);
577
578 // logical ops on immediate
579 case AArch64::ANDWri:
580 case AArch64::ANDXri:
581 case AArch64::EORWri:
582 case AArch64::EORXri:
583 case AArch64::ORRWri:
584 case AArch64::ORRXri:
585 return true;
586
587 // logical ops on register without shift
588 case AArch64::ANDWrr:
589 case AArch64::ANDXrr:
590 case AArch64::BICWrr:
591 case AArch64::BICXrr:
592 case AArch64::EONWrr:
593 case AArch64::EONXrr:
594 case AArch64::EORWrr:
595 case AArch64::EORXrr:
596 case AArch64::ORNWrr:
597 case AArch64::ORNXrr:
598 case AArch64::ORRWrr:
599 case AArch64::ORRXrr:
600 return true;
601
602 // logical ops on register with shift
603 case AArch64::ANDWrs:
604 case AArch64::ANDXrs:
605 case AArch64::BICWrs:
606 case AArch64::BICXrs:
607 case AArch64::EONWrs:
608 case AArch64::EONXrs:
609 case AArch64::EORWrs:
610 case AArch64::EORXrs:
611 case AArch64::ORNWrs:
612 case AArch64::ORNXrs:
613 case AArch64::ORRWrs:
614 case AArch64::ORRXrs:
615 Imm = MI.getOperand(3).getImm();
616 return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
617 AArch64_AM::getShiftValue(Imm) < 4 &&
618 AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
619
620 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
621 // ORRXri, it is as cheap as MOV
622 case AArch64::MOVi32imm:
623 return canBeExpandedToORR(MI, 32);
624 case AArch64::MOVi64imm:
625 return canBeExpandedToORR(MI, 64);
626
627 // It is cheap to move #0 to float registers if the subtarget has
628 // ZeroCycleZeroing feature.
629 case AArch64::FMOVS0:
630 case AArch64::FMOVD0:
631 return Subtarget.hasZeroCycleZeroing();
632 }
633
634 llvm_unreachable("Unknown opcode to check as cheap as a move!");
635 }
636
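// Recognize SBFMXri/UBFMXri used as a plain sxtw/uxtw so the coalescer can
// treat the extension as a sub_32 sub-register copy.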
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
640 switch (MI.getOpcode()) {
641 default:
642 return false;
643 case AArch64::SBFMXri: // aka sxtw
644 case AArch64::UBFMXri: // aka uxtw
645 // Check for the 32 -> 64 bit extension case, these instructions can do
646 // much more.
647 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
648 return false;
649 // This is a signed or unsigned 32 -> 64 bit extension.
650 SrcReg = MI.getOperand(1).getReg();
651 DstReg = MI.getOperand(0).getReg();
652 SubIdx = AArch64::sub_32;
653 return true;
654 }
655 }
656
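// Conservatively determine that two memory accesses cannot overlap by
// comparing base register, immediate offset and access width.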
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
659 const TargetRegisterInfo *TRI = &getRegisterInfo();
660 unsigned BaseRegA = 0, BaseRegB = 0;
661 int64_t OffsetA = 0, OffsetB = 0;
662 unsigned WidthA = 0, WidthB = 0;
663
664 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
665 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
666
667 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
668 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
669 return false;
670
671 // Retrieve the base register, offset from the base register and width. Width
672 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
673 // base registers are identical, and the offset of a lower memory access +
674 // the width doesn't overlap the offset of a higher memory access,
675 // then the memory accesses are different.
676 if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
677 getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
678 if (BaseRegA == BaseRegB) {
679 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
680 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
681 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
682 if (LowOffset + LowWidth <= HighOffset)
683 return true;
684 }
685 }
686 return false;
687 }
688
689 /// analyzeCompare - For a comparison instruction, return the source registers
690 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
691 /// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
695 switch (MI.getOpcode()) {
696 default:
697 break;
698 case AArch64::SUBSWrr:
699 case AArch64::SUBSWrs:
700 case AArch64::SUBSWrx:
701 case AArch64::SUBSXrr:
702 case AArch64::SUBSXrs:
703 case AArch64::SUBSXrx:
704 case AArch64::ADDSWrr:
705 case AArch64::ADDSWrs:
706 case AArch64::ADDSWrx:
707 case AArch64::ADDSXrr:
708 case AArch64::ADDSXrs:
709 case AArch64::ADDSXrx:
710 // Replace SUBSWrr with SUBWrr if NZCV is not used.
711 SrcReg = MI.getOperand(1).getReg();
712 SrcReg2 = MI.getOperand(2).getReg();
713 CmpMask = ~0;
714 CmpValue = 0;
715 return true;
716 case AArch64::SUBSWri:
717 case AArch64::ADDSWri:
718 case AArch64::SUBSXri:
719 case AArch64::ADDSXri:
720 SrcReg = MI.getOperand(1).getReg();
721 SrcReg2 = 0;
722 CmpMask = ~0;
723 // FIXME: In order to convert CmpValue to 0 or 1
724 CmpValue = MI.getOperand(2).getImm() != 0;
725 return true;
726 case AArch64::ANDSWri:
727 case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
730 SrcReg = MI.getOperand(1).getReg();
731 SrcReg2 = 0;
732 CmpMask = ~0;
733 // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
734 // while the type of CmpValue is int. When converting uint64_t to int,
735 // the high 32 bits of uint64_t will be lost.
736 // In fact it causes a bug in spec2006-483.xalancbmk
737 // CmpValue is only used to compare with zero in OptimizeCompareInstr
738 CmpValue = AArch64_AM::decodeLogicalImmediate(
739 MI.getOperand(2).getImm(),
740 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
741 return true;
742 }
743
744 return false;
745 }
746
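// After an opcode has been rewritten, constrain every register operand of
// Instr to the register class required by the new opcode. Returns false if
// some operand cannot be constrained.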
static bool UpdateOperandRegClass(MachineInstr &Instr) {
748 MachineBasicBlock *MBB = Instr.getParent();
749 assert(MBB && "Can't get MachineBasicBlock here");
750 MachineFunction *MF = MBB->getParent();
751 assert(MF && "Can't get MachineFunction here");
752 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
753 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
754 MachineRegisterInfo *MRI = &MF->getRegInfo();
755
756 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
757 ++OpIdx) {
758 MachineOperand &MO = Instr.getOperand(OpIdx);
759 const TargetRegisterClass *OpRegCstraints =
760 Instr.getRegClassConstraint(OpIdx, TII, TRI);
761
762 // If there's no constraint, there's nothing to do.
763 if (!OpRegCstraints)
764 continue;
765 // If the operand is a frame index, there's nothing to do here.
766 // A frame index operand will resolve correctly during PEI.
767 if (MO.isFI())
768 continue;
769
770 assert(MO.isReg() &&
771 "Operand has register constraints without being a register!");
772
773 unsigned Reg = MO.getReg();
774 if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
775 if (!OpRegCstraints->contains(Reg))
776 return false;
777 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
778 !MRI->constrainRegClass(Reg, OpRegCstraints))
779 return false;
780 }
781
782 return true;
783 }
784
785 /// \brief Return the opcode that does not set flags when possible - otherwise
786 /// return the original opcode. The caller is responsible to do the actual
787 /// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
789 // Don't convert all compare instructions, because for some the zero register
790 // encoding becomes the sp register.
791 bool MIDefinesZeroReg = false;
792 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
793 MIDefinesZeroReg = true;
794
795 switch (MI.getOpcode()) {
796 default:
797 return MI.getOpcode();
798 case AArch64::ADDSWrr:
799 return AArch64::ADDWrr;
800 case AArch64::ADDSWri:
801 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
802 case AArch64::ADDSWrs:
803 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
804 case AArch64::ADDSWrx:
805 return AArch64::ADDWrx;
806 case AArch64::ADDSXrr:
807 return AArch64::ADDXrr;
808 case AArch64::ADDSXri:
809 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
810 case AArch64::ADDSXrs:
811 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
812 case AArch64::ADDSXrx:
813 return AArch64::ADDXrx;
814 case AArch64::SUBSWrr:
815 return AArch64::SUBWrr;
816 case AArch64::SUBSWri:
817 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
818 case AArch64::SUBSWrs:
819 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
820 case AArch64::SUBSWrx:
821 return AArch64::SUBWrx;
822 case AArch64::SUBSXrr:
823 return AArch64::SUBXrr;
824 case AArch64::SUBSXri:
825 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
826 case AArch64::SUBSXrs:
827 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
828 case AArch64::SUBSXrx:
829 return AArch64::SUBXrx;
830 }
831 }
832
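// Kinds of NZCV accesses checked by areCFlagsAccessedBetweenInstrs().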
833 enum AccessKind {
834 AK_Write = 0x01,
835 AK_Read = 0x10,
836 AK_All = 0x11
837 };
838
839 /// True when condition flags are accessed (either by writing or reading)
840 /// on the instruction trace starting at From and ending at To.
841 ///
/// Note: If From and To are in different blocks, the condition flags are
/// assumed to be accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
847 // Early exit if To is at the beginning of the BB.
848 if (To == To->getParent()->begin())
849 return true;
850
851 // Check whether the instructions are in the same basic block
852 // If not, assume the condition flags might get modified somewhere.
853 if (To->getParent() != From->getParent())
854 return true;
855
856 // From must be above To.
857 assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
858 To->getParent()->rend(), [From](MachineInstr &MI) {
859 return MachineBasicBlock::iterator(MI) == From;
860 }) != To->getParent()->rend());
861
862 // We iterate backward starting \p To until we hit \p From.
863 for (--To; To != From; --To) {
864 const MachineInstr &Instr = *To;
865
    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
868 return true;
869 }
870 return false;
871 }
872
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction only when there are no uses of its destination register.
876 ///
877 /// The following steps are tried in order:
878 /// 1. Convert CmpInstr into an unconditional version.
879 /// 2. Remove CmpInstr if above there is an instruction producing a needed
880 /// condition code or an instruction which can be converted into such an instruction.
881 /// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
885 assert(CmpInstr.getParent());
886 assert(MRI);
887
888 // Replace SUBSWrr with SUBWrr if NZCV is not used.
889 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
890 if (DeadNZCVIdx != -1) {
891 if (CmpInstr.definesRegister(AArch64::WZR) ||
892 CmpInstr.definesRegister(AArch64::XZR)) {
893 CmpInstr.eraseFromParent();
894 return true;
895 }
896 unsigned Opc = CmpInstr.getOpcode();
897 unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
898 if (NewOpc == Opc)
899 return false;
900 const MCInstrDesc &MCID = get(NewOpc);
901 CmpInstr.setDesc(MCID);
902 CmpInstr.RemoveOperand(DeadNZCVIdx);
903 bool succeeded = UpdateOperandRegClass(CmpInstr);
904 (void)succeeded;
905 assert(succeeded && "Some operands reg class are incompatible!");
906 return true;
907 }
908
909 // Continue only if we have a "ri" where immediate is zero.
910 // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
911 // function.
912 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
913 if (CmpValue != 0 || SrcReg2 != 0)
914 return false;
915
916 // CmpInstr is a Compare instruction if destination register is not used.
917 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
918 return false;
919
920 return substituteCmpToZero(CmpInstr, SrcReg, MRI);
921 }
922
923 /// Get opcode of S version of Instr.
924 /// If Instr is S version its opcode is returned.
925 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
926 /// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
928 switch (Instr.getOpcode()) {
929 default:
930 return AArch64::INSTRUCTION_LIST_END;
931
932 case AArch64::ADDSWrr:
933 case AArch64::ADDSWri:
934 case AArch64::ADDSXrr:
935 case AArch64::ADDSXri:
936 case AArch64::SUBSWrr:
937 case AArch64::SUBSWri:
938 case AArch64::SUBSXrr:
939 case AArch64::SUBSXri:
    return Instr.getOpcode();
941
942 case AArch64::ADDWrr: return AArch64::ADDSWrr;
943 case AArch64::ADDWri: return AArch64::ADDSWri;
944 case AArch64::ADDXrr: return AArch64::ADDSXrr;
945 case AArch64::ADDXri: return AArch64::ADDSXri;
946 case AArch64::ADCWr: return AArch64::ADCSWr;
947 case AArch64::ADCXr: return AArch64::ADCSXr;
948 case AArch64::SUBWrr: return AArch64::SUBSWrr;
949 case AArch64::SUBWri: return AArch64::SUBSWri;
950 case AArch64::SUBXrr: return AArch64::SUBSXrr;
951 case AArch64::SUBXri: return AArch64::SUBSXri;
952 case AArch64::SBCWr: return AArch64::SBCSWr;
953 case AArch64::SBCXr: return AArch64::SBCSXr;
954 case AArch64::ANDWri: return AArch64::ANDSWri;
955 case AArch64::ANDXri: return AArch64::ANDSXri;
956 }
957 }
958
959 /// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
961 for (auto *BB : MBB->successors())
962 if (BB->isLiveIn(AArch64::NZCV))
963 return true;
964 return false;
965 }
966
967 struct UsedNZCV {
968 bool N;
969 bool Z;
970 bool C;
971 bool V;
  UsedNZCV() : N(false), Z(false), C(false), V(false) {}
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
974 this->N |= UsedFlags.N;
975 this->Z |= UsedFlags.Z;
976 this->C |= UsedFlags.C;
977 this->V |= UsedFlags.V;
978 return *this;
979 }
980 };
981
982 /// Find a condition code used by the instruction.
983 /// Returns AArch64CC::Invalid if either the instruction does not use condition
984 /// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
986 switch (Instr.getOpcode()) {
987 default:
988 return AArch64CC::Invalid;
989
990 case AArch64::Bcc: {
991 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
992 assert(Idx >= 2);
993 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
994 }
995
996 case AArch64::CSINVWr:
997 case AArch64::CSINVXr:
998 case AArch64::CSINCWr:
999 case AArch64::CSINCXr:
1000 case AArch64::CSELWr:
1001 case AArch64::CSELXr:
1002 case AArch64::CSNEGWr:
1003 case AArch64::CSNEGXr:
1004 case AArch64::FCSELSrrr:
1005 case AArch64::FCSELDrrr: {
1006 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1007 assert(Idx >= 1);
1008 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1009 }
1010 }
1011 }
1012
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1014 assert(CC != AArch64CC::Invalid);
1015 UsedNZCV UsedFlags;
1016 switch (CC) {
1017 default:
1018 break;
1019
1020 case AArch64CC::EQ: // Z set
1021 case AArch64CC::NE: // Z clear
1022 UsedFlags.Z = true;
1023 break;
1024
1025 case AArch64CC::HI: // Z clear and C set
1026 case AArch64CC::LS: // Z set or C clear
1027 UsedFlags.Z = true;
1028 case AArch64CC::HS: // C set
1029 case AArch64CC::LO: // C clear
1030 UsedFlags.C = true;
1031 break;
1032
1033 case AArch64CC::MI: // N set
1034 case AArch64CC::PL: // N clear
1035 UsedFlags.N = true;
1036 break;
1037
1038 case AArch64CC::VS: // V set
1039 case AArch64CC::VC: // V clear
1040 UsedFlags.V = true;
1041 break;
1042
1043 case AArch64CC::GT: // Z clear, N and V the same
1044 case AArch64CC::LE: // Z set, N and V differ
1045 UsedFlags.Z = true;
1046 case AArch64CC::GE: // N and V the same
1047 case AArch64CC::LT: // N and V differ
1048 UsedFlags.N = true;
1049 UsedFlags.V = true;
1050 break;
1051 }
1052 return UsedFlags;
1053 }
1054
static bool isADDSRegImm(unsigned Opcode) {
1056 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1057 }
1058
static bool isSUBSRegImm(unsigned Opcode) {
1060 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1061 }
1062
1063 /// Check if CmpInstr can be substituted by MI.
1064 ///
1065 /// CmpInstr can be substituted:
1066 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1067 /// - and, MI and CmpInstr are from the same MachineBB
1068 /// - and, condition flags are not alive in successors of the CmpInstr parent
1069 /// - and, if MI opcode is the S form there must be no defs of flags between
1070 /// MI and CmpInstr
1071 /// or if MI opcode is not the S form there must be neither defs of flags
1072 /// nor uses of flags between MI and CmpInstr.
1073 /// - and C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
1076 assert(MI);
1077 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1078 assert(CmpInstr);
1079
1080 const unsigned CmpOpcode = CmpInstr->getOpcode();
1081 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1082 return false;
1083
1084 if (MI->getParent() != CmpInstr->getParent())
1085 return false;
1086
1087 if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1088 return false;
1089
1090 AccessKind AccessToCheck = AK_Write;
1091 if (sForm(*MI) != MI->getOpcode())
1092 AccessToCheck = AK_All;
1093 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1094 return false;
1095
1096 UsedNZCV NZCVUsedAfterCmp;
1097 for (auto I = std::next(CmpInstr->getIterator()), E = CmpInstr->getParent()->instr_end();
1098 I != E; ++I) {
1099 const MachineInstr &Instr = *I;
1100 if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1101 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1102 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1103 return false;
1104 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1105 }
1106
1107 if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1108 break;
1109 }
1110
1111 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1112 }
1113
1114 /// Substitute an instruction comparing to zero with another instruction
1115 /// which produces needed condition flags.
1116 ///
1117 /// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
1121 assert(MRI);
1122 // Get the unique definition of SrcReg.
1123 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1124 if (!MI)
1125 return false;
1126
1127 const TargetRegisterInfo *TRI = &getRegisterInfo();
1128
1129 unsigned NewOpc = sForm(*MI);
1130 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1131 return false;
1132
1133 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1134 return false;
1135
1136 // Update the instruction to set NZCV.
1137 MI->setDesc(get(NewOpc));
1138 CmpInstr.eraseFromParent();
1139 bool succeeded = UpdateOperandRegClass(*MI);
1140 (void)succeeded;
1141 assert(succeeded && "Some operands reg class are incompatible!");
1142 MI->addRegisterDefined(AArch64::NZCV, TRI);
1143 return true;
1144 }
1145
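// Expand the LOAD_STACK_GUARD pseudo into the address-materialization and
// load sequence appropriate for the code model and how the stack-guard
// symbol is referenced (GOT, large code model, or ADRP + page offset).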
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1147 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
1148 return false;
1149
1150 MachineBasicBlock &MBB = *MI.getParent();
1151 DebugLoc DL = MI.getDebugLoc();
1152 unsigned Reg = MI.getOperand(0).getReg();
1153 const GlobalValue *GV =
1154 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1155 const TargetMachine &TM = MBB.getParent()->getTarget();
1156 unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1157 const unsigned char MO_NC = AArch64II::MO_NC;
1158
1159 if ((OpFlags & AArch64II::MO_GOT) != 0) {
1160 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1161 .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
1162 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1163 .addReg(Reg, RegState::Kill)
1164 .addImm(0)
1165 .addMemOperand(*MI.memoperands_begin());
1166 } else if (TM.getCodeModel() == CodeModel::Large) {
1167 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1168 .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
1169 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1170 .addReg(Reg, RegState::Kill)
1171 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
1172 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1173 .addReg(Reg, RegState::Kill)
1174 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
1175 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1176 .addReg(Reg, RegState::Kill)
1177 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
1178 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1179 .addReg(Reg, RegState::Kill)
1180 .addImm(0)
1181 .addMemOperand(*MI.memoperands_begin());
1182 } else {
1183 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1184 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1185 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1186 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1187 .addReg(Reg, RegState::Kill)
1188 .addGlobalAddress(GV, 0, LoFlags)
1189 .addMemOperand(*MI.memoperands_begin());
1190 }
1191
1192 MBB.erase(MI);
1193
1194 return true;
1195 }
1196
/// Return true if this instruction has a non-zero immediate operand.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
1199 switch (MI.getOpcode()) {
1200 default:
1201 break;
1202 case AArch64::ADDSWrs:
1203 case AArch64::ADDSXrs:
1204 case AArch64::ADDWrs:
1205 case AArch64::ADDXrs:
1206 case AArch64::ANDSWrs:
1207 case AArch64::ANDSXrs:
1208 case AArch64::ANDWrs:
1209 case AArch64::ANDXrs:
1210 case AArch64::BICSWrs:
1211 case AArch64::BICSXrs:
1212 case AArch64::BICWrs:
1213 case AArch64::BICXrs:
1214 case AArch64::CRC32Brr:
1215 case AArch64::CRC32CBrr:
1216 case AArch64::CRC32CHrr:
1217 case AArch64::CRC32CWrr:
1218 case AArch64::CRC32CXrr:
1219 case AArch64::CRC32Hrr:
1220 case AArch64::CRC32Wrr:
1221 case AArch64::CRC32Xrr:
1222 case AArch64::EONWrs:
1223 case AArch64::EONXrs:
1224 case AArch64::EORWrs:
1225 case AArch64::EORXrs:
1226 case AArch64::ORNWrs:
1227 case AArch64::ORNXrs:
1228 case AArch64::ORRWrs:
1229 case AArch64::ORRXrs:
1230 case AArch64::SUBSWrs:
1231 case AArch64::SUBSXrs:
1232 case AArch64::SUBWrs:
1233 case AArch64::SUBXrs:
1234 if (MI.getOperand(3).isImm()) {
1235 unsigned val = MI.getOperand(3).getImm();
1236 return (val != 0);
1237 }
1238 break;
1239 }
1240 return false;
1241 }
1242
/// Return true if this instruction has a non-zero immediate operand.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
1245 switch (MI.getOpcode()) {
1246 default:
1247 break;
1248 case AArch64::ADDSWrx:
1249 case AArch64::ADDSXrx:
1250 case AArch64::ADDSXrx64:
1251 case AArch64::ADDWrx:
1252 case AArch64::ADDXrx:
1253 case AArch64::ADDXrx64:
1254 case AArch64::SUBSWrx:
1255 case AArch64::SUBSXrx:
1256 case AArch64::SUBSXrx64:
1257 case AArch64::SUBWrx:
1258 case AArch64::SUBXrx:
1259 case AArch64::SUBXrx64:
1260 if (MI.getOperand(3).isImm()) {
1261 unsigned val = MI.getOperand(3).getImm();
1262 return (val != 0);
1263 }
1264 break;
1265 }
1266
1267 return false;
1268 }
1269
1270 // Return true if this instruction simply sets its single destination register
1271 // to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
1273 switch (MI.getOpcode()) {
1274 default:
1275 break;
1276 case AArch64::MOVZWi:
1277 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1278 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1279 assert(MI.getDesc().getNumOperands() == 3 &&
1280 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1281 return true;
1282 }
1283 break;
1284 case AArch64::ANDWri: // and Rd, Rzr, #imm
1285 return MI.getOperand(1).getReg() == AArch64::WZR;
1286 case AArch64::ANDXri:
1287 return MI.getOperand(1).getReg() == AArch64::XZR;
1288 case TargetOpcode::COPY:
1289 return MI.getOperand(1).getReg() == AArch64::WZR;
1290 }
1291 return false;
1292 }
1293
1294 // Return true if this instruction simply renames a general register without
1295 // modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
1297 switch (MI.getOpcode()) {
1298 default:
1299 break;
1300 case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
1302 unsigned DstReg = MI.getOperand(0).getReg();
1303 return (AArch64::GPR32RegClass.contains(DstReg) ||
1304 AArch64::GPR64RegClass.contains(DstReg));
1305 }
1306 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1307 if (MI.getOperand(1).getReg() == AArch64::XZR) {
1308 assert(MI.getDesc().getNumOperands() == 4 &&
1309 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1310 return true;
1311 }
1312 break;
1313 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1314 if (MI.getOperand(2).getImm() == 0) {
1315 assert(MI.getDesc().getNumOperands() == 4 &&
1316 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1317 return true;
1318 }
1319 break;
1320 }
1321 return false;
1322 }
1323
1324 // Return true if this instruction simply renames a general register without
1325 // modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
1327 switch (MI.getOpcode()) {
1328 default:
1329 break;
1330 case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
1332 unsigned DstReg = MI.getOperand(0).getReg();
1333 return (AArch64::FPR64RegClass.contains(DstReg) ||
1334 AArch64::FPR128RegClass.contains(DstReg));
1335 }
1336 case AArch64::ORRv16i8:
1337 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1338 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1339 "invalid ORRv16i8 operands");
1340 return true;
1341 }
1342 break;
1343 }
1344 return false;
1345 }
1346
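// If MI is a plain load from a frame index with a zero offset, return the
// destination register and set FrameIndex; otherwise return 0.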
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
1349 switch (MI.getOpcode()) {
1350 default:
1351 break;
1352 case AArch64::LDRWui:
1353 case AArch64::LDRXui:
1354 case AArch64::LDRBui:
1355 case AArch64::LDRHui:
1356 case AArch64::LDRSui:
1357 case AArch64::LDRDui:
1358 case AArch64::LDRQui:
1359 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1360 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1361 FrameIndex = MI.getOperand(1).getIndex();
1362 return MI.getOperand(0).getReg();
1363 }
1364 break;
1365 }
1366
1367 return 0;
1368 }
1369
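// If MI is a plain store to a frame index with a zero offset, return the
// stored register and set FrameIndex; otherwise return 0.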
unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
1372 switch (MI.getOpcode()) {
1373 default:
1374 break;
1375 case AArch64::STRWui:
1376 case AArch64::STRXui:
1377 case AArch64::STRBui:
1378 case AArch64::STRHui:
1379 case AArch64::STRSui:
1380 case AArch64::STRDui:
1381 case AArch64::STRQui:
1382 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1383 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1384 FrameIndex = MI.getOperand(1).getIndex();
1385 return MI.getOperand(0).getReg();
1386 }
1387 break;
1388 }
1389 return 0;
1390 }
1391
/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
1396 switch (MI.getOpcode()) {
1397 default:
1398 break;
1399 case AArch64::LDRBBroW:
1400 case AArch64::LDRBroW:
1401 case AArch64::LDRDroW:
1402 case AArch64::LDRHHroW:
1403 case AArch64::LDRHroW:
1404 case AArch64::LDRQroW:
1405 case AArch64::LDRSBWroW:
1406 case AArch64::LDRSBXroW:
1407 case AArch64::LDRSHWroW:
1408 case AArch64::LDRSHXroW:
1409 case AArch64::LDRSWroW:
1410 case AArch64::LDRSroW:
1411 case AArch64::LDRWroW:
1412 case AArch64::LDRXroW:
1413 case AArch64::STRBBroW:
1414 case AArch64::STRBroW:
1415 case AArch64::STRDroW:
1416 case AArch64::STRHHroW:
1417 case AArch64::STRHroW:
1418 case AArch64::STRQroW:
1419 case AArch64::STRSroW:
1420 case AArch64::STRWroW:
1421 case AArch64::STRXroW:
1422 case AArch64::LDRBBroX:
1423 case AArch64::LDRBroX:
1424 case AArch64::LDRDroX:
1425 case AArch64::LDRHHroX:
1426 case AArch64::LDRHroX:
1427 case AArch64::LDRQroX:
1428 case AArch64::LDRSBWroX:
1429 case AArch64::LDRSBXroX:
1430 case AArch64::LDRSHWroX:
1431 case AArch64::LDRSHXroX:
1432 case AArch64::LDRSWroX:
1433 case AArch64::LDRSroX:
1434 case AArch64::LDRWroX:
1435 case AArch64::LDRXroX:
1436 case AArch64::STRBBroX:
1437 case AArch64::STRBroX:
1438 case AArch64::STRDroX:
1439 case AArch64::STRHHroX:
1440 case AArch64::STRHroX:
1441 case AArch64::STRQroX:
1442 case AArch64::STRSroX:
1443 case AArch64::STRWroX:
1444 case AArch64::STRXroX:
1445
1446 unsigned Val = MI.getOperand(3).getImm();
1447 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1448 return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1449 }
1450 return false;
1451 }
1452
1453 /// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
1455 return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1456 return MMO->getFlags() & MOSuppressPair;
1457 });
1458 }
1459
1460 /// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
1462 if (MI.memoperands_empty())
1463 return;
1464 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1465 }
1466
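// Return true for the unscaled (LDUR*/STUR*) load/store opcodes, which take a
// byte offset rather than an element-scaled one.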
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
1468 switch (Opc) {
1469 default:
1470 return false;
1471 case AArch64::STURSi:
1472 case AArch64::STURDi:
1473 case AArch64::STURQi:
1474 case AArch64::STURBBi:
1475 case AArch64::STURHHi:
1476 case AArch64::STURWi:
1477 case AArch64::STURXi:
1478 case AArch64::LDURSi:
1479 case AArch64::LDURDi:
1480 case AArch64::LDURQi:
1481 case AArch64::LDURWi:
1482 case AArch64::LDURXi:
1483 case AArch64::LDURSWi:
1484 case AArch64::LDURHHi:
1485 case AArch64::LDURBBi:
1486 case AArch64::LDURSBWi:
1487 case AArch64::LDURSHWi:
1488 return true;
1489 }
1490 }
1491
bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
1493 return isUnscaledLdSt(MI.getOpcode());
1494 }
1495
1496 // Is this a candidate for ld/st merging or pairing? For example, we don't
1497 // touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
1499 // If this is a volatile load/store, don't mess with it.
1500 if (MI.hasOrderedMemoryRef())
1501 return false;
1502
1503 // Make sure this is a reg+imm (as opposed to an address reloc).
1504 assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
1505 if (!MI.getOperand(2).isImm())
1506 return false;
1507
1508 // Can't merge/pair if the instruction modifies the base register.
1509 // e.g., ldr x0, [x0]
1510 unsigned BaseReg = MI.getOperand(1).getReg();
1511 const TargetRegisterInfo *TRI = &getRegisterInfo();
1512 if (MI.modifiesRegister(BaseReg, TRI))
1513 return false;
1514
1515 // Check if this load/store has a hint to avoid pair formation.
1516 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1517 if (isLdStPairSuppressed(MI))
1518 return false;
1519
1520 // On some CPUs quad load/store pairs are slower than two single load/stores.
1521 if (Subtarget.avoidQuadLdStPairs()) {
1522 switch (MI.getOpcode()) {
1523 default:
1524 break;
1525
1526 case AArch64::LDURQi:
1527 case AArch64::STURQi:
1528 case AArch64::LDRQui:
1529 case AArch64::STRQui:
1530 return false;
1531 }
1532 }
1533
1534 return true;
1535 }
1536
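// For simple reg+imm loads and stores, report the base register and byte
// offset; the width-aware overload below does the actual decoding.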
bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
1540 switch (LdSt.getOpcode()) {
1541 default:
1542 return false;
1543 // Scaled instructions.
1544 case AArch64::STRSui:
1545 case AArch64::STRDui:
1546 case AArch64::STRQui:
1547 case AArch64::STRXui:
1548 case AArch64::STRWui:
1549 case AArch64::LDRSui:
1550 case AArch64::LDRDui:
1551 case AArch64::LDRQui:
1552 case AArch64::LDRXui:
1553 case AArch64::LDRWui:
1554 case AArch64::LDRSWui:
1555 // Unscaled instructions.
1556 case AArch64::STURSi:
1557 case AArch64::STURDi:
1558 case AArch64::STURQi:
1559 case AArch64::STURXi:
1560 case AArch64::STURWi:
1561 case AArch64::LDURSi:
1562 case AArch64::LDURDi:
1563 case AArch64::LDURQi:
1564 case AArch64::LDURWi:
1565 case AArch64::LDURXi:
1566 case AArch64::LDURSWi:
1567 unsigned Width;
1568 return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
1569 };
1570 }
1571
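// Decode base register, byte offset and access width for reg+imm load/store
// and load/store-pair instructions, scaling the immediate by the element
// size where the opcode requires it.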
bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
1575 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1576 // Handle only loads/stores with base register followed by immediate offset.
1577 if (LdSt.getNumExplicitOperands() == 3) {
1578 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1579 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
1580 return false;
1581 } else if (LdSt.getNumExplicitOperands() == 4) {
1582 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1583 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
1584 !LdSt.getOperand(3).isImm())
1585 return false;
1586 } else
1587 return false;
1588
1589 // Offset is calculated as the immediate operand multiplied by the scaling factor.
1590 // Unscaled instructions have scaling factor set to 1.
1591 unsigned Scale = 0;
1592 switch (LdSt.getOpcode()) {
1593 default:
1594 return false;
1595 case AArch64::LDURQi:
1596 case AArch64::STURQi:
1597 Width = 16;
1598 Scale = 1;
1599 break;
1600 case AArch64::LDURXi:
1601 case AArch64::LDURDi:
1602 case AArch64::STURXi:
1603 case AArch64::STURDi:
1604 Width = 8;
1605 Scale = 1;
1606 break;
1607 case AArch64::LDURWi:
1608 case AArch64::LDURSi:
1609 case AArch64::LDURSWi:
1610 case AArch64::STURWi:
1611 case AArch64::STURSi:
1612 Width = 4;
1613 Scale = 1;
1614 break;
1615 case AArch64::LDURHi:
1616 case AArch64::LDURHHi:
1617 case AArch64::LDURSHXi:
1618 case AArch64::LDURSHWi:
1619 case AArch64::STURHi:
1620 case AArch64::STURHHi:
1621 Width = 2;
1622 Scale = 1;
1623 break;
1624 case AArch64::LDURBi:
1625 case AArch64::LDURBBi:
1626 case AArch64::LDURSBXi:
1627 case AArch64::LDURSBWi:
1628 case AArch64::STURBi:
1629 case AArch64::STURBBi:
1630 Width = 1;
1631 Scale = 1;
1632 break;
1633 case AArch64::LDPQi:
1634 case AArch64::LDNPQi:
1635 case AArch64::STPQi:
1636 case AArch64::STNPQi:
1637 Scale = 16;
1638 Width = 32;
1639 break;
1640 case AArch64::LDRQui:
1641 case AArch64::STRQui:
1642 Scale = Width = 16;
1643 break;
1644 case AArch64::LDPXi:
1645 case AArch64::LDPDi:
1646 case AArch64::LDNPXi:
1647 case AArch64::LDNPDi:
1648 case AArch64::STPXi:
1649 case AArch64::STPDi:
1650 case AArch64::STNPXi:
1651 case AArch64::STNPDi:
1652 Scale = 8;
1653 Width = 16;
1654 break;
1655 case AArch64::LDRXui:
1656 case AArch64::LDRDui:
1657 case AArch64::STRXui:
1658 case AArch64::STRDui:
1659 Scale = Width = 8;
1660 break;
1661 case AArch64::LDPWi:
1662 case AArch64::LDPSi:
1663 case AArch64::LDNPWi:
1664 case AArch64::LDNPSi:
1665 case AArch64::STPWi:
1666 case AArch64::STPSi:
1667 case AArch64::STNPWi:
1668 case AArch64::STNPSi:
1669 Scale = 4;
1670 Width = 8;
1671 break;
1672 case AArch64::LDRWui:
1673 case AArch64::LDRSui:
1674 case AArch64::LDRSWui:
1675 case AArch64::STRWui:
1676 case AArch64::STRSui:
1677 Scale = Width = 4;
1678 break;
1679 case AArch64::LDRHui:
1680 case AArch64::LDRHHui:
1681 case AArch64::STRHui:
1682 case AArch64::STRHHui:
1683 Scale = Width = 2;
1684 break;
1685 case AArch64::LDRBui:
1686 case AArch64::LDRBBui:
1687 case AArch64::STRBui:
1688 case AArch64::STRBBui:
1689 Scale = Width = 1;
1690 break;
1691 }
1692
1693 if (LdSt.getNumExplicitOperands() == 3) {
1694 BaseReg = LdSt.getOperand(1).getReg();
1695 Offset = LdSt.getOperand(2).getImm() * Scale;
1696 } else {
1697 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1698 BaseReg = LdSt.getOperand(2).getReg();
1699 Offset = LdSt.getOperand(3).getImm() * Scale;
1700 }
1701 return true;
1702 }
1703
1704 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
1705 // scaled.
1706 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
1707 unsigned OffsetStride = 1;
1708 switch (Opc) {
1709 default:
1710 return false;
1711 case AArch64::LDURQi:
1712 case AArch64::STURQi:
1713 OffsetStride = 16;
1714 break;
1715 case AArch64::LDURXi:
1716 case AArch64::LDURDi:
1717 case AArch64::STURXi:
1718 case AArch64::STURDi:
1719 OffsetStride = 8;
1720 break;
1721 case AArch64::LDURWi:
1722 case AArch64::LDURSi:
1723 case AArch64::LDURSWi:
1724 case AArch64::STURWi:
1725 case AArch64::STURSi:
1726 OffsetStride = 4;
1727 break;
1728 }
1729 // If the byte-offset isn't a multiple of the stride, we can't scale this
1730 // offset.
1731 if (Offset % OffsetStride != 0)
1732 return false;
1733
1734 // Convert the byte-offset used by unscaled into an "element" offset used
1735 // by the scaled pair load/store instructions.
1736 Offset /= OffsetStride;
1737 return true;
1738 }
1739
1740 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
1741 if (FirstOpc == SecondOpc)
1742 return true;
1743 // We can also pair sign-ext and zero-ext instructions.
1744 switch (FirstOpc) {
1745 default:
1746 return false;
1747 case AArch64::LDRWui:
1748 case AArch64::LDURWi:
1749 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
1750 case AArch64::LDRSWui:
1751 case AArch64::LDURSWi:
1752 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
1753 }
1754 // These instructions can't be paired based on their opcodes.
1755 return false;
1756 }
1757
1758 /// Detect opportunities for ldp/stp formation.
1759 ///
1760 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
1761 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
1762 MachineInstr &SecondLdSt,
1763 unsigned NumLoads) const {
1764 // Only cluster up to a single pair.
1765 if (NumLoads > 1)
1766 return false;
1767
1768 // Can we pair these instructions based on their opcodes?
1769 unsigned FirstOpc = FirstLdSt.getOpcode();
1770 unsigned SecondOpc = SecondLdSt.getOpcode();
1771 if (!canPairLdStOpc(FirstOpc, SecondOpc))
1772 return false;
1773
1774 // Can't merge volatiles or load/stores that have a hint to avoid pair
1775 // formation, for example.
1776 if (!isCandidateToMergeOrPair(FirstLdSt) ||
1777 !isCandidateToMergeOrPair(SecondLdSt))
1778 return false;
1779
1780 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
1781 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
1782 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
1783 return false;
1784
1785 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
1786 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
1787 return false;
1788
1789 // Pairwise instructions have a 7-bit signed offset field.
1790 if (Offset1 > 63 || Offset1 < -64)
1791 return false;
1792
1793 // The caller should already have ordered First/SecondLdSt by offset.
1794 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
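// Offsets are in scaled element units at this point (unscaled byte offsets
// were converted by scaleOffset above), so the two accesses form a pair
// exactly when the second element immediately follows the first.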
1795 return Offset1 + 1 == Offset2;
1796 }
1797
1798 bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
1799 MachineInstr &Second) const {
1800 if (Subtarget.hasMacroOpFusion()) {
1801 // Fuse CMN, CMP, TST followed by Bcc.
1802 unsigned SecondOpcode = Second.getOpcode();
1803 if (SecondOpcode == AArch64::Bcc) {
1804 switch (First.getOpcode()) {
1805 default:
1806 return false;
1807 case AArch64::SUBSWri:
1808 case AArch64::ADDSWri:
1809 case AArch64::ANDSWri:
1810 case AArch64::SUBSXri:
1811 case AArch64::ADDSXri:
1812 case AArch64::ANDSXri:
1813 return true;
1814 }
1815 }
1816 // Fuse ALU operations followed by CBZ/CBNZ.
1817 if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
1818 SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
1819 switch (First.getOpcode()) {
1820 default:
1821 return false;
1822 case AArch64::ADDWri:
1823 case AArch64::ADDXri:
1824 case AArch64::ANDWri:
1825 case AArch64::ANDXri:
1826 case AArch64::EORWri:
1827 case AArch64::EORXri:
1828 case AArch64::ORRWri:
1829 case AArch64::ORRXri:
1830 case AArch64::SUBWri:
1831 case AArch64::SUBXri:
1832 return true;
1833 }
1834 }
1835 }
1836 return false;
1837 }
1838
1839 MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
1840 MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
1841 const MDNode *Expr, const DebugLoc &DL) const {
1842 MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
1843 .addFrameIndex(FrameIx)
1844 .addImm(0)
1845 .addImm(Offset)
1846 .addMetadata(Var)
1847 .addMetadata(Expr);
1848 return &*MIB;
1849 }
1850
1851 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
1852 unsigned Reg, unsigned SubIdx,
1853 unsigned State,
1854 const TargetRegisterInfo *TRI) {
1855 if (!SubIdx)
1856 return MIB.addReg(Reg, State);
1857
1858 if (TargetRegisterInfo::isPhysicalRegister(Reg))
1859 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1860 return MIB.addReg(Reg, State, SubIdx);
1861 }
1862
1863 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
1864 unsigned NumRegs) {
1865 // We really want the positive remainder mod 32 here, which happens to be
1866 // easily obtainable with a mask.
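// E.g. a forward copy of D1_D2 into D2_D3 would read D2 only after it has
// already been overwritten, so copyPhysRegTuple iterates the sub-registers in
// reverse order when this returns true.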
1867 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
1868 }
1869
1870 void AArch64InstrInfo::copyPhysRegTuple(
1871 MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
1872 unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
1873 llvm::ArrayRef<unsigned> Indices) const {
1874 assert(Subtarget.hasNEON() &&
1875 "Unexpected register copy without NEON");
1876 const TargetRegisterInfo *TRI = &getRegisterInfo();
1877 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
1878 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
1879 unsigned NumRegs = Indices.size();
1880
1881 int SubReg = 0, End = NumRegs, Incr = 1;
1882 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
1883 SubReg = NumRegs - 1;
1884 End = -1;
1885 Incr = -1;
1886 }
1887
1888 for (; SubReg != End; SubReg += Incr) {
1889 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
1890 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
1891 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
1892 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
1893 }
1894 }
1895
1896 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
1897 MachineBasicBlock::iterator I,
1898 const DebugLoc &DL, unsigned DestReg,
1899 unsigned SrcReg, bool KillSrc) const {
1900 if (AArch64::GPR32spRegClass.contains(DestReg) &&
1901 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
1902 const TargetRegisterInfo *TRI = &getRegisterInfo();
1903
1904 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
1905 // If either operand is WSP, expand to ADD #0.
1906 if (Subtarget.hasZeroCycleRegMove()) {
1907 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
1908 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1909 &AArch64::GPR64spRegClass);
1910 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1911 &AArch64::GPR64spRegClass);
1912 // This instruction is reading and writing X registers. This may upset
1913 // the register scavenger and machine verifier, so we need to indicate
1914 // that we are reading an undefined value from SrcRegX, but a proper
1915 // value from SrcReg.
1916 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
1917 .addReg(SrcRegX, RegState::Undef)
1918 .addImm(0)
1919 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1920 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1921 } else {
1922 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
1923 .addReg(SrcReg, getKillRegState(KillSrc))
1924 .addImm(0)
1925 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1926 }
1927 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
1928 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
1929 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1930 } else {
1931 if (Subtarget.hasZeroCycleRegMove()) {
1932 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
1933 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1934 &AArch64::GPR64spRegClass);
1935 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1936 &AArch64::GPR64spRegClass);
1937 // This instruction is reading and writing X registers. This may upset
1938 // the register scavenger and machine verifier, so we need to indicate
1939 // that we are reading an undefined value from SrcRegX, but a proper
1940 // value from SrcReg.
1941 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
1942 .addReg(AArch64::XZR)
1943 .addReg(SrcRegX, RegState::Undef)
1944 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1945 } else {
1946 // Otherwise, expand to ORR WZR.
1947 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
1948 .addReg(AArch64::WZR)
1949 .addReg(SrcReg, getKillRegState(KillSrc));
1950 }
1951 }
1952 return;
1953 }
1954
1955 if (AArch64::GPR64spRegClass.contains(DestReg) &&
1956 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
1957 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
1958 // If either operand is SP, expand to ADD #0.
1959 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
1960 .addReg(SrcReg, getKillRegState(KillSrc))
1961 .addImm(0)
1962 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1963 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
1964 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
1965 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1966 } else {
1967 // Otherwise, expand to ORR XZR.
1968 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
1969 .addReg(AArch64::XZR)
1970 .addReg(SrcReg, getKillRegState(KillSrc));
1971 }
1972 return;
1973 }
1974
1975 // Copy a DDDD register quad by copying the individual sub-registers.
1976 if (AArch64::DDDDRegClass.contains(DestReg) &&
1977 AArch64::DDDDRegClass.contains(SrcReg)) {
1978 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1979 AArch64::dsub2, AArch64::dsub3 };
1980 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1981 Indices);
1982 return;
1983 }
1984
1985 // Copy a DDD register triple by copying the individual sub-registers.
1986 if (AArch64::DDDRegClass.contains(DestReg) &&
1987 AArch64::DDDRegClass.contains(SrcReg)) {
1988 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1989 AArch64::dsub2 };
1990 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1991 Indices);
1992 return;
1993 }
1994
1995 // Copy a DD register pair by copying the individual sub-registers.
1996 if (AArch64::DDRegClass.contains(DestReg) &&
1997 AArch64::DDRegClass.contains(SrcReg)) {
1998 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
1999 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2000 Indices);
2001 return;
2002 }
2003
2004 // Copy a QQQQ register quad by copying the individual sub-registers.
2005 if (AArch64::QQQQRegClass.contains(DestReg) &&
2006 AArch64::QQQQRegClass.contains(SrcReg)) {
2007 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2008 AArch64::qsub2, AArch64::qsub3 };
2009 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2010 Indices);
2011 return;
2012 }
2013
2014 // Copy a QQQ register triple by copying the individual sub-registers.
2015 if (AArch64::QQQRegClass.contains(DestReg) &&
2016 AArch64::QQQRegClass.contains(SrcReg)) {
2017 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2018 AArch64::qsub2 };
2019 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2020 Indices);
2021 return;
2022 }
2023
2024 // Copy a QQ register pair by copying the individual sub-registers.
2025 if (AArch64::QQRegClass.contains(DestReg) &&
2026 AArch64::QQRegClass.contains(SrcReg)) {
2027 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
2028 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2029 Indices);
2030 return;
2031 }
2032
2033 if (AArch64::FPR128RegClass.contains(DestReg) &&
2034 AArch64::FPR128RegClass.contains(SrcReg)) {
2035 if (Subtarget.hasNEON()) {
2036 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2037 .addReg(SrcReg)
2038 .addReg(SrcReg, getKillRegState(KillSrc));
2039 } else {
2040 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2041 .addReg(AArch64::SP, RegState::Define)
2042 .addReg(SrcReg, getKillRegState(KillSrc))
2043 .addReg(AArch64::SP)
2044 .addImm(-16);
2045 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2046 .addReg(AArch64::SP, RegState::Define)
2047 .addReg(DestReg, RegState::Define)
2048 .addReg(AArch64::SP)
2049 .addImm(16);
2050 }
2051 return;
2052 }
2053
2054 if (AArch64::FPR64RegClass.contains(DestReg) &&
2055 AArch64::FPR64RegClass.contains(SrcReg)) {
2056 if (Subtarget.hasNEON()) {
2057 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2058 &AArch64::FPR128RegClass);
2059 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2060 &AArch64::FPR128RegClass);
2061 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2062 .addReg(SrcReg)
2063 .addReg(SrcReg, getKillRegState(KillSrc));
2064 } else {
2065 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2066 .addReg(SrcReg, getKillRegState(KillSrc));
2067 }
2068 return;
2069 }
2070
2071 if (AArch64::FPR32RegClass.contains(DestReg) &&
2072 AArch64::FPR32RegClass.contains(SrcReg)) {
2073 if (Subtarget.hasNEON()) {
2074 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2075 &AArch64::FPR128RegClass);
2076 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2077 &AArch64::FPR128RegClass);
2078 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2079 .addReg(SrcReg)
2080 .addReg(SrcReg, getKillRegState(KillSrc));
2081 } else {
2082 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2083 .addReg(SrcReg, getKillRegState(KillSrc));
2084 }
2085 return;
2086 }
2087
2088 if (AArch64::FPR16RegClass.contains(DestReg) &&
2089 AArch64::FPR16RegClass.contains(SrcReg)) {
2090 if (Subtarget.hasNEON()) {
2091 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2092 &AArch64::FPR128RegClass);
2093 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2094 &AArch64::FPR128RegClass);
2095 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2096 .addReg(SrcReg)
2097 .addReg(SrcReg, getKillRegState(KillSrc));
2098 } else {
2099 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2100 &AArch64::FPR32RegClass);
2101 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2102 &AArch64::FPR32RegClass);
2103 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2104 .addReg(SrcReg, getKillRegState(KillSrc));
2105 }
2106 return;
2107 }
2108
2109 if (AArch64::FPR8RegClass.contains(DestReg) &&
2110 AArch64::FPR8RegClass.contains(SrcReg)) {
2111 if (Subtarget.hasNEON()) {
2112 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2113 &AArch64::FPR128RegClass);
2114 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2115 &AArch64::FPR128RegClass);
2116 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2117 .addReg(SrcReg)
2118 .addReg(SrcReg, getKillRegState(KillSrc));
2119 } else {
2120 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2121 &AArch64::FPR32RegClass);
2122 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2123 &AArch64::FPR32RegClass);
2124 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2125 .addReg(SrcReg, getKillRegState(KillSrc));
2126 }
2127 return;
2128 }
2129
2130 // Copies between GPR64 and FPR64.
2131 if (AArch64::FPR64RegClass.contains(DestReg) &&
2132 AArch64::GPR64RegClass.contains(SrcReg)) {
2133 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2134 .addReg(SrcReg, getKillRegState(KillSrc));
2135 return;
2136 }
2137 if (AArch64::GPR64RegClass.contains(DestReg) &&
2138 AArch64::FPR64RegClass.contains(SrcReg)) {
2139 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2140 .addReg(SrcReg, getKillRegState(KillSrc));
2141 return;
2142 }
2143 // Copies between GPR32 and FPR32.
2144 if (AArch64::FPR32RegClass.contains(DestReg) &&
2145 AArch64::GPR32RegClass.contains(SrcReg)) {
2146 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2147 .addReg(SrcReg, getKillRegState(KillSrc));
2148 return;
2149 }
2150 if (AArch64::GPR32RegClass.contains(DestReg) &&
2151 AArch64::FPR32RegClass.contains(SrcReg)) {
2152 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2153 .addReg(SrcReg, getKillRegState(KillSrc));
2154 return;
2155 }
2156
2157 if (DestReg == AArch64::NZCV) {
2158 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2159 BuildMI(MBB, I, DL, get(AArch64::MSR))
2160 .addImm(AArch64SysReg::NZCV)
2161 .addReg(SrcReg, getKillRegState(KillSrc))
2162 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2163 return;
2164 }
2165
2166 if (SrcReg == AArch64::NZCV) {
2167 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2168 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2169 .addImm(AArch64SysReg::NZCV)
2170 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2171 return;
2172 }
2173
2174 llvm_unreachable("unimplemented reg-to-reg copy");
2175 }
2176
2177 void AArch64InstrInfo::storeRegToStackSlot(
2178 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2179 bool isKill, int FI, const TargetRegisterClass *RC,
2180 const TargetRegisterInfo *TRI) const {
2181 DebugLoc DL;
2182 if (MBBI != MBB.end())
2183 DL = MBBI->getDebugLoc();
2184 MachineFunction &MF = *MBB.getParent();
2185 MachineFrameInfo &MFI = *MF.getFrameInfo();
2186 unsigned Align = MFI.getObjectAlignment(FI);
2187
2188 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2189 MachineMemOperand *MMO = MF.getMachineMemOperand(
2190 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2191 unsigned Opc = 0;
2192 bool Offset = true;
2193 switch (RC->getSize()) {
2194 case 1:
2195 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2196 Opc = AArch64::STRBui;
2197 break;
2198 case 2:
2199 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2200 Opc = AArch64::STRHui;
2201 break;
2202 case 4:
2203 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2204 Opc = AArch64::STRWui;
2205 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2206 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2207 else
2208 assert(SrcReg != AArch64::WSP);
2209 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2210 Opc = AArch64::STRSui;
2211 break;
2212 case 8:
2213 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2214 Opc = AArch64::STRXui;
2215 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2216 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2217 else
2218 assert(SrcReg != AArch64::SP);
2219 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2220 Opc = AArch64::STRDui;
2221 break;
2222 case 16:
2223 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2224 Opc = AArch64::STRQui;
2225 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2226 assert(Subtarget.hasNEON() &&
2227 "Unexpected register store without NEON");
2228 Opc = AArch64::ST1Twov1d;
2229 Offset = false;
2230 }
2231 break;
2232 case 24:
2233 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2234 assert(Subtarget.hasNEON() &&
2235 "Unexpected register store without NEON");
2236 Opc = AArch64::ST1Threev1d;
2237 Offset = false;
2238 }
2239 break;
2240 case 32:
2241 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2242 assert(Subtarget.hasNEON() &&
2243 "Unexpected register store without NEON");
2244 Opc = AArch64::ST1Fourv1d;
2245 Offset = false;
2246 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2247 assert(Subtarget.hasNEON() &&
2248 "Unexpected register store without NEON");
2249 Opc = AArch64::ST1Twov2d;
2250 Offset = false;
2251 }
2252 break;
2253 case 48:
2254 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2255 assert(Subtarget.hasNEON() &&
2256 "Unexpected register store without NEON");
2257 Opc = AArch64::ST1Threev2d;
2258 Offset = false;
2259 }
2260 break;
2261 case 64:
2262 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2263 assert(Subtarget.hasNEON() &&
2264 "Unexpected register store without NEON");
2265 Opc = AArch64::ST1Fourv2d;
2266 Offset = false;
2267 }
2268 break;
2269 }
2270 assert(Opc && "Unknown register class");
2271
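// Register tuple classes are spilled with ST1, which takes no immediate
// offset; for those Offset was cleared above, so no trailing #0 operand is
// appended below.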
2272 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2273 .addReg(SrcReg, getKillRegState(isKill))
2274 .addFrameIndex(FI);
2275
2276 if (Offset)
2277 MI.addImm(0);
2278 MI.addMemOperand(MMO);
2279 }
2280
2281 void AArch64InstrInfo::loadRegFromStackSlot(
2282 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2283 int FI, const TargetRegisterClass *RC,
2284 const TargetRegisterInfo *TRI) const {
2285 DebugLoc DL;
2286 if (MBBI != MBB.end())
2287 DL = MBBI->getDebugLoc();
2288 MachineFunction &MF = *MBB.getParent();
2289 MachineFrameInfo &MFI = *MF.getFrameInfo();
2290 unsigned Align = MFI.getObjectAlignment(FI);
2291 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2292 MachineMemOperand *MMO = MF.getMachineMemOperand(
2293 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2294
2295 unsigned Opc = 0;
2296 bool Offset = true;
2297 switch (RC->getSize()) {
2298 case 1:
2299 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2300 Opc = AArch64::LDRBui;
2301 break;
2302 case 2:
2303 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2304 Opc = AArch64::LDRHui;
2305 break;
2306 case 4:
2307 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2308 Opc = AArch64::LDRWui;
2309 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2310 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2311 else
2312 assert(DestReg != AArch64::WSP);
2313 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2314 Opc = AArch64::LDRSui;
2315 break;
2316 case 8:
2317 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2318 Opc = AArch64::LDRXui;
2319 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2320 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2321 else
2322 assert(DestReg != AArch64::SP);
2323 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2324 Opc = AArch64::LDRDui;
2325 break;
2326 case 16:
2327 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2328 Opc = AArch64::LDRQui;
2329 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2330 assert(Subtarget.hasNEON() &&
2331 "Unexpected register load without NEON");
2332 Opc = AArch64::LD1Twov1d;
2333 Offset = false;
2334 }
2335 break;
2336 case 24:
2337 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2338 assert(Subtarget.hasNEON() &&
2339 "Unexpected register load without NEON");
2340 Opc = AArch64::LD1Threev1d;
2341 Offset = false;
2342 }
2343 break;
2344 case 32:
2345 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2346 assert(Subtarget.hasNEON() &&
2347 "Unexpected register load without NEON");
2348 Opc = AArch64::LD1Fourv1d;
2349 Offset = false;
2350 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2351 assert(Subtarget.hasNEON() &&
2352 "Unexpected register load without NEON");
2353 Opc = AArch64::LD1Twov2d;
2354 Offset = false;
2355 }
2356 break;
2357 case 48:
2358 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2359 assert(Subtarget.hasNEON() &&
2360 "Unexpected register load without NEON");
2361 Opc = AArch64::LD1Threev2d;
2362 Offset = false;
2363 }
2364 break;
2365 case 64:
2366 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2367 assert(Subtarget.hasNEON() &&
2368 "Unexpected register load without NEON");
2369 Opc = AArch64::LD1Fourv2d;
2370 Offset = false;
2371 }
2372 break;
2373 }
2374 assert(Opc && "Unknown register class");
2375
2376 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2377 .addReg(DestReg, getDefRegState(true))
2378 .addFrameIndex(FI);
2379 if (Offset)
2380 MI.addImm(0);
2381 MI.addMemOperand(MMO);
2382 }
2383
2384 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2385 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2386 unsigned DestReg, unsigned SrcReg, int Offset,
2387 const TargetInstrInfo *TII,
2388 MachineInstr::MIFlag Flag, bool SetNZCV) {
2389 if (DestReg == SrcReg && Offset == 0)
2390 return;
2391
2392 assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2393 "SP increment/decrement not 16-byte aligned");
2394
2395 bool isSub = Offset < 0;
2396 if (isSub)
2397 Offset = -Offset;
2398
2399 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2400 // scratch register. If DestReg is a virtual register, use it as the
2401 // scratch register; otherwise, create a new virtual register (to be
2402 // replaced by the scavenger at the end of PEI). That case can be optimized
2403 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2404 // register can be loaded with offset%8 and the add/sub can use an extending
2405 // instruction with LSL#3.
2406 // Currently the function handles any offsets but generates a poor sequence
2407 // of code.
2408 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2409
2410 unsigned Opc;
2411 if (SetNZCV)
2412 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2413 else
2414 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2415 const unsigned MaxEncoding = 0xfff;
2416 const unsigned ShiftSize = 12;
2417 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
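// Emit the offset in chunks: each iteration of the loop below materializes up
// to 0xfff << 12 using the LSL #12 shifted-immediate form, and the final
// ADD/SUB after the loop handles the remaining low 12 bits.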
2418 while (((unsigned)Offset) >= (1 << ShiftSize)) {
2419 unsigned ThisVal;
2420 if (((unsigned)Offset) > MaxEncodableValue) {
2421 ThisVal = MaxEncodableValue;
2422 } else {
2423 ThisVal = Offset & MaxEncodableValue;
2424 }
2425 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2426 "Encoding cannot handle value that big");
2427 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2428 .addReg(SrcReg)
2429 .addImm(ThisVal >> ShiftSize)
2430 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2431 .setMIFlag(Flag);
2432
2433 SrcReg = DestReg;
2434 Offset -= ThisVal;
2435 if (Offset == 0)
2436 return;
2437 }
2438 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2439 .addReg(SrcReg)
2440 .addImm(Offset)
2441 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2442 .setMIFlag(Flag);
2443 }
2444
2445 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2446 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
2447 MachineBasicBlock::iterator InsertPt, int FrameIndex,
2448 LiveIntervals *LIS) const {
2449 // This is a bit of a hack. Consider this instruction:
2450 //
2451 // %vreg0<def> = COPY %SP; GPR64all:%vreg0
2452 //
2453 // We explicitly chose GPR64all for the virtual register so such a copy might
2454 // be eliminated by RegisterCoalescer. However, that may not be possible, and
2455 // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2456 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2457 //
2458 // To prevent that, we are going to constrain the %vreg0 register class here.
2459 //
2460 // <rdar://problem/11522048>
2461 //
2462 if (MI.isCopy()) {
2463 unsigned DstReg = MI.getOperand(0).getReg();
2464 unsigned SrcReg = MI.getOperand(1).getReg();
2465 if (SrcReg == AArch64::SP &&
2466 TargetRegisterInfo::isVirtualRegister(DstReg)) {
2467 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2468 return nullptr;
2469 }
2470 if (DstReg == AArch64::SP &&
2471 TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2472 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2473 return nullptr;
2474 }
2475 }
2476
2477 // Cannot fold.
2478 return nullptr;
2479 }
2480
2481 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2482 bool *OutUseUnscaledOp,
2483 unsigned *OutUnscaledOp,
2484 int *EmittableOffset) {
2485 int Scale = 1;
2486 bool IsSigned = false;
2487 // ImmIdx defaults to 2; cases below override it when the immediate is at a
// different operand index.
2488 unsigned ImmIdx = 2;
2489 unsigned UnscaledOp = 0;
2490 // Set output values in case of early exit.
2491 if (EmittableOffset)
2492 *EmittableOffset = 0;
2493 if (OutUseUnscaledOp)
2494 *OutUseUnscaledOp = false;
2495 if (OutUnscaledOp)
2496 *OutUnscaledOp = 0;
2497 switch (MI.getOpcode()) {
2498 default:
2499 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
2500 // Vector spills/fills can't take an immediate offset.
2501 case AArch64::LD1Twov2d:
2502 case AArch64::LD1Threev2d:
2503 case AArch64::LD1Fourv2d:
2504 case AArch64::LD1Twov1d:
2505 case AArch64::LD1Threev1d:
2506 case AArch64::LD1Fourv1d:
2507 case AArch64::ST1Twov2d:
2508 case AArch64::ST1Threev2d:
2509 case AArch64::ST1Fourv2d:
2510 case AArch64::ST1Twov1d:
2511 case AArch64::ST1Threev1d:
2512 case AArch64::ST1Fourv1d:
2513 return AArch64FrameOffsetCannotUpdate;
2514 case AArch64::PRFMui:
2515 Scale = 8;
2516 UnscaledOp = AArch64::PRFUMi;
2517 break;
2518 case AArch64::LDRXui:
2519 Scale = 8;
2520 UnscaledOp = AArch64::LDURXi;
2521 break;
2522 case AArch64::LDRWui:
2523 Scale = 4;
2524 UnscaledOp = AArch64::LDURWi;
2525 break;
2526 case AArch64::LDRBui:
2527 Scale = 1;
2528 UnscaledOp = AArch64::LDURBi;
2529 break;
2530 case AArch64::LDRHui:
2531 Scale = 2;
2532 UnscaledOp = AArch64::LDURHi;
2533 break;
2534 case AArch64::LDRSui:
2535 Scale = 4;
2536 UnscaledOp = AArch64::LDURSi;
2537 break;
2538 case AArch64::LDRDui:
2539 Scale = 8;
2540 UnscaledOp = AArch64::LDURDi;
2541 break;
2542 case AArch64::LDRQui:
2543 Scale = 16;
2544 UnscaledOp = AArch64::LDURQi;
2545 break;
2546 case AArch64::LDRBBui:
2547 Scale = 1;
2548 UnscaledOp = AArch64::LDURBBi;
2549 break;
2550 case AArch64::LDRHHui:
2551 Scale = 2;
2552 UnscaledOp = AArch64::LDURHHi;
2553 break;
2554 case AArch64::LDRSBXui:
2555 Scale = 1;
2556 UnscaledOp = AArch64::LDURSBXi;
2557 break;
2558 case AArch64::LDRSBWui:
2559 Scale = 1;
2560 UnscaledOp = AArch64::LDURSBWi;
2561 break;
2562 case AArch64::LDRSHXui:
2563 Scale = 2;
2564 UnscaledOp = AArch64::LDURSHXi;
2565 break;
2566 case AArch64::LDRSHWui:
2567 Scale = 2;
2568 UnscaledOp = AArch64::LDURSHWi;
2569 break;
2570 case AArch64::LDRSWui:
2571 Scale = 4;
2572 UnscaledOp = AArch64::LDURSWi;
2573 break;
2574
2575 case AArch64::STRXui:
2576 Scale = 8;
2577 UnscaledOp = AArch64::STURXi;
2578 break;
2579 case AArch64::STRWui:
2580 Scale = 4;
2581 UnscaledOp = AArch64::STURWi;
2582 break;
2583 case AArch64::STRBui:
2584 Scale = 1;
2585 UnscaledOp = AArch64::STURBi;
2586 break;
2587 case AArch64::STRHui:
2588 Scale = 2;
2589 UnscaledOp = AArch64::STURHi;
2590 break;
2591 case AArch64::STRSui:
2592 Scale = 4;
2593 UnscaledOp = AArch64::STURSi;
2594 break;
2595 case AArch64::STRDui:
2596 Scale = 8;
2597 UnscaledOp = AArch64::STURDi;
2598 break;
2599 case AArch64::STRQui:
2600 Scale = 16;
2601 UnscaledOp = AArch64::STURQi;
2602 break;
2603 case AArch64::STRBBui:
2604 Scale = 1;
2605 UnscaledOp = AArch64::STURBBi;
2606 break;
2607 case AArch64::STRHHui:
2608 Scale = 2;
2609 UnscaledOp = AArch64::STURHHi;
2610 break;
2611
2612 case AArch64::LDPXi:
2613 case AArch64::LDPDi:
2614 case AArch64::STPXi:
2615 case AArch64::STPDi:
2616 case AArch64::LDNPXi:
2617 case AArch64::LDNPDi:
2618 case AArch64::STNPXi:
2619 case AArch64::STNPDi:
2620 ImmIdx = 3;
2621 IsSigned = true;
2622 Scale = 8;
2623 break;
2624 case AArch64::LDPQi:
2625 case AArch64::STPQi:
2626 case AArch64::LDNPQi:
2627 case AArch64::STNPQi:
2628 ImmIdx = 3;
2629 IsSigned = true;
2630 Scale = 16;
2631 break;
2632 case AArch64::LDPWi:
2633 case AArch64::LDPSi:
2634 case AArch64::STPWi:
2635 case AArch64::STPSi:
2636 case AArch64::LDNPWi:
2637 case AArch64::LDNPSi:
2638 case AArch64::STNPWi:
2639 case AArch64::STNPSi:
2640 ImmIdx = 3;
2641 IsSigned = true;
2642 Scale = 4;
2643 break;
2644
2645 case AArch64::LDURXi:
2646 case AArch64::LDURWi:
2647 case AArch64::LDURBi:
2648 case AArch64::LDURHi:
2649 case AArch64::LDURSi:
2650 case AArch64::LDURDi:
2651 case AArch64::LDURQi:
2652 case AArch64::LDURHHi:
2653 case AArch64::LDURBBi:
2654 case AArch64::LDURSBXi:
2655 case AArch64::LDURSBWi:
2656 case AArch64::LDURSHXi:
2657 case AArch64::LDURSHWi:
2658 case AArch64::LDURSWi:
2659 case AArch64::STURXi:
2660 case AArch64::STURWi:
2661 case AArch64::STURBi:
2662 case AArch64::STURHi:
2663 case AArch64::STURSi:
2664 case AArch64::STURDi:
2665 case AArch64::STURQi:
2666 case AArch64::STURBBi:
2667 case AArch64::STURHHi:
2668 Scale = 1;
2669 break;
2670 }
2671
2672 Offset += MI.getOperand(ImmIdx).getImm() * Scale;
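// Offset now holds the total byte offset: the incoming frame offset plus the
// instruction's existing immediate converted back to bytes.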
2673
2674 bool useUnscaledOp = false;
2675 // If the offset doesn't match the scale, we rewrite the instruction to
2676 // use the unscaled instruction instead. Likewise, if we have a negative
2677 // offset (and have an unscaled op to use).
2678 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
2679 useUnscaledOp = true;
2680
2681 // Use an unscaled addressing mode if the instruction has a negative offset
2682 // (or if the instruction is already using an unscaled addressing mode).
2683 unsigned MaskBits;
2684 if (IsSigned) {
2685 // ldp/stp instructions.
2686 MaskBits = 7;
2687 Offset /= Scale;
2688 } else if (UnscaledOp == 0 || useUnscaledOp) {
2689 MaskBits = 9;
2690 IsSigned = true;
2691 Scale = 1;
2692 } else {
2693 MaskBits = 12;
2694 IsSigned = false;
2695 Offset /= Scale;
2696 }
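// MaskBits reflects the width of the immediate field: 7-bit signed for the
// paired instructions, 9-bit signed for unscaled instructions, and 12-bit
// unsigned for scaled instructions.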
2697
2698 // Attempt to fold address computation.
2699 int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
2700 int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
2701 if (Offset >= MinOff && Offset <= MaxOff) {
2702 if (EmittableOffset)
2703 *EmittableOffset = Offset;
2704 Offset = 0;
2705 } else {
2706 int NewOff = Offset < 0 ? MinOff : MaxOff;
2707 if (EmittableOffset)
2708 *EmittableOffset = NewOff;
2709 Offset = (Offset - NewOff) * Scale;
2710 }
2711 if (OutUseUnscaledOp)
2712 *OutUseUnscaledOp = useUnscaledOp;
2713 if (OutUnscaledOp)
2714 *OutUnscaledOp = UnscaledOp;
2715 return AArch64FrameOffsetCanUpdate |
2716 (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
2717 }
2718
2719 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2720 unsigned FrameReg, int &Offset,
2721 const AArch64InstrInfo *TII) {
2722 unsigned Opcode = MI.getOpcode();
2723 unsigned ImmIdx = FrameRegIdx + 1;
2724
2725 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
2726 Offset += MI.getOperand(ImmIdx).getImm();
2727 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
2728 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
2729 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
2730 MI.eraseFromParent();
2731 Offset = 0;
2732 return true;
2733 }
2734
2735 int NewOffset;
2736 unsigned UnscaledOp;
2737 bool UseUnscaledOp;
2738 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
2739 &UnscaledOp, &NewOffset);
2740 if (Status & AArch64FrameOffsetCanUpdate) {
2741 if (Status & AArch64FrameOffsetIsLegal)
2742 // Replace the FrameIndex with FrameReg.
2743 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2744 if (UseUnscaledOp)
2745 MI.setDesc(TII->get(UnscaledOp));
2746
2747 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
2748 return Offset == 0;
2749 }
2750
2751 return false;
2752 }
2753
2754 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
2755 NopInst.setOpcode(AArch64::HINT);
2756 NopInst.addOperand(MCOperand::createImm(0));
2757 }
2758
2759 // AArch64 supports MachineCombiner.
2760 bool AArch64InstrInfo::useMachineCombiner() const {
2761
2762 return true;
2763 }
2764 //
2765 // True when Opc sets flag
2766 static bool isCombineInstrSettingFlag(unsigned Opc) {
2767 switch (Opc) {
2768 case AArch64::ADDSWrr:
2769 case AArch64::ADDSWri:
2770 case AArch64::ADDSXrr:
2771 case AArch64::ADDSXri:
2772 case AArch64::SUBSWrr:
2773 case AArch64::SUBSXrr:
2774 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2775 case AArch64::SUBSWri:
2776 case AArch64::SUBSXri:
2777 return true;
2778 default:
2779 break;
2780 }
2781 return false;
2782 }
2783 //
2784 // 32b Opcodes that can be combined with a MUL
2785 static bool isCombineInstrCandidate32(unsigned Opc) {
2786 switch (Opc) {
2787 case AArch64::ADDWrr:
2788 case AArch64::ADDWri:
2789 case AArch64::SUBWrr:
2790 case AArch64::ADDSWrr:
2791 case AArch64::ADDSWri:
2792 case AArch64::SUBSWrr:
2793 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2794 case AArch64::SUBWri:
2795 case AArch64::SUBSWri:
2796 return true;
2797 default:
2798 break;
2799 }
2800 return false;
2801 }
2802 //
2803 // 64b Opcodes that can be combined with a MUL
2804 static bool isCombineInstrCandidate64(unsigned Opc) {
2805 switch (Opc) {
2806 case AArch64::ADDXrr:
2807 case AArch64::ADDXri:
2808 case AArch64::SUBXrr:
2809 case AArch64::ADDSXrr:
2810 case AArch64::ADDSXri:
2811 case AArch64::SUBSXrr:
2812 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2813 case AArch64::SUBXri:
2814 case AArch64::SUBSXri:
2815 return true;
2816 default:
2817 break;
2818 }
2819 return false;
2820 }
2821 //
2822 // FP Opcodes that can be combined with a FMUL
2823 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
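// Fusing a separate FMUL with an FADD/FSUB into a single FMADD/FMSUB changes
// the rounding behaviour, so these opcodes are only candidates when
// unsafe-fp-math is enabled (checked below).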
2824 switch (Inst.getOpcode()) {
2825 case AArch64::FADDSrr:
2826 case AArch64::FADDDrr:
2827 case AArch64::FADDv2f32:
2828 case AArch64::FADDv2f64:
2829 case AArch64::FADDv4f32:
2830 case AArch64::FSUBSrr:
2831 case AArch64::FSUBDrr:
2832 case AArch64::FSUBv2f32:
2833 case AArch64::FSUBv2f64:
2834 case AArch64::FSUBv4f32:
2835 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
2836 default:
2837 break;
2838 }
2839 return false;
2840 }
2841 //
2842 // Opcodes that can be combined with a MUL
2843 static bool isCombineInstrCandidate(unsigned Opc) {
2844 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
2845 }
2846
2847 //
2848 // Utility routine that checks if \param MO is defined by an
2849 // \param CombineOpc instruction in the basic block \param MBB
2850 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
2851 unsigned CombineOpc, unsigned ZeroReg = 0,
2852 bool CheckZeroReg = false) {
2853 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2854 MachineInstr *MI = nullptr;
2855
2856 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2857 MI = MRI.getUniqueVRegDef(MO.getReg());
2858 // And it needs to be in the trace (otherwise, it won't have a depth).
2859 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
2860 return false;
2861 // It must only be used by the instruction we combine with.
2862 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2863 return false;
2864
2865 if (CheckZeroReg) {
2866 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
2867 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
2868          MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
2869 // The third input reg must be zero.
2870 if (MI->getOperand(3).getReg() != ZeroReg)
2871 return false;
2872 }
2873
2874 return true;
2875 }
2876
2877 //
2878 // Is \param MO defined by an integer multiply and can be combined?
2879 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2880 unsigned MulOpc, unsigned ZeroReg) {
2881 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
2882 }
2883
2884 //
2885 // Is \param MO defined by a floating-point multiply and can be combined?
2886 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2887 unsigned MulOpc) {
2888 return canCombine(MBB, MO, MulOpc);
2889 }
2890
2891 // TODO: There are many more machine instruction opcodes to match:
2892 // 1. Other data types (integer, vectors)
2893 // 2. Other math / logic operations (xor, or)
2894 // 3. Other forms of the same operation (intrinsics and other variants)
2895 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
2896 switch (Inst.getOpcode()) {
2897 case AArch64::FADDDrr:
2898 case AArch64::FADDSrr:
2899 case AArch64::FADDv2f32:
2900 case AArch64::FADDv2f64:
2901 case AArch64::FADDv4f32:
2902 case AArch64::FMULDrr:
2903 case AArch64::FMULSrr:
2904 case AArch64::FMULX32:
2905 case AArch64::FMULX64:
2906 case AArch64::FMULXv2f32:
2907 case AArch64::FMULXv2f64:
2908 case AArch64::FMULXv4f32:
2909 case AArch64::FMULv2f32:
2910 case AArch64::FMULv2f64:
2911 case AArch64::FMULv4f32:
2912 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
2913 default:
2914 return false;
2915 }
2916 }
2917
2918 /// Find instructions that can be turned into madd.
2919 static bool getMaddPatterns(MachineInstr &Root,
2920 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
2921 unsigned Opc = Root.getOpcode();
2922 MachineBasicBlock &MBB = *Root.getParent();
2923 bool Found = false;
2924
2925 if (!isCombineInstrCandidate(Opc))
2926 return false;
2927 if (isCombineInstrSettingFlag(Opc)) {
2928 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
2929 // When NZCV is live, bail out.
2930 if (Cmp_NZCV == -1)
2931 return false;
2932 unsigned NewOpc = convertFlagSettingOpcode(Root);
2933 // When the opcode can't change, bail out.
2934 // CHECKME: do we miss any cases for opcode conversion?
2935 if (NewOpc == Opc)
2936 return false;
2937 Opc = NewOpc;
2938 }
2939
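// Look for a MUL (a MADD whose addend is the zero register) feeding either
// source operand of the add/sub; each match is recorded as a MULADD*/MULSUB*
// combiner pattern.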
2940 switch (Opc) {
2941 default:
2942 break;
2943 case AArch64::ADDWrr:
2944 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
2945 "ADDWrr does not have register operands");
2946 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2947 AArch64::WZR)) {
2948 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
2949 Found = true;
2950 }
2951 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
2952 AArch64::WZR)) {
2953 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
2954 Found = true;
2955 }
2956 break;
2957 case AArch64::ADDXrr:
2958 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2959 AArch64::XZR)) {
2960 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
2961 Found = true;
2962 }
2963 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
2964 AArch64::XZR)) {
2965 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
2966 Found = true;
2967 }
2968 break;
2969 case AArch64::SUBWrr:
2970 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2971 AArch64::WZR)) {
2972 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
2973 Found = true;
2974 }
2975 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
2976 AArch64::WZR)) {
2977 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
2978 Found = true;
2979 }
2980 break;
2981 case AArch64::SUBXrr:
2982 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2983 AArch64::XZR)) {
2984 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
2985 Found = true;
2986 }
2987 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
2988 AArch64::XZR)) {
2989 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
2990 Found = true;
2991 }
2992 break;
2993 case AArch64::ADDWri:
2994 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2995 AArch64::WZR)) {
2996 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
2997 Found = true;
2998 }
2999 break;
3000 case AArch64::ADDXri:
3001 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3002 AArch64::XZR)) {
3003 Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3004 Found = true;
3005 }
3006 break;
3007 case AArch64::SUBWri:
3008 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3009 AArch64::WZR)) {
3010 Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3011 Found = true;
3012 }
3013 break;
3014 case AArch64::SUBXri:
3015 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3016 AArch64::XZR)) {
3017 Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3018 Found = true;
3019 }
3020 break;
3021 }
3022 return Found;
3023 }
3024 /// Floating-Point Support
3025
3026 /// Find instructions that can be turned into madd.
3027 static bool getFMAPatterns(MachineInstr &Root,
3028 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3029
3030 if (!isCombineInstrCandidateFP(Root))
3031 return false;
3032
3033 MachineBasicBlock &MBB = *Root.getParent();
3034 bool Found = false;
3035
3036 switch (Root.getOpcode()) {
3037 default:
3038 assert(false && "Unsupported FP instruction in combiner\n");
3039 break;
3040 case AArch64::FADDSrr:
3041 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3042 "FADDWrr does not have register operands");
3043 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3044 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3045 Found = true;
3046 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3047 AArch64::FMULv1i32_indexed)) {
3048 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3049 Found = true;
3050 }
3051 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3052 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3053 Found = true;
3054 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3055 AArch64::FMULv1i32_indexed)) {
3056 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3057 Found = true;
3058 }
3059 break;
3060 case AArch64::FADDDrr:
3061 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3062 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3063 Found = true;
3064 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3065 AArch64::FMULv1i64_indexed)) {
3066 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3067 Found = true;
3068 }
3069 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3070 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3071 Found = true;
3072 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3073 AArch64::FMULv1i64_indexed)) {
3074 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3075 Found = true;
3076 }
3077 break;
3078 case AArch64::FADDv2f32:
3079 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3080 AArch64::FMULv2i32_indexed)) {
3081 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3082 Found = true;
3083 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3084 AArch64::FMULv2f32)) {
3085 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3086 Found = true;
3087 }
3088 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3089 AArch64::FMULv2i32_indexed)) {
3090 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3091 Found = true;
3092 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3093 AArch64::FMULv2f32)) {
3094 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3095 Found = true;
3096 }
3097 break;
3098 case AArch64::FADDv2f64:
3099 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3100 AArch64::FMULv2i64_indexed)) {
3101 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3102 Found = true;
3103 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3104 AArch64::FMULv2f64)) {
3105 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3106 Found = true;
3107 }
3108 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3109 AArch64::FMULv2i64_indexed)) {
3110 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3111 Found = true;
3112 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3113 AArch64::FMULv2f64)) {
3114 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3115 Found = true;
3116 }
3117 break;
3118 case AArch64::FADDv4f32:
3119 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3120 AArch64::FMULv4i32_indexed)) {
3121 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3122 Found = true;
3123 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3124 AArch64::FMULv4f32)) {
3125 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3126 Found = true;
3127 }
3128 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3129 AArch64::FMULv4i32_indexed)) {
3130 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3131 Found = true;
3132 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3133 AArch64::FMULv4f32)) {
3134 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3135 Found = true;
3136 }
3137 break;
3138
3139 case AArch64::FSUBSrr:
3140 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3141 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3142 Found = true;
3143 }
3144 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3145 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3146 Found = true;
3147 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3148 AArch64::FMULv1i32_indexed)) {
3149 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3150 Found = true;
3151 }
3152 break;
3153 case AArch64::FSUBDrr:
3154 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3155 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3156 Found = true;
3157 }
3158 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3159 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3160 Found = true;
3161 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3162 AArch64::FMULv1i64_indexed)) {
3163 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3164 Found = true;
3165 }
3166 break;
3167 case AArch64::FSUBv2f32:
3168 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3169 AArch64::FMULv2i32_indexed)) {
3170 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3171 Found = true;
3172 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3173 AArch64::FMULv2f32)) {
3174 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3175 Found = true;
3176 }
3177 break;
3178 case AArch64::FSUBv2f64:
3179 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3180 AArch64::FMULv2i64_indexed)) {
3181 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3182 Found = true;
3183 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3184 AArch64::FMULv2f64)) {
3185 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3186 Found = true;
3187 }
3188 break;
3189 case AArch64::FSUBv4f32:
3190 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3191 AArch64::FMULv4i32_indexed)) {
3192 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3193 Found = true;
3194 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3195 AArch64::FMULv4f32)) {
3196 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3197 Found = true;
3198 }
3199 break;
3200 }
3201 return Found;
3202 }
3203
3204 /// Return true when a code sequence can improve throughput. It
3205 /// should be called only for instructions in loops.
3206 /// \param Pattern - combiner pattern
3207 bool
3208 AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
3209 switch (Pattern) {
3210 default:
3211 break;
3212 case MachineCombinerPattern::FMULADDS_OP1:
3213 case MachineCombinerPattern::FMULADDS_OP2:
3214 case MachineCombinerPattern::FMULSUBS_OP1:
3215 case MachineCombinerPattern::FMULSUBS_OP2:
3216 case MachineCombinerPattern::FMULADDD_OP1:
3217 case MachineCombinerPattern::FMULADDD_OP2:
3218 case MachineCombinerPattern::FMULSUBD_OP1:
3219 case MachineCombinerPattern::FMULSUBD_OP2:
3220 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3221 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3222 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3223 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3224 case MachineCombinerPattern::FMLAv2f32_OP2:
3225 case MachineCombinerPattern::FMLAv2f32_OP1:
3226 case MachineCombinerPattern::FMLAv2f64_OP1:
3227 case MachineCombinerPattern::FMLAv2f64_OP2:
3228 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3229 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3230 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3231 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3232 case MachineCombinerPattern::FMLAv4f32_OP1:
3233 case MachineCombinerPattern::FMLAv4f32_OP2:
3234 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3235 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3236 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3237 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3238 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3239 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3240 case MachineCombinerPattern::FMLSv2f32_OP2:
3241 case MachineCombinerPattern::FMLSv2f64_OP2:
3242 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3243 case MachineCombinerPattern::FMLSv4f32_OP2:
3244 return true;
3245 } // end switch (Pattern)
3246 return false;
3247 }
3248 /// Return true when there is potentially a faster code sequence for an
3249 /// instruction chain ending in \p Root. All potential patterns are listed in
3250 /// the \p Patterns vector. Patterns should be sorted in priority order, since
3251 /// the pattern evaluator stops checking as soon as it finds a faster sequence.
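/// For example (illustrative): a 32-bit multiply whose result feeds the first
/// operand of an ADD yields MachineCombinerPattern::MULADDW_OP1, which
/// genAlternativeCodeSequence() below rewrites into a single MADDWrrr.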
3252
3253 bool AArch64InstrInfo::getMachineCombinerPatterns(
3254 MachineInstr &Root,
3255 SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3256 // Integer patterns
3257 if (getMaddPatterns(Root, Patterns))
3258 return true;
3259 // Floating point patterns
3260 if (getFMAPatterns(Root, Patterns))
3261 return true;
3262
3263 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3264 }
3265
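// Describes which form of fused multiply instruction genFusedMultiply()
// should build, i.e. how the operands are arranged:
//  - Default:     scalar three-source form (MulOp1, MulOp2, Addend),
//                 e.g. MADD/FMADD.
//  - Indexed:     vector by-element form (Accumulator, MulOp1, MulOp2, Lane),
//                 where the lane index is copied from the original FMUL.
//  - Accumulator: vector form (Accumulator, MulOp1, MulOp2), e.g. FMLAv2f32.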
3266 enum class FMAInstKind { Default, Indexed, Accumulator };
3267 /// genFusedMultiply - Generate fused multiply instructions.
3268 /// This function supports both integer and floating point instructions.
3269 /// A typical example:
3270 /// F|MUL I=A,B,0
3271 /// F|ADD R,I,C
3272 /// ==> F|MADD R,A,B,C
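/// A concrete instance (illustrative), for the FMULADDS_OP1 pattern:
///   FMULSrr I=A,B
///   FADDSrr R=I,C
/// ==> FMADDSrrr R=A,B,C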
3273 /// \param Root is the F|ADD instruction
3274 /// \param [out] InsInstrs is a vector of machine instructions and will
3275 /// contain the generated madd instruction
3276 /// \param IdxMulOpd is index of operand in Root that is the result of
3277 /// the F|MUL. In the example above IdxMulOpd is 1.
3278 /// \param MaddOpc the opcode of the f|madd instruction
3279 static MachineInstr *
3280 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3281 const TargetInstrInfo *TII, MachineInstr &Root,
3282 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3283 unsigned MaddOpc, const TargetRegisterClass *RC,
3284 FMAInstKind kind = FMAInstKind::Default) {
3285 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3286
3287 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3288 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3289 unsigned ResultReg = Root.getOperand(0).getReg();
3290 unsigned SrcReg0 = MUL->getOperand(1).getReg();
3291 bool Src0IsKill = MUL->getOperand(1).isKill();
3292 unsigned SrcReg1 = MUL->getOperand(2).getReg();
3293 bool Src1IsKill = MUL->getOperand(2).isKill();
3294 unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3295 bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3296
3297 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3298 MRI.constrainRegClass(ResultReg, RC);
3299 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3300 MRI.constrainRegClass(SrcReg0, RC);
3301 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3302 MRI.constrainRegClass(SrcReg1, RC);
3303 if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3304 MRI.constrainRegClass(SrcReg2, RC);
3305
3306 MachineInstrBuilder MIB;
3307 if (kind == FMAInstKind::Default)
3308 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3309 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3310 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3311 .addReg(SrcReg2, getKillRegState(Src2IsKill));
3312 else if (kind == FMAInstKind::Indexed)
3313 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3314 .addReg(SrcReg2, getKillRegState(Src2IsKill))
3315 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3316 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3317 .addImm(MUL->getOperand(3).getImm());
3318 else if (kind == FMAInstKind::Accumulator)
3319 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3320 .addReg(SrcReg2, getKillRegState(Src2IsKill))
3321 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3322 .addReg(SrcReg1, getKillRegState(Src1IsKill));
3323 else
3324 llvm_unreachable("Invalid FMA instruction kind");
3325 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3326 InsInstrs.push_back(MIB);
3327 return MUL;
3328 }
3329
3330 /// genMaddR - Generate madd instruction and combine mul and add using
3331 /// an extra virtual register
3332 /// Example - an ADD intermediate needs to be stored in a register:
3333 /// MUL I=A,B,0
3334 /// ADD R,I,Imm
3335 /// ==> ORR V, ZR, Imm
3336 /// ==> MADD R,A,B,V
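/// A concrete instance (illustrative), for MULADDWI_OP1 with Imm=16 (which is
/// encodable as a logical immediate):
///   MUL  I=A,B,0
///   ADDWri R=I,16
/// ==> ORRWri V, WZR, <encoding of 16>
/// ==> MADDWrrr R=A,B,V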
3337 /// \param Root is the ADD instruction
3338 /// \param [out] InsInstrs is a vector of machine instructions and will
3339 /// contain the generated madd instruction
3340 /// \param IdxMulOpd is index of operand in Root that is the result of
3341 /// the MUL. In the example above IdxMulOpd is 1.
3342 /// \param MaddOpc the opcode of the madd instruction
3343 /// \param VR is a virtual register that holds the value of an ADD operand
3344 /// (V in the example above).
3345 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3346 const TargetInstrInfo *TII, MachineInstr &Root,
3347 SmallVectorImpl<MachineInstr *> &InsInstrs,
3348 unsigned IdxMulOpd, unsigned MaddOpc,
3349 unsigned VR, const TargetRegisterClass *RC) {
3350 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3351
3352 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3353 unsigned ResultReg = Root.getOperand(0).getReg();
3354 unsigned SrcReg0 = MUL->getOperand(1).getReg();
3355 bool Src0IsKill = MUL->getOperand(1).isKill();
3356 unsigned SrcReg1 = MUL->getOperand(2).getReg();
3357 bool Src1IsKill = MUL->getOperand(2).isKill();
3358
3359 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3360 MRI.constrainRegClass(ResultReg, RC);
3361 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3362 MRI.constrainRegClass(SrcReg0, RC);
3363 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3364 MRI.constrainRegClass(SrcReg1, RC);
3365 if (TargetRegisterInfo::isVirtualRegister(VR))
3366 MRI.constrainRegClass(VR, RC);
3367
3368 MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
3369 ResultReg)
3370 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3371 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3372 .addReg(VR);
3373 // Insert the MADD
3374 InsInstrs.push_back(MIB);
3375 return MUL;
3376 }
3377
3378 /// When getMachineCombinerPatterns() finds potential patterns,
3379 /// this function generates the instructions that could replace the
3380 /// original code sequence
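/// The replacement instructions are appended to \p InsInstrs, and the multiply
/// plus the root add/sub they replace are recorded in \p DelInstrs; the
/// MachineCombiner pass then decides whether to commit the new sequence.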
3381 void AArch64InstrInfo::genAlternativeCodeSequence(
3382 MachineInstr &Root, MachineCombinerPattern Pattern,
3383 SmallVectorImpl<MachineInstr *> &InsInstrs,
3384 SmallVectorImpl<MachineInstr *> &DelInstrs,
3385 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
3386 MachineBasicBlock &MBB = *Root.getParent();
3387 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3388 MachineFunction &MF = *MBB.getParent();
3389 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
3390
3391 MachineInstr *MUL;
3392 const TargetRegisterClass *RC;
3393 unsigned Opc;
3394 switch (Pattern) {
3395 default:
3396 // Reassociate instructions.
3397 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
3398 DelInstrs, InstrIdxForVirtReg);
3399 return;
3400 case MachineCombinerPattern::MULADDW_OP1:
3401 case MachineCombinerPattern::MULADDX_OP1:
3402 // MUL I=A,B,0
3403 // ADD R,I,C
3404 // ==> MADD R,A,B,C
3405 // --- Create(MADD);
3406 if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
3407 Opc = AArch64::MADDWrrr;
3408 RC = &AArch64::GPR32RegClass;
3409 } else {
3410 Opc = AArch64::MADDXrrr;
3411 RC = &AArch64::GPR64RegClass;
3412 }
3413 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3414 break;
3415 case MachineCombinerPattern::MULADDW_OP2:
3416 case MachineCombinerPattern::MULADDX_OP2:
3417 // MUL I=A,B,0
3418 // ADD R,C,I
3419 // ==> MADD R,A,B,C
3420 // --- Create(MADD);
3421 if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
3422 Opc = AArch64::MADDWrrr;
3423 RC = &AArch64::GPR32RegClass;
3424 } else {
3425 Opc = AArch64::MADDXrrr;
3426 RC = &AArch64::GPR64RegClass;
3427 }
3428 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3429 break;
3430 case MachineCombinerPattern::MULADDWI_OP1:
3431 case MachineCombinerPattern::MULADDXI_OP1: {
3432 // MUL I=A,B,0
3433 // ADD R,I,Imm
3434 // ==> ORR V, ZR, Imm
3435 // ==> MADD R,A,B,V
3436 // --- Create(MADD);
3437 const TargetRegisterClass *OrrRC;
3438 unsigned BitSize, OrrOpc, ZeroReg;
3439 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
3440 OrrOpc = AArch64::ORRWri;
3441 OrrRC = &AArch64::GPR32spRegClass;
3442 BitSize = 32;
3443 ZeroReg = AArch64::WZR;
3444 Opc = AArch64::MADDWrrr;
3445 RC = &AArch64::GPR32RegClass;
3446 } else {
3447 OrrOpc = AArch64::ORRXri;
3448 OrrRC = &AArch64::GPR64spRegClass;
3449 BitSize = 64;
3450 ZeroReg = AArch64::XZR;
3451 Opc = AArch64::MADDXrrr;
3452 RC = &AArch64::GPR64RegClass;
3453 }
3454 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
3455 uint64_t Imm = Root.getOperand(2).getImm();
3456
3457 if (Root.getOperand(3).isImm()) {
3458 unsigned Val = Root.getOperand(3).getImm();
3459 Imm = Imm << Val;
3460 }
3461 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
3462 uint64_t Encoding;
3463 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
3464 MachineInstrBuilder MIB1 =
3465 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
3466 .addReg(ZeroReg)
3467 .addImm(Encoding);
3468 InsInstrs.push_back(MIB1);
3469 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3470 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3471 }
3472 break;
3473 }
3474 case MachineCombinerPattern::MULSUBW_OP1:
3475 case MachineCombinerPattern::MULSUBX_OP1: {
3476 // MUL I=A,B,0
3477 // SUB R,I, C
3478 // ==> SUB V, 0, C
3479 // ==> MADD R,A,B,V // = -C + A*B
3480 // --- Create(MADD);
3481 const TargetRegisterClass *SubRC;
3482 unsigned SubOpc, ZeroReg;
3483 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
3484 SubOpc = AArch64::SUBWrr;
3485 SubRC = &AArch64::GPR32spRegClass;
3486 ZeroReg = AArch64::WZR;
3487 Opc = AArch64::MADDWrrr;
3488 RC = &AArch64::GPR32RegClass;
3489 } else {
3490 SubOpc = AArch64::SUBXrr;
3491 SubRC = &AArch64::GPR64spRegClass;
3492 ZeroReg = AArch64::XZR;
3493 Opc = AArch64::MADDXrrr;
3494 RC = &AArch64::GPR64RegClass;
3495 }
3496 unsigned NewVR = MRI.createVirtualRegister(SubRC);
3497 // SUB NewVR, 0, C
3498 MachineInstrBuilder MIB1 =
3499 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
3500 .addReg(ZeroReg)
3501 .addOperand(Root.getOperand(2));
3502 InsInstrs.push_back(MIB1);
3503 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3504 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3505 break;
3506 }
3507 case MachineCombinerPattern::MULSUBW_OP2:
3508 case MachineCombinerPattern::MULSUBX_OP2:
3509 // MUL I=A,B,0
3510 // SUB R,C,I
3511 // ==> MSUB R,A,B,C (computes C - A*B)
3512 // --- Create(MSUB);
3513 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
3514 Opc = AArch64::MSUBWrrr;
3515 RC = &AArch64::GPR32RegClass;
3516 } else {
3517 Opc = AArch64::MSUBXrrr;
3518 RC = &AArch64::GPR64RegClass;
3519 }
3520 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3521 break;
3522 case MachineCombinerPattern::MULSUBWI_OP1:
3523 case MachineCombinerPattern::MULSUBXI_OP1: {
3524 // MUL I=A,B,0
3525 // SUB R,I, Imm
3526 // ==> ORR V, ZR, -Imm
3527 // ==> MADD R,A,B,V // = -Imm + A*B
3528 // --- Create(MADD);
3529 const TargetRegisterClass *OrrRC;
3530 unsigned BitSize, OrrOpc, ZeroReg;
3531 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
3532 OrrOpc = AArch64::ORRWri;
3533 OrrRC = &AArch64::GPR32spRegClass;
3534 BitSize = 32;
3535 ZeroReg = AArch64::WZR;
3536 Opc = AArch64::MADDWrrr;
3537 RC = &AArch64::GPR32RegClass;
3538 } else {
3539 OrrOpc = AArch64::ORRXri;
3540 OrrRC = &AArch64::GPR64spRegClass;
3541 BitSize = 64;
3542 ZeroReg = AArch64::XZR;
3543 Opc = AArch64::MADDXrrr;
3544 RC = &AArch64::GPR64RegClass;
3545 }
3546 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
3547 int Imm = Root.getOperand(2).getImm();
3548 if (Root.getOperand(3).isImm()) {
3549 unsigned Val = Root.getOperand(3).getImm();
3550 Imm = Imm << Val;
3551 }
3552 uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
3553 uint64_t Encoding;
3554 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
3555 MachineInstrBuilder MIB1 =
3556 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
3557 .addReg(ZeroReg)
3558 .addImm(Encoding);
3559 InsInstrs.push_back(MIB1);
3560 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3561 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3562 }
3563 break;
3564 }
3565 // Floating Point Support
3566 case MachineCombinerPattern::FMULADDS_OP1:
3567 case MachineCombinerPattern::FMULADDD_OP1:
3568 // MUL I=A,B,0
3569 // ADD R,I,C
3570 // ==> MADD R,A,B,C
3571 // --- Create(MADD);
3572 if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
3573 Opc = AArch64::FMADDSrrr;
3574 RC = &AArch64::FPR32RegClass;
3575 } else {
3576 Opc = AArch64::FMADDDrrr;
3577 RC = &AArch64::FPR64RegClass;
3578 }
3579 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3580 break;
3581 case MachineCombinerPattern::FMULADDS_OP2:
3582 case MachineCombinerPattern::FMULADDD_OP2:
3583 // FMUL I=A,B,0
3584 // FADD R,C,I
3585 // ==> FMADD R,A,B,C
3586 // --- Create(FMADD);
3587 if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
3588 Opc = AArch64::FMADDSrrr;
3589 RC = &AArch64::FPR32RegClass;
3590 } else {
3591 Opc = AArch64::FMADDDrrr;
3592 RC = &AArch64::FPR64RegClass;
3593 }
3594 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3595 break;
3596
3597 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3598 Opc = AArch64::FMLAv1i32_indexed;
3599 RC = &AArch64::FPR32RegClass;
3600 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3601 FMAInstKind::Indexed);
3602 break;
3603 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3604 Opc = AArch64::FMLAv1i32_indexed;
3605 RC = &AArch64::FPR32RegClass;
3606 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3607 FMAInstKind::Indexed);
3608 break;
3609
3610 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3611 Opc = AArch64::FMLAv1i64_indexed;
3612 RC = &AArch64::FPR64RegClass;
3613 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3614 FMAInstKind::Indexed);
3615 break;
3616 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3617 Opc = AArch64::FMLAv1i64_indexed;
3618 RC = &AArch64::FPR64RegClass;
3619 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3620 FMAInstKind::Indexed);
3621 break;
3622
3623 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3624 case MachineCombinerPattern::FMLAv2f32_OP1:
3625 RC = &AArch64::FPR64RegClass;
3626 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
3627 Opc = AArch64::FMLAv2i32_indexed;
3628 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3629 FMAInstKind::Indexed);
3630 } else {
3631 Opc = AArch64::FMLAv2f32;
3632 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3633 FMAInstKind::Accumulator);
3634 }
3635 break;
3636 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3637 case MachineCombinerPattern::FMLAv2f32_OP2:
3638 RC = &AArch64::FPR64RegClass;
3639 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
3640 Opc = AArch64::FMLAv2i32_indexed;
3641 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3642 FMAInstKind::Indexed);
3643 } else {
3644 Opc = AArch64::FMLAv2f32;
3645 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3646 FMAInstKind::Accumulator);
3647 }
3648 break;
3649
3650 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3651 case MachineCombinerPattern::FMLAv2f64_OP1:
3652 RC = &AArch64::FPR128RegClass;
3653 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
3654 Opc = AArch64::FMLAv2i64_indexed;
3655 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3656 FMAInstKind::Indexed);
3657 } else {
3658 Opc = AArch64::FMLAv2f64;
3659 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3660 FMAInstKind::Accumulator);
3661 }
3662 break;
3663 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3664 case MachineCombinerPattern::FMLAv2f64_OP2:
3665 RC = &AArch64::FPR128RegClass;
3666 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
3667 Opc = AArch64::FMLAv2i64_indexed;
3668 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3669 FMAInstKind::Indexed);
3670 } else {
3671 Opc = AArch64::FMLAv2f64;
3672 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3673 FMAInstKind::Accumulator);
3674 }
3675 break;
3676
3677 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3678 case MachineCombinerPattern::FMLAv4f32_OP1:
3679 RC = &AArch64::FPR128RegClass;
3680 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
3681 Opc = AArch64::FMLAv4i32_indexed;
3682 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3683 FMAInstKind::Indexed);
3684 } else {
3685 Opc = AArch64::FMLAv4f32;
3686 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3687 FMAInstKind::Accumulator);
3688 }
3689 break;
3690
3691 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3692 case MachineCombinerPattern::FMLAv4f32_OP2:
3693 RC = &AArch64::FPR128RegClass;
3694 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
3695 Opc = AArch64::FMLAv4i32_indexed;
3696 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3697 FMAInstKind::Indexed);
3698 } else {
3699 Opc = AArch64::FMLAv4f32;
3700 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3701 FMAInstKind::Accumulator);
3702 }
3703 break;
3704
3705 case MachineCombinerPattern::FMULSUBS_OP1:
3706 case MachineCombinerPattern::FMULSUBD_OP1: {
3707 // FMUL I=A,B,0
3708 // FSUB R,I,C
3709 // ==> FNMSUB R,A,B,C // = -C + A*B
3710 // --- Create(FNMSUB);
3711 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
3712 Opc = AArch64::FNMSUBSrrr;
3713 RC = &AArch64::FPR32RegClass;
3714 } else {
3715 Opc = AArch64::FNMSUBDrrr;
3716 RC = &AArch64::FPR64RegClass;
3717 }
3718 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3719 break;
3720 }
3721 case MachineCombinerPattern::FMULSUBS_OP2:
3722 case MachineCombinerPattern::FMULSUBD_OP2: {
3723 // FMUL I=A,B,0
3724 // FSUB R,C,I
3725 // ==> FMSUB R,A,B,C (computes C - A*B)
3726 // --- Create(FMSUB);
3727 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
3728 Opc = AArch64::FMSUBSrrr;
3729 RC = &AArch64::FPR32RegClass;
3730 } else {
3731 Opc = AArch64::FMSUBDrrr;
3732 RC = &AArch64::FPR64RegClass;
3733 }
3734 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3735 break;
3736
3737 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3738 Opc = AArch64::FMLSv1i32_indexed;
3739 RC = &AArch64::FPR32RegClass;
3740 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3741 FMAInstKind::Indexed);
3742 break;
3743
3744 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3745 Opc = AArch64::FMLSv1i64_indexed;
3746 RC = &AArch64::FPR64RegClass;
3747 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3748 FMAInstKind::Indexed);
3749 break;
3750
3751 case MachineCombinerPattern::FMLSv2f32_OP2:
3752 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3753 RC = &AArch64::FPR64RegClass;
3754 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
3755 Opc = AArch64::FMLSv2i32_indexed;
3756 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3757 FMAInstKind::Indexed);
3758 } else {
3759 Opc = AArch64::FMLSv2f32;
3760 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3761 FMAInstKind::Accumulator);
3762 }
3763 break;
3764
3765 case MachineCombinerPattern::FMLSv2f64_OP2:
3766 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3767 RC = &AArch64::FPR128RegClass;
3768 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
3769 Opc = AArch64::FMLSv2i64_indexed;
3770 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3771 FMAInstKind::Indexed);
3772 } else {
3773 Opc = AArch64::FMLSv2f64;
3774 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3775 FMAInstKind::Accumulator);
3776 }
3777 break;
3778
3779 case MachineCombinerPattern::FMLSv4f32_OP2:
3780 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3781 RC = &AArch64::FPR128RegClass;
3782 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
3783 Opc = AArch64::FMLSv4i32_indexed;
3784 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3785 FMAInstKind::Indexed);
3786 } else {
3787 Opc = AArch64::FMLSv4f32;
3788 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3789 FMAInstKind::Accumulator);
3790 }
3791 break;
3792 }
3793 } // end switch (Pattern)
3794 // Record MUL and ADD/SUB for deletion
3795 DelInstrs.push_back(MUL);
3796 DelInstrs.push_back(&Root);
3797
3798 return;
3799 }
3800
3801 /// \brief Replace csinc-branch sequence by simple conditional branch
3802 ///
3803 /// Examples:
3804 /// 1.
3805 /// csinc w9, wzr, wzr, <condition code>
3806 /// tbnz w9, #0, 0x44
3807 /// to
3808 /// b.<inverted condition code>
3809 ///
3810 /// 2.
3811 /// csinc w9, wzr, wzr, <condition code>
3812 /// tbz w9, #0, 0x44
3813 /// to
3814 /// b.<condition code>
3815 ///
3816 /// Replace compare and branch sequence by a TBZ/TBNZ instruction when the
3817 /// compare's constant operand is a power of 2.
3818 ///
3819 /// Examples:
3820 /// and w8, w8, #0x400
3821 /// cbnz w8, L1
3822 /// to
3823 /// tbnz w8, #10, L1
3824 ///
3825 /// \param MI Conditional Branch
3826 /// \return True when the simple conditional branch is generated
3827 ///
3828 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
3829 bool IsNegativeBranch = false;
3830 bool IsTestAndBranch = false;
3831 unsigned TargetBBInMI = 0;
3832 switch (MI.getOpcode()) {
3833 default:
3834 llvm_unreachable("Unknown branch instruction?");
3835 case AArch64::Bcc:
3836 return false;
3837 case AArch64::CBZW:
3838 case AArch64::CBZX:
3839 TargetBBInMI = 1;
3840 break;
3841 case AArch64::CBNZW:
3842 case AArch64::CBNZX:
3843 TargetBBInMI = 1;
3844 IsNegativeBranch = true;
3845 break;
3846 case AArch64::TBZW:
3847 case AArch64::TBZX:
3848 TargetBBInMI = 2;
3849 IsTestAndBranch = true;
3850 break;
3851 case AArch64::TBNZW:
3852 case AArch64::TBNZX:
3853 TargetBBInMI = 2;
3854 IsNegativeBranch = true;
3855 IsTestAndBranch = true;
3856 break;
3857 }
3858 // So we increment a zero register and test for bits other
3859 // than bit 0? Conservatively bail out in case the verifier
3860 // missed this case.
3861 if (IsTestAndBranch && MI.getOperand(1).getImm())
3862 return false;
3863
3864 // Find Definition.
3865 assert(MI.getParent() && "Incomplete machine instruction\n");
3866 MachineBasicBlock *MBB = MI.getParent();
3867 MachineFunction *MF = MBB->getParent();
3868 MachineRegisterInfo *MRI = &MF->getRegInfo();
3869 unsigned VReg = MI.getOperand(0).getReg();
3870 if (!TargetRegisterInfo::isVirtualRegister(VReg))
3871 return false;
3872
3873 MachineInstr *DefMI = MRI->getVRegDef(VReg);
3874
3875 // Look through COPY instructions to find definition.
3876 while (DefMI->isCopy()) {
3877 unsigned CopyVReg = DefMI->getOperand(1).getReg();
3878 if (!MRI->hasOneNonDBGUse(CopyVReg))
3879 return false;
3880 if (!MRI->hasOneDef(CopyVReg))
3881 return false;
3882 DefMI = MRI->getVRegDef(CopyVReg);
3883 }
3884
3885 switch (DefMI->getOpcode()) {
3886 default:
3887 return false;
3888 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
3889 case AArch64::ANDWri:
3890 case AArch64::ANDXri: {
3891 if (IsTestAndBranch)
3892 return false;
3893 if (DefMI->getParent() != MBB)
3894 return false;
3895 if (!MRI->hasOneNonDBGUse(VReg))
3896 return false;
3897
3898 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
3899 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
3900 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
3901 if (!isPowerOf2_64(Mask))
3902 return false;
3903
3904 MachineOperand &MO = DefMI->getOperand(1);
3905 unsigned NewReg = MO.getReg();
3906 if (!TargetRegisterInfo::isVirtualRegister(NewReg))
3907 return false;
3908
3909 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
3910
3911 MachineBasicBlock &RefToMBB = *MBB;
3912 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
3913 DebugLoc DL = MI.getDebugLoc();
3914 unsigned Imm = Log2_64(Mask);
3915 unsigned Opc = (Imm < 32)
3916 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
3917 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
3918 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
3919 .addReg(NewReg)
3920 .addImm(Imm)
3921 .addMBB(TBB);
3922 // Register lives on to the TBZ/TBNZ now.
3923 MO.setIsKill(false);
3924
3925 // For immediates smaller than 32, we must use the 32-bit
3926 // variant (W) in all cases, because the 64-bit variant cannot
3927 // encode them.
3928 // Therefore, if the input register is 64-bit, we need to take the
3929 // 32-bit sub-part.
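// For example (illustrative): a 64-bit AND with Mask == 0x400 reached
// through CBNZ becomes a TBNZW on the sub_32 view of the source register
// with bit number 10.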
3930 if (!Is32Bit && Imm < 32)
3931 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
3932 MI.eraseFromParent();
3933 return true;
3934 }
3935 // Look for CSINC
3936 case AArch64::CSINCWr:
3937 case AArch64::CSINCXr: {
3938 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
3939 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
3940 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
3941 DefMI->getOperand(2).getReg() == AArch64::XZR))
3942 return false;
3943
3944 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
3945 return false;
3946
3947 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
3948 // Convert only when the condition code is not modified between
3949 // the CSINC and the branch. The CC may be used by other
3950 // instructions in between.
3951 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
3952 return false;
3953 MachineBasicBlock &RefToMBB = *MBB;
3954 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
3955 DebugLoc DL = MI.getDebugLoc();
3956 if (IsNegativeBranch)
3957 CC = AArch64CC::getInvertedCondCode(CC);
3958 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
3959 MI.eraseFromParent();
3960 return true;
3961 }
3962 }
3963 }
3964
3965 std::pair<unsigned, unsigned>
3966 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
3967 const unsigned Mask = AArch64II::MO_FRAGMENT;
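// For example (illustrative): TF == (MO_PAGEOFF | MO_NC) decomposes into the
// direct flag MO_PAGEOFF and the bitmask flag MO_NC.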
3968 return std::make_pair(TF & Mask, TF & ~Mask);
3969 }
3970
3971 ArrayRef<std::pair<unsigned, const char *>>
3972 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
3973 using namespace AArch64II;
3974 static const std::pair<unsigned, const char *> TargetFlags[] = {
3975 {MO_PAGE, "aarch64-page"},
3976 {MO_PAGEOFF, "aarch64-pageoff"},
3977 {MO_G3, "aarch64-g3"},
3978 {MO_G2, "aarch64-g2"},
3979 {MO_G1, "aarch64-g1"},
3980 {MO_G0, "aarch64-g0"},
3981 {MO_HI12, "aarch64-hi12"}};
3982 return makeArrayRef(TargetFlags);
3983 }
3984
3985 ArrayRef<std::pair<unsigned, const char *>>
3986 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
3987 using namespace AArch64II;
3988 static const std::pair<unsigned, const char *> TargetFlags[] = {
3989 {MO_GOT, "aarch64-got"},
3990 {MO_NC, "aarch64-nc"},
3991 {MO_TLS, "aarch64-tls"}};
3992 return makeArrayRef(TargetFlags);
3993 }
3994