1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AArch64InstrInfo.h"
14 #include "AArch64MachineFunctionInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "MCTargetDesc/AArch64AddressingModes.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineMemOperand.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/MachineModuleInfo.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCAsmInfo.h"
37 #include "llvm/MC/MCInst.h"
38 #include "llvm/MC/MCInstrDesc.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CodeGen.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Compiler.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetOptions.h"
47 #include <cassert>
48 #include <cstdint>
49 #include <iterator>
50 #include <utility>
51
52 using namespace llvm;
53
54 #define GET_INSTRINFO_CTOR_DTOR
55 #include "AArch64GenInstrInfo.inc"
56
57 static cl::opt<unsigned> TBZDisplacementBits(
58 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
59 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
60
61 static cl::opt<unsigned> CBZDisplacementBits(
62 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
63 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
64
65 static cl::opt<unsigned>
66 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
67 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
68
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
70 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
71 AArch64::CATCHRET),
72 RI(STI.getTargetTriple()), Subtarget(STI) {}
73
/// getInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may occupy. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
77 const MachineBasicBlock &MBB = *MI.getParent();
78 const MachineFunction *MF = MBB.getParent();
79 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
80
81 {
82 auto Op = MI.getOpcode();
83 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
84 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
85 }
86
87 // Meta-instructions emit no code.
88 if (MI.isMetaInstruction())
89 return 0;
90
91 // FIXME: We currently only handle pseudoinstructions that don't get expanded
92 // before the assembly printer.
93 unsigned NumBytes = 0;
94 const MCInstrDesc &Desc = MI.getDesc();
95 switch (Desc.getOpcode()) {
96 default:
97 // Anything not explicitly designated otherwise is a normal 4-byte insn.
98 NumBytes = 4;
99 break;
100 case TargetOpcode::STACKMAP:
101 // The upper bound for a stackmap intrinsic is the full length of its shadow
102 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
103 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
104 break;
105 case TargetOpcode::PATCHPOINT:
106 // The size of the patchpoint intrinsic is the number of bytes requested
107 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
108 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
109 break;
110 case AArch64::TLSDESC_CALLSEQ:
111 // This gets lowered to an instruction sequence which takes 16 bytes
112 NumBytes = 16;
113 break;
114 case AArch64::JumpTableDest32:
115 case AArch64::JumpTableDest16:
116 case AArch64::JumpTableDest8:
117 NumBytes = 12;
118 break;
119 case AArch64::SPACE:
120 NumBytes = MI.getOperand(1).getImm();
121 break;
122 }
123
124 return NumBytes;
125 }
126
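// Fill Cond with the condition operands of a conditional branch, using the
// encoding consumed by instantiateCondBranch() and insertSelect():
//   Bcc:       Cond = { condition code }
//   CBZ/CBNZ:  Cond = { -1, opcode, register }
//   TBZ/TBNZ:  Cond = { -1, opcode, register, bit number }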
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
128 SmallVectorImpl<MachineOperand> &Cond) {
129 // Block ends with fall-through condbranch.
130 switch (LastInst->getOpcode()) {
131 default:
132 llvm_unreachable("Unknown branch instruction?");
133 case AArch64::Bcc:
134 Target = LastInst->getOperand(1).getMBB();
135 Cond.push_back(LastInst->getOperand(0));
136 break;
137 case AArch64::CBZW:
138 case AArch64::CBZX:
139 case AArch64::CBNZW:
140 case AArch64::CBNZX:
141 Target = LastInst->getOperand(1).getMBB();
142 Cond.push_back(MachineOperand::CreateImm(-1));
143 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
144 Cond.push_back(LastInst->getOperand(0));
145 break;
146 case AArch64::TBZW:
147 case AArch64::TBZX:
148 case AArch64::TBNZW:
149 case AArch64::TBNZX:
150 Target = LastInst->getOperand(2).getMBB();
151 Cond.push_back(MachineOperand::CreateImm(-1));
152 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
153 Cond.push_back(LastInst->getOperand(0));
154 Cond.push_back(LastInst->getOperand(1));
155 }
156 }
157
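// Return the number of displacement bits the given branch opcode can encode.
// The limits for TB[N]Z, CB[N]Z and Bcc come from the cl::opt values above;
// unconditional B returns 64, which isBranchOffsetInRange() treats as
// effectively unlimited.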
static unsigned getBranchDisplacementBits(unsigned Opc) {
159 switch (Opc) {
160 default:
161 llvm_unreachable("unexpected opcode!");
162 case AArch64::B:
163 return 64;
164 case AArch64::TBNZW:
165 case AArch64::TBZW:
166 case AArch64::TBNZX:
167 case AArch64::TBZX:
168 return TBZDisplacementBits;
169 case AArch64::CBNZW:
170 case AArch64::CBZW:
171 case AArch64::CBNZX:
172 case AArch64::CBZX:
173 return CBZDisplacementBits;
174 case AArch64::Bcc:
175 return BCCDisplacementBits;
176 }
177 }
178
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
180 int64_t BrOffset) const {
181 unsigned Bits = getBranchDisplacementBits(BranchOp);
assert(Bits >= 3 && "max branch displacement must be enough to jump "
       "over conditional branch expansion");
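// Branch offsets are in bytes and instructions are 4-byte aligned, so scale
// the offset down to instruction granularity before the range check.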
184 return isIntN(Bits, BrOffset / 4);
185 }
186
187 MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
189 switch (MI.getOpcode()) {
190 default:
191 llvm_unreachable("unexpected opcode!");
192 case AArch64::B:
193 return MI.getOperand(0).getMBB();
194 case AArch64::TBZW:
195 case AArch64::TBNZW:
196 case AArch64::TBZX:
197 case AArch64::TBNZX:
198 return MI.getOperand(2).getMBB();
199 case AArch64::CBZW:
200 case AArch64::CBNZW:
201 case AArch64::CBZX:
202 case AArch64::CBNZX:
203 case AArch64::Bcc:
204 return MI.getOperand(1).getMBB();
205 }
206 }
207
208 // Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
210 MachineBasicBlock *&TBB,
211 MachineBasicBlock *&FBB,
212 SmallVectorImpl<MachineOperand> &Cond,
213 bool AllowModify) const {
214 // If the block has no terminators, it just falls into the block after it.
215 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
216 if (I == MBB.end())
217 return false;
218
219 if (!isUnpredicatedTerminator(*I))
220 return false;
221
222 // Get the last instruction in the block.
223 MachineInstr *LastInst = &*I;
224
225 // If there is only one terminator instruction, process it.
226 unsigned LastOpc = LastInst->getOpcode();
227 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
228 if (isUncondBranchOpcode(LastOpc)) {
229 TBB = LastInst->getOperand(0).getMBB();
230 return false;
231 }
232 if (isCondBranchOpcode(LastOpc)) {
233 // Block ends with fall-through condbranch.
234 parseCondBranch(LastInst, TBB, Cond);
235 return false;
236 }
237 return true; // Can't handle indirect branch.
238 }
239
240 // Get the instruction before it if it is a terminator.
241 MachineInstr *SecondLastInst = &*I;
242 unsigned SecondLastOpc = SecondLastInst->getOpcode();
243
244 // If AllowModify is true and the block ends with two or more unconditional
245 // branches, delete all but the first unconditional branch.
246 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
247 while (isUncondBranchOpcode(SecondLastOpc)) {
248 LastInst->eraseFromParent();
249 LastInst = SecondLastInst;
250 LastOpc = LastInst->getOpcode();
251 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
// Return now; the only remaining terminator is an unconditional branch.
253 TBB = LastInst->getOperand(0).getMBB();
254 return false;
255 } else {
256 SecondLastInst = &*I;
257 SecondLastOpc = SecondLastInst->getOpcode();
258 }
259 }
260 }
261
262 // If there are three terminators, we don't know what sort of block this is.
263 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
264 return true;
265
266 // If the block ends with a B and a Bcc, handle it.
267 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
268 parseCondBranch(SecondLastInst, TBB, Cond);
269 FBB = LastInst->getOperand(0).getMBB();
270 return false;
271 }
272
273 // If the block ends with two unconditional branches, handle it. The second
274 // one is not executed, so remove it.
275 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
276 TBB = SecondLastInst->getOperand(0).getMBB();
277 I = LastInst;
278 if (AllowModify)
279 I->eraseFromParent();
280 return false;
281 }
282
283 // ...likewise if it ends with an indirect branch followed by an unconditional
284 // branch.
285 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
286 I = LastInst;
287 if (AllowModify)
288 I->eraseFromParent();
289 return true;
290 }
291
292 // Otherwise, can't handle this.
293 return true;
294 }
295
bool AArch64InstrInfo::reverseBranchCondition(
297 SmallVectorImpl<MachineOperand> &Cond) const {
298 if (Cond[0].getImm() != -1) {
299 // Regular Bcc
300 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
301 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
302 } else {
303 // Folded compare-and-branch
304 switch (Cond[1].getImm()) {
305 default:
306 llvm_unreachable("Unknown conditional branch!");
307 case AArch64::CBZW:
308 Cond[1].setImm(AArch64::CBNZW);
309 break;
310 case AArch64::CBNZW:
311 Cond[1].setImm(AArch64::CBZW);
312 break;
313 case AArch64::CBZX:
314 Cond[1].setImm(AArch64::CBNZX);
315 break;
316 case AArch64::CBNZX:
317 Cond[1].setImm(AArch64::CBZX);
318 break;
319 case AArch64::TBZW:
320 Cond[1].setImm(AArch64::TBNZW);
321 break;
322 case AArch64::TBNZW:
323 Cond[1].setImm(AArch64::TBZW);
324 break;
325 case AArch64::TBZX:
326 Cond[1].setImm(AArch64::TBNZX);
327 break;
328 case AArch64::TBNZX:
329 Cond[1].setImm(AArch64::TBZX);
330 break;
331 }
332 }
333
334 return false;
335 }
336
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
338 int *BytesRemoved) const {
339 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
340 if (I == MBB.end())
341 return 0;
342
343 if (!isUncondBranchOpcode(I->getOpcode()) &&
344 !isCondBranchOpcode(I->getOpcode()))
345 return 0;
346
347 // Remove the branch.
348 I->eraseFromParent();
349
350 I = MBB.end();
351
352 if (I == MBB.begin()) {
353 if (BytesRemoved)
354 *BytesRemoved = 4;
355 return 1;
356 }
357 --I;
358 if (!isCondBranchOpcode(I->getOpcode())) {
359 if (BytesRemoved)
360 *BytesRemoved = 4;
361 return 1;
362 }
363
364 // Remove the branch.
365 I->eraseFromParent();
366 if (BytesRemoved)
367 *BytesRemoved = 8;
368
369 return 2;
370 }
371
void AArch64InstrInfo::instantiateCondBranch(
373 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
374 ArrayRef<MachineOperand> Cond) const {
375 if (Cond[0].getImm() != -1) {
376 // Regular Bcc
377 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
378 } else {
379 // Folded compare-and-branch
// Note that we use MIB.add() rather than addReg() so that the register
// operand flags are preserved.
381 const MachineInstrBuilder MIB =
382 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
383 if (Cond.size() > 3)
384 MIB.addImm(Cond[3].getImm());
385 MIB.addMBB(TBB);
386 }
387 }
388
unsigned AArch64InstrInfo::insertBranch(
390 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
391 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
392 // Shouldn't be a fall through.
393 assert(TBB && "insertBranch must not be told to insert a fallthrough");
394
395 if (!FBB) {
396 if (Cond.empty()) // Unconditional branch?
397 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
398 else
399 instantiateCondBranch(MBB, DL, TBB, Cond);
400
401 if (BytesAdded)
402 *BytesAdded = 4;
403
404 return 1;
405 }
406
407 // Two-way conditional branch.
408 instantiateCondBranch(MBB, DL, TBB, Cond);
409 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
410
411 if (BytesAdded)
412 *BytesAdded = 8;
413
414 return 2;
415 }
416
417 // Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
419 while (Register::isVirtualRegister(VReg)) {
420 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
421 if (!DefMI->isFullCopy())
422 return VReg;
423 VReg = DefMI->getOperand(1).getReg();
424 }
425 return VReg;
426 }
427
428 // Determine if VReg is defined by an instruction that can be folded into a
429 // csel instruction. If so, return the folded opcode, and the replacement
430 // register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
432 unsigned *NewVReg = nullptr) {
433 VReg = removeCopies(MRI, VReg);
434 if (!Register::isVirtualRegister(VReg))
435 return 0;
436
437 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
438 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
439 unsigned Opc = 0;
440 unsigned SrcOpNum = 0;
441 switch (DefMI->getOpcode()) {
442 case AArch64::ADDSXri:
443 case AArch64::ADDSWri:
444 // if NZCV is used, do not fold.
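// (findRegisterDefOperandIdx with isDead=true returns -1 unless the NZCV
// def is dead, i.e. unless the flags produced here are unused.)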
445 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
446 return 0;
447 // fall-through to ADDXri and ADDWri.
448 LLVM_FALLTHROUGH;
449 case AArch64::ADDXri:
450 case AArch64::ADDWri:
451 // add x, 1 -> csinc.
452 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
453 DefMI->getOperand(3).getImm() != 0)
454 return 0;
455 SrcOpNum = 1;
456 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
457 break;
458
459 case AArch64::ORNXrr:
460 case AArch64::ORNWrr: {
461 // not x -> csinv, represented as orn dst, xzr, src.
462 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
463 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
464 return 0;
465 SrcOpNum = 2;
466 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
467 break;
468 }
469
470 case AArch64::SUBSXrr:
471 case AArch64::SUBSWrr:
472 // if NZCV is used, do not fold.
473 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
474 return 0;
475 // fall-through to SUBXrr and SUBWrr.
476 LLVM_FALLTHROUGH;
477 case AArch64::SUBXrr:
478 case AArch64::SUBWrr: {
479 // neg x -> csneg, represented as sub dst, xzr, src.
480 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
481 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
482 return 0;
483 SrcOpNum = 2;
484 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
485 break;
486 }
487 default:
488 return 0;
489 }
490 assert(Opc && SrcOpNum && "Missing parameters");
491
492 if (NewVReg)
493 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
494 return Opc;
495 }
496
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
498 ArrayRef<MachineOperand> Cond,
499 unsigned TrueReg, unsigned FalseReg,
500 int &CondCycles, int &TrueCycles,
501 int &FalseCycles) const {
502 // Check register classes.
503 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
504 const TargetRegisterClass *RC =
505 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
506 if (!RC)
507 return false;
508
509 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
510 unsigned ExtraCondLat = Cond.size() != 1;
511
512 // GPRs are handled by csel.
513 // FIXME: Fold in x+1, -x, and ~x when applicable.
514 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
515 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
516 // Single-cycle csel, csinc, csinv, and csneg.
517 CondCycles = 1 + ExtraCondLat;
518 TrueCycles = FalseCycles = 1;
519 if (canFoldIntoCSel(MRI, TrueReg))
520 TrueCycles = 0;
521 else if (canFoldIntoCSel(MRI, FalseReg))
522 FalseCycles = 0;
523 return true;
524 }
525
526 // Scalar floating point is handled by fcsel.
527 // FIXME: Form fabs, fmin, and fmax when applicable.
528 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
529 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
530 CondCycles = 5 + ExtraCondLat;
531 TrueCycles = FalseCycles = 2;
532 return true;
533 }
534
535 // Can't do vectors.
536 return false;
537 }
538
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
540 MachineBasicBlock::iterator I,
541 const DebugLoc &DL, unsigned DstReg,
542 ArrayRef<MachineOperand> Cond,
543 unsigned TrueReg, unsigned FalseReg) const {
544 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
545
546 // Parse the condition code, see parseCondBranch() above.
547 AArch64CC::CondCode CC;
548 switch (Cond.size()) {
549 default:
550 llvm_unreachable("Unknown condition opcode in Cond");
551 case 1: // b.cc
552 CC = AArch64CC::CondCode(Cond[0].getImm());
553 break;
554 case 3: { // cbz/cbnz
555 // We must insert a compare against 0.
556 bool Is64Bit;
557 switch (Cond[1].getImm()) {
558 default:
559 llvm_unreachable("Unknown branch opcode in Cond");
560 case AArch64::CBZW:
561 Is64Bit = false;
562 CC = AArch64CC::EQ;
563 break;
564 case AArch64::CBZX:
565 Is64Bit = true;
566 CC = AArch64CC::EQ;
567 break;
568 case AArch64::CBNZW:
569 Is64Bit = false;
570 CC = AArch64CC::NE;
571 break;
572 case AArch64::CBNZX:
573 Is64Bit = true;
574 CC = AArch64CC::NE;
575 break;
576 }
577 Register SrcReg = Cond[2].getReg();
578 if (Is64Bit) {
579 // cmp reg, #0 is actually subs xzr, reg, #0.
580 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
581 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
582 .addReg(SrcReg)
583 .addImm(0)
584 .addImm(0);
585 } else {
586 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
587 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
588 .addReg(SrcReg)
589 .addImm(0)
590 .addImm(0);
591 }
592 break;
593 }
594 case 4: { // tbz/tbnz
595 // We must insert a tst instruction.
596 switch (Cond[1].getImm()) {
597 default:
598 llvm_unreachable("Unknown branch opcode in Cond");
599 case AArch64::TBZW:
600 case AArch64::TBZX:
601 CC = AArch64CC::EQ;
602 break;
603 case AArch64::TBNZW:
604 case AArch64::TBNZX:
605 CC = AArch64CC::NE;
606 break;
607 }
608 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
609 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
610 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
611 .addReg(Cond[2].getReg())
612 .addImm(
613 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
614 else
615 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
616 .addReg(Cond[2].getReg())
617 .addImm(
618 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
619 break;
620 }
621 }
622
623 unsigned Opc = 0;
624 const TargetRegisterClass *RC = nullptr;
625 bool TryFold = false;
626 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
627 RC = &AArch64::GPR64RegClass;
628 Opc = AArch64::CSELXr;
629 TryFold = true;
630 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
631 RC = &AArch64::GPR32RegClass;
632 Opc = AArch64::CSELWr;
633 TryFold = true;
634 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
635 RC = &AArch64::FPR64RegClass;
636 Opc = AArch64::FCSELDrrr;
637 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
638 RC = &AArch64::FPR32RegClass;
639 Opc = AArch64::FCSELSrrr;
640 }
641 assert(RC && "Unsupported regclass");
642
643 // Try folding simple instructions into the csel.
644 if (TryFold) {
645 unsigned NewVReg = 0;
646 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
647 if (FoldedOpc) {
// The folded opcodes csinc, csinv and csneg apply the operation to
// FalseReg, so we need to invert the condition.
650 CC = AArch64CC::getInvertedCondCode(CC);
651 TrueReg = FalseReg;
652 } else
653 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
654
655 // Fold the operation. Leave any dead instructions for DCE to clean up.
656 if (FoldedOpc) {
657 FalseReg = NewVReg;
658 Opc = FoldedOpc;
// This extends the live range of NewVReg.
660 MRI.clearKillFlags(NewVReg);
661 }
662 }
663
// Pull all virtual registers into the appropriate class.
665 MRI.constrainRegClass(TrueReg, RC);
666 MRI.constrainRegClass(FalseReg, RC);
667
668 // Insert the csel.
669 BuildMI(MBB, I, DL, get(Opc), DstReg)
670 .addReg(TrueReg)
671 .addReg(FalseReg)
672 .addImm(CC);
673 }
674
675 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
677 uint64_t Imm = MI.getOperand(1).getImm();
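// Mask the immediate down to BitSize bits before asking whether it has a
// valid logical-immediate (ORR) encoding.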
678 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
679 uint64_t Encoding;
680 return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
681 }
682
683 // FIXME: this implementation should be micro-architecture dependent, so a
684 // micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
686 if (!Subtarget.hasCustomCheapAsMoveHandling())
687 return MI.isAsCheapAsAMove();
688
689 const unsigned Opcode = MI.getOpcode();
690
691 // Firstly, check cases gated by features.
692
693 if (Subtarget.hasZeroCycleZeroingFP()) {
694 if (Opcode == AArch64::FMOVH0 ||
695 Opcode == AArch64::FMOVS0 ||
696 Opcode == AArch64::FMOVD0)
697 return true;
698 }
699
700 if (Subtarget.hasZeroCycleZeroingGP()) {
701 if (Opcode == TargetOpcode::COPY &&
702 (MI.getOperand(1).getReg() == AArch64::WZR ||
703 MI.getOperand(1).getReg() == AArch64::XZR))
704 return true;
705 }
706
707 // Secondly, check cases specific to sub-targets.
708
709 if (Subtarget.hasExynosCheapAsMoveHandling()) {
710 if (isExynosCheapAsMove(MI))
711 return true;
712
713 return MI.isAsCheapAsAMove();
714 }
715
716 // Finally, check generic cases.
717
718 switch (Opcode) {
719 default:
720 return false;
721
722 // add/sub on register without shift
723 case AArch64::ADDWri:
724 case AArch64::ADDXri:
725 case AArch64::SUBWri:
726 case AArch64::SUBXri:
727 return (MI.getOperand(3).getImm() == 0);
728
729 // logical ops on immediate
730 case AArch64::ANDWri:
731 case AArch64::ANDXri:
732 case AArch64::EORWri:
733 case AArch64::EORXri:
734 case AArch64::ORRWri:
735 case AArch64::ORRXri:
736 return true;
737
738 // logical ops on register without shift
739 case AArch64::ANDWrr:
740 case AArch64::ANDXrr:
741 case AArch64::BICWrr:
742 case AArch64::BICXrr:
743 case AArch64::EONWrr:
744 case AArch64::EONXrr:
745 case AArch64::EORWrr:
746 case AArch64::EORXrr:
747 case AArch64::ORNWrr:
748 case AArch64::ORNXrr:
749 case AArch64::ORRWrr:
750 case AArch64::ORRXrr:
751 return true;
752
753 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
754 // ORRXri, it is as cheap as MOV
755 case AArch64::MOVi32imm:
756 return canBeExpandedToORR(MI, 32);
757 case AArch64::MOVi64imm:
758 return canBeExpandedToORR(MI, 64);
759 }
760
761 llvm_unreachable("Unknown opcode to check as cheap as a move!");
762 }
763
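// Return true if the shifted or extended operand form used by MI is
// considered cheap on Falkor, e.g. small LSL shift amounts on ADD/SUB,
// ASR by 31/63 on SUB, small zero-extends, and register-offset memory
// accesses whose offset register is not sign-extended.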
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
765 switch (MI.getOpcode()) {
766 default:
767 return false;
768
769 case AArch64::ADDWrs:
770 case AArch64::ADDXrs:
771 case AArch64::ADDSWrs:
772 case AArch64::ADDSXrs: {
773 unsigned Imm = MI.getOperand(3).getImm();
774 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
775 if (ShiftVal == 0)
776 return true;
777 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
778 }
779
780 case AArch64::ADDWrx:
781 case AArch64::ADDXrx:
782 case AArch64::ADDXrx64:
783 case AArch64::ADDSWrx:
784 case AArch64::ADDSXrx:
785 case AArch64::ADDSXrx64: {
786 unsigned Imm = MI.getOperand(3).getImm();
787 switch (AArch64_AM::getArithExtendType(Imm)) {
788 default:
789 return false;
790 case AArch64_AM::UXTB:
791 case AArch64_AM::UXTH:
792 case AArch64_AM::UXTW:
793 case AArch64_AM::UXTX:
794 return AArch64_AM::getArithShiftValue(Imm) <= 4;
795 }
796 }
797
798 case AArch64::SUBWrs:
799 case AArch64::SUBSWrs: {
800 unsigned Imm = MI.getOperand(3).getImm();
801 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
802 return ShiftVal == 0 ||
803 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
804 }
805
806 case AArch64::SUBXrs:
807 case AArch64::SUBSXrs: {
808 unsigned Imm = MI.getOperand(3).getImm();
809 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
810 return ShiftVal == 0 ||
811 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
812 }
813
814 case AArch64::SUBWrx:
815 case AArch64::SUBXrx:
816 case AArch64::SUBXrx64:
817 case AArch64::SUBSWrx:
818 case AArch64::SUBSXrx:
819 case AArch64::SUBSXrx64: {
820 unsigned Imm = MI.getOperand(3).getImm();
821 switch (AArch64_AM::getArithExtendType(Imm)) {
822 default:
823 return false;
824 case AArch64_AM::UXTB:
825 case AArch64_AM::UXTH:
826 case AArch64_AM::UXTW:
827 case AArch64_AM::UXTX:
828 return AArch64_AM::getArithShiftValue(Imm) == 0;
829 }
830 }
831
832 case AArch64::LDRBBroW:
833 case AArch64::LDRBBroX:
834 case AArch64::LDRBroW:
835 case AArch64::LDRBroX:
836 case AArch64::LDRDroW:
837 case AArch64::LDRDroX:
838 case AArch64::LDRHHroW:
839 case AArch64::LDRHHroX:
840 case AArch64::LDRHroW:
841 case AArch64::LDRHroX:
842 case AArch64::LDRQroW:
843 case AArch64::LDRQroX:
844 case AArch64::LDRSBWroW:
845 case AArch64::LDRSBWroX:
846 case AArch64::LDRSBXroW:
847 case AArch64::LDRSBXroX:
848 case AArch64::LDRSHWroW:
849 case AArch64::LDRSHWroX:
850 case AArch64::LDRSHXroW:
851 case AArch64::LDRSHXroX:
852 case AArch64::LDRSWroW:
853 case AArch64::LDRSWroX:
854 case AArch64::LDRSroW:
855 case AArch64::LDRSroX:
856 case AArch64::LDRWroW:
857 case AArch64::LDRWroX:
858 case AArch64::LDRXroW:
859 case AArch64::LDRXroX:
860 case AArch64::PRFMroW:
861 case AArch64::PRFMroX:
862 case AArch64::STRBBroW:
863 case AArch64::STRBBroX:
864 case AArch64::STRBroW:
865 case AArch64::STRBroX:
866 case AArch64::STRDroW:
867 case AArch64::STRDroX:
868 case AArch64::STRHHroW:
869 case AArch64::STRHHroX:
870 case AArch64::STRHroW:
871 case AArch64::STRHroX:
872 case AArch64::STRQroW:
873 case AArch64::STRQroX:
874 case AArch64::STRSroW:
875 case AArch64::STRSroX:
876 case AArch64::STRWroW:
877 case AArch64::STRWroX:
878 case AArch64::STRXroW:
879 case AArch64::STRXroX: {
880 unsigned IsSigned = MI.getOperand(3).getImm();
881 return !IsSigned;
882 }
883 }
884 }
885
bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
887 unsigned Opc = MI.getOpcode();
888 switch (Opc) {
889 default:
890 return false;
891 case AArch64::SEH_StackAlloc:
892 case AArch64::SEH_SaveFPLR:
893 case AArch64::SEH_SaveFPLR_X:
894 case AArch64::SEH_SaveReg:
895 case AArch64::SEH_SaveReg_X:
896 case AArch64::SEH_SaveRegP:
897 case AArch64::SEH_SaveRegP_X:
898 case AArch64::SEH_SaveFReg:
899 case AArch64::SEH_SaveFReg_X:
900 case AArch64::SEH_SaveFRegP:
901 case AArch64::SEH_SaveFRegP_X:
902 case AArch64::SEH_SetFP:
903 case AArch64::SEH_AddFP:
904 case AArch64::SEH_Nop:
905 case AArch64::SEH_PrologEnd:
906 case AArch64::SEH_EpilogStart:
907 case AArch64::SEH_EpilogEnd:
908 return true;
909 }
910 }
911
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
913 unsigned &SrcReg, unsigned &DstReg,
914 unsigned &SubIdx) const {
915 switch (MI.getOpcode()) {
916 default:
917 return false;
918 case AArch64::SBFMXri: // aka sxtw
919 case AArch64::UBFMXri: // aka uxtw
// Check for the 32 -> 64 bit extension case; these instructions can do
// much more.
922 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
923 return false;
924 // This is a signed or unsigned 32 -> 64 bit extension.
925 SrcReg = MI.getOperand(1).getReg();
926 DstReg = MI.getOperand(0).getReg();
927 SubIdx = AArch64::sub_32;
928 return true;
929 }
930 }
931
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
933 const MachineInstr &MIa, const MachineInstr &MIb) const {
934 const TargetRegisterInfo *TRI = &getRegisterInfo();
935 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
936 int64_t OffsetA = 0, OffsetB = 0;
937 unsigned WidthA = 0, WidthB = 0;
938
939 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
940 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
941
942 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
943 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
944 return false;
945
// Retrieve the base register, offset from the base and width. Width is
// the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
// the bases are identical, and the offset of the lower access plus its
// width does not overlap the offset of the higher access, then the
// accesses are disjoint.
951 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
952 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
953 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
954 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
955 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
956 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
957 if (LowOffset + LowWidth <= HighOffset)
958 return true;
959 }
960 }
961 return false;
962 }
963
bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
965 const MachineBasicBlock *MBB,
966 const MachineFunction &MF) const {
967 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
968 return true;
969 switch (MI.getOpcode()) {
970 case AArch64::HINT:
971 // CSDB hints are scheduling barriers.
972 if (MI.getOperand(0).getImm() == 0x14)
973 return true;
974 break;
975 case AArch64::DSB:
976 case AArch64::ISB:
977 // DSB and ISB also are scheduling barriers.
978 return true;
979 default:;
980 }
981 return isSEHInstruction(MI);
982 }
983
984 /// analyzeCompare - For a comparison instruction, return the source registers
985 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
986 /// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
988 unsigned &SrcReg2, int &CmpMask,
989 int &CmpValue) const {
990 // The first operand can be a frame index where we'd normally expect a
991 // register.
992 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
993 if (!MI.getOperand(1).isReg())
994 return false;
995
996 switch (MI.getOpcode()) {
997 default:
998 break;
999 case AArch64::SUBSWrr:
1000 case AArch64::SUBSWrs:
1001 case AArch64::SUBSWrx:
1002 case AArch64::SUBSXrr:
1003 case AArch64::SUBSXrs:
1004 case AArch64::SUBSXrx:
1005 case AArch64::ADDSWrr:
1006 case AArch64::ADDSWrs:
1007 case AArch64::ADDSWrx:
1008 case AArch64::ADDSXrr:
1009 case AArch64::ADDSXrs:
1010 case AArch64::ADDSXrx:
1011 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1012 SrcReg = MI.getOperand(1).getReg();
1013 SrcReg2 = MI.getOperand(2).getReg();
1014 CmpMask = ~0;
1015 CmpValue = 0;
1016 return true;
1017 case AArch64::SUBSWri:
1018 case AArch64::ADDSWri:
1019 case AArch64::SUBSXri:
1020 case AArch64::ADDSXri:
1021 SrcReg = MI.getOperand(1).getReg();
1022 SrcReg2 = 0;
1023 CmpMask = ~0;
// FIXME: CmpValue is reduced here to 0 or 1, since it is only ever
// compared against zero in optimizeCompareInstr.
1025 CmpValue = MI.getOperand(2).getImm() != 0;
1026 return true;
1027 case AArch64::ANDSWri:
1028 case AArch64::ANDSXri:
// ANDS does not use the same immediate encoding scheme as the other xxxS
// instructions.
1031 SrcReg = MI.getOperand(1).getReg();
1032 SrcReg2 = 0;
1033 CmpMask = ~0;
// FIXME: The return value type of decodeLogicalImmediate is uint64_t,
// while the type of CmpValue is int. When converting uint64_t to int,
// the high 32 bits of uint64_t would be lost; this caused a bug in
// spec2006-483.xalancbmk. CmpValue is only used to compare against zero
// in optimizeCompareInstr, so reduce it to 0 or 1 here as well.
1039 CmpValue = AArch64_AM::decodeLogicalImmediate(
1040 MI.getOperand(2).getImm(),
1041 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1042 return true;
1043 }
1044
1045 return false;
1046 }
1047
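/// Constrain every register operand of \p Instr to the register class
/// required by its instruction descriptor. Returns false if some operand
/// cannot be constrained (e.g. a physical register outside the required
/// class); frame-index operands are left alone.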
static bool UpdateOperandRegClass(MachineInstr &Instr) {
1049 MachineBasicBlock *MBB = Instr.getParent();
1050 assert(MBB && "Can't get MachineBasicBlock here");
1051 MachineFunction *MF = MBB->getParent();
1052 assert(MF && "Can't get MachineFunction here");
1053 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1054 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1055 MachineRegisterInfo *MRI = &MF->getRegInfo();
1056
1057 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1058 ++OpIdx) {
1059 MachineOperand &MO = Instr.getOperand(OpIdx);
1060 const TargetRegisterClass *OpRegCstraints =
1061 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1062
1063 // If there's no constraint, there's nothing to do.
1064 if (!OpRegCstraints)
1065 continue;
1066 // If the operand is a frame index, there's nothing to do here.
1067 // A frame index operand will resolve correctly during PEI.
1068 if (MO.isFI())
1069 continue;
1070
1071 assert(MO.isReg() &&
1072 "Operand has register constraints without being a register!");
1073
1074 Register Reg = MO.getReg();
1075 if (Register::isPhysicalRegister(Reg)) {
1076 if (!OpRegCstraints->contains(Reg))
1077 return false;
1078 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1079 !MRI->constrainRegClass(Reg, OpRegCstraints))
1080 return false;
1081 }
1082
1083 return true;
1084 }
1085
/// Return the opcode that does not set flags when possible; otherwise
/// return the original opcode. The caller is responsible for doing the
/// actual substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
// Don't convert all compare instructions: in the non-flag-setting form of
// some of them, the zero-register destination is encoded as the sp register.
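// For example, 'cmp w0, #1' is 'subs wzr, w0, #1'; rewriting it as SUBWri
// would turn the destination into wsp, because SUBWri's destination class
// is GPR32sp, where register 31 encodes SP rather than WZR.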
1092 bool MIDefinesZeroReg = false;
1093 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1094 MIDefinesZeroReg = true;
1095
1096 switch (MI.getOpcode()) {
1097 default:
1098 return MI.getOpcode();
1099 case AArch64::ADDSWrr:
1100 return AArch64::ADDWrr;
1101 case AArch64::ADDSWri:
1102 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1103 case AArch64::ADDSWrs:
1104 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1105 case AArch64::ADDSWrx:
1106 return AArch64::ADDWrx;
1107 case AArch64::ADDSXrr:
1108 return AArch64::ADDXrr;
1109 case AArch64::ADDSXri:
1110 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1111 case AArch64::ADDSXrs:
1112 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1113 case AArch64::ADDSXrx:
1114 return AArch64::ADDXrx;
1115 case AArch64::SUBSWrr:
1116 return AArch64::SUBWrr;
1117 case AArch64::SUBSWri:
1118 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1119 case AArch64::SUBSWrs:
1120 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1121 case AArch64::SUBSWrx:
1122 return AArch64::SUBWrx;
1123 case AArch64::SUBSXrr:
1124 return AArch64::SUBXrr;
1125 case AArch64::SUBSXri:
1126 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1127 case AArch64::SUBSXrs:
1128 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1129 case AArch64::SUBSXrx:
1130 return AArch64::SUBXrx;
1131 }
1132 }
1133
1134 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1135
1136 /// True when condition flags are accessed (either by writing or reading)
1137 /// on the instruction trace starting at From and ending at To.
1138 ///
/// Note: If From and To are in different blocks, the condition flags are
/// conservatively assumed to be accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
1142 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1143 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1144 // Early exit if To is at the beginning of the BB.
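// (Returning true is the conservative answer: assume the flags are
// accessed.)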
1145 if (To == To->getParent()->begin())
1146 return true;
1147
1148 // Check whether the instructions are in the same basic block
1149 // If not, assume the condition flags might get modified somewhere.
1150 if (To->getParent() != From->getParent())
1151 return true;
1152
1153 // From must be above To.
1154 assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1155 [From](MachineInstr &MI) {
1156 return MI.getIterator() == From;
1157 }) != To->getParent()->rend());
1158
// We iterate backwards, starting from the instruction before \p To, until
// we hit \p From.
1160 for (--To; To != From; --To) {
1161 const MachineInstr &Instr = *To;
1162
1163 if (((AccessToCheck & AK_Write) &&
1164 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1165 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1166 return true;
1167 }
1168 return false;
1169 }
1170
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly a compare
/// instruction when there are no uses of its destination register.
1175 ///
1176 /// The following steps are tried in order:
1177 /// 1. Convert CmpInstr into an unconditional version.
1178 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1179 /// condition code or an instruction which can be converted into such an
1180 /// instruction.
1181 /// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
1183 MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1184 int CmpValue, const MachineRegisterInfo *MRI) const {
1185 assert(CmpInstr.getParent());
1186 assert(MRI);
1187
1188 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1189 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1190 if (DeadNZCVIdx != -1) {
1191 if (CmpInstr.definesRegister(AArch64::WZR) ||
1192 CmpInstr.definesRegister(AArch64::XZR)) {
1193 CmpInstr.eraseFromParent();
1194 return true;
1195 }
1196 unsigned Opc = CmpInstr.getOpcode();
1197 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1198 if (NewOpc == Opc)
1199 return false;
1200 const MCInstrDesc &MCID = get(NewOpc);
1201 CmpInstr.setDesc(MCID);
1202 CmpInstr.RemoveOperand(DeadNZCVIdx);
1203 bool succeeded = UpdateOperandRegClass(CmpInstr);
1204 (void)succeeded;
1205 assert(succeeded && "Some operands reg class are incompatible!");
1206 return true;
1207 }
1208
// Continue only if we have an "ri" form where the immediate is zero.
// FIXME: CmpValue has already been converted to 0 or 1 in the
// analyzeCompare function above.
1212 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1213 if (CmpValue != 0 || SrcReg2 != 0)
1214 return false;
1215
// CmpInstr is a compare instruction if its destination register is not used.
1217 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1218 return false;
1219
1220 return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1221 }
1222
/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr is already an S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
1228 switch (Instr.getOpcode()) {
1229 default:
1230 return AArch64::INSTRUCTION_LIST_END;
1231
1232 case AArch64::ADDSWrr:
1233 case AArch64::ADDSWri:
1234 case AArch64::ADDSXrr:
1235 case AArch64::ADDSXri:
1236 case AArch64::SUBSWrr:
1237 case AArch64::SUBSWri:
1238 case AArch64::SUBSXrr:
1239 case AArch64::SUBSXri:
1240 return Instr.getOpcode();
1241
1242 case AArch64::ADDWrr:
1243 return AArch64::ADDSWrr;
1244 case AArch64::ADDWri:
1245 return AArch64::ADDSWri;
1246 case AArch64::ADDXrr:
1247 return AArch64::ADDSXrr;
1248 case AArch64::ADDXri:
1249 return AArch64::ADDSXri;
1250 case AArch64::ADCWr:
1251 return AArch64::ADCSWr;
1252 case AArch64::ADCXr:
1253 return AArch64::ADCSXr;
1254 case AArch64::SUBWrr:
1255 return AArch64::SUBSWrr;
1256 case AArch64::SUBWri:
1257 return AArch64::SUBSWri;
1258 case AArch64::SUBXrr:
1259 return AArch64::SUBSXrr;
1260 case AArch64::SUBXri:
1261 return AArch64::SUBSXri;
1262 case AArch64::SBCWr:
1263 return AArch64::SBCSWr;
1264 case AArch64::SBCXr:
1265 return AArch64::SBCSXr;
1266 case AArch64::ANDWri:
1267 return AArch64::ANDSWri;
1268 case AArch64::ANDXri:
1269 return AArch64::ANDSXri;
1270 }
1271 }
1272
1273 /// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1275 for (auto *BB : MBB->successors())
1276 if (BB->isLiveIn(AArch64::NZCV))
1277 return true;
1278 return false;
1279 }
1280
1281 namespace {
1282
1283 struct UsedNZCV {
1284 bool N = false;
1285 bool Z = false;
1286 bool C = false;
1287 bool V = false;
1288
1289 UsedNZCV() = default;
1290
UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1292 this->N |= UsedFlags.N;
1293 this->Z |= UsedFlags.Z;
1294 this->C |= UsedFlags.C;
1295 this->V |= UsedFlags.V;
1296 return *this;
1297 }
1298 };
1299
1300 } // end anonymous namespace
1301
1302 /// Find a condition code used by the instruction.
1303 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1304 /// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1306 switch (Instr.getOpcode()) {
1307 default:
1308 return AArch64CC::Invalid;
1309
1310 case AArch64::Bcc: {
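// A Bcc's operands are (cond, target) followed by an implicit NZCV use, so
// the condition code sits two operands before the NZCV use.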
1311 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1312 assert(Idx >= 2);
1313 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1314 }
1315
1316 case AArch64::CSINVWr:
1317 case AArch64::CSINVXr:
1318 case AArch64::CSINCWr:
1319 case AArch64::CSINCXr:
1320 case AArch64::CSELWr:
1321 case AArch64::CSELXr:
1322 case AArch64::CSNEGWr:
1323 case AArch64::CSNEGXr:
1324 case AArch64::FCSELSrrr:
1325 case AArch64::FCSELDrrr: {
1326 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1327 assert(Idx >= 1);
1328 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1329 }
1330 }
1331 }
1332
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1334 assert(CC != AArch64CC::Invalid);
1335 UsedNZCV UsedFlags;
1336 switch (CC) {
1337 default:
1338 break;
1339
1340 case AArch64CC::EQ: // Z set
1341 case AArch64CC::NE: // Z clear
1342 UsedFlags.Z = true;
1343 break;
1344
1345 case AArch64CC::HI: // Z clear and C set
1346 case AArch64CC::LS: // Z set or C clear
1347 UsedFlags.Z = true;
1348 LLVM_FALLTHROUGH;
1349 case AArch64CC::HS: // C set
1350 case AArch64CC::LO: // C clear
1351 UsedFlags.C = true;
1352 break;
1353
1354 case AArch64CC::MI: // N set
1355 case AArch64CC::PL: // N clear
1356 UsedFlags.N = true;
1357 break;
1358
1359 case AArch64CC::VS: // V set
1360 case AArch64CC::VC: // V clear
1361 UsedFlags.V = true;
1362 break;
1363
1364 case AArch64CC::GT: // Z clear, N and V the same
1365 case AArch64CC::LE: // Z set, N and V differ
1366 UsedFlags.Z = true;
1367 LLVM_FALLTHROUGH;
1368 case AArch64CC::GE: // N and V the same
1369 case AArch64CC::LT: // N and V differ
1370 UsedFlags.N = true;
1371 UsedFlags.V = true;
1372 break;
1373 }
1374 return UsedFlags;
1375 }
1376
static bool isADDSRegImm(unsigned Opcode) {
1378 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1379 }
1380
static bool isSUBSRegImm(unsigned Opcode) {
1382 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1383 }
1384
1385 /// Check if CmpInstr can be substituted by MI.
1386 ///
1387 /// CmpInstr can be substituted:
1388 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1389 /// - and, MI and CmpInstr are from the same MachineBB
1390 /// - and, condition flags are not alive in successors of the CmpInstr parent
1391 /// - and, if MI opcode is the S form there must be no defs of flags between
1392 /// MI and CmpInstr
1393 /// or if MI opcode is not the S form there must be neither defs of flags
1394 /// nor uses of flags between MI and CmpInstr.
1395 /// - and C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1397 const TargetRegisterInfo *TRI) {
1398 assert(MI);
1399 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1400 assert(CmpInstr);
1401
1402 const unsigned CmpOpcode = CmpInstr->getOpcode();
1403 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1404 return false;
1405
1406 if (MI->getParent() != CmpInstr->getParent())
1407 return false;
1408
1409 if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1410 return false;
1411
1412 AccessKind AccessToCheck = AK_Write;
1413 if (sForm(*MI) != MI->getOpcode())
1414 AccessToCheck = AK_All;
1415 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1416 return false;
1417
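// Collect which of N, Z, C and V are consumed by flag readers after
// CmpInstr, stopping at the next instruction that redefines NZCV.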
1418 UsedNZCV NZCVUsedAfterCmp;
1419 for (auto I = std::next(CmpInstr->getIterator()),
1420 E = CmpInstr->getParent()->instr_end();
1421 I != E; ++I) {
1422 const MachineInstr &Instr = *I;
1423 if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1424 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1425 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1426 return false;
1427 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1428 }
1429
1430 if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1431 break;
1432 }
1433
1434 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1435 }
1436
1437 /// Substitute an instruction comparing to zero with another instruction
1438 /// which produces needed condition flags.
1439 ///
1440 /// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
1442 MachineInstr &CmpInstr, unsigned SrcReg,
1443 const MachineRegisterInfo *MRI) const {
1444 assert(MRI);
1445 // Get the unique definition of SrcReg.
1446 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1447 if (!MI)
1448 return false;
1449
1450 const TargetRegisterInfo *TRI = &getRegisterInfo();
1451
1452 unsigned NewOpc = sForm(*MI);
1453 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1454 return false;
1455
1456 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1457 return false;
1458
1459 // Update the instruction to set NZCV.
1460 MI->setDesc(get(NewOpc));
1461 CmpInstr.eraseFromParent();
1462 bool succeeded = UpdateOperandRegClass(*MI);
1463 (void)succeeded;
1464 assert(succeeded && "Some operands reg class are incompatible!");
1465 MI->addRegisterDefined(AArch64::NZCV, TRI);
1466 return true;
1467 }
1468
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1470 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1471 MI.getOpcode() != AArch64::CATCHRET)
1472 return false;
1473
1474 MachineBasicBlock &MBB = *MI.getParent();
1475 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1476 auto TRI = Subtarget.getRegisterInfo();
1477 DebugLoc DL = MI.getDebugLoc();
1478
1479 if (MI.getOpcode() == AArch64::CATCHRET) {
1480 // Skip to the first instruction before the epilog.
1481 const TargetInstrInfo *TII =
1482 MBB.getParent()->getSubtarget().getInstrInfo();
1483 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1484 auto MBBI = MachineBasicBlock::iterator(MI);
1485 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1486 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1487 FirstEpilogSEH != MBB.begin())
1488 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1489 if (FirstEpilogSEH != MBB.begin())
1490 FirstEpilogSEH = std::next(FirstEpilogSEH);
1491 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1492 .addReg(AArch64::X0, RegState::Define)
1493 .addMBB(TargetMBB);
1494 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1495 .addReg(AArch64::X0, RegState::Define)
1496 .addReg(AArch64::X0)
1497 .addMBB(TargetMBB)
1498 .addImm(0);
1499 return true;
1500 }
1501
1502 Register Reg = MI.getOperand(0).getReg();
1503 const GlobalValue *GV =
1504 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1505 const TargetMachine &TM = MBB.getParent()->getTarget();
1506 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1507 const unsigned char MO_NC = AArch64II::MO_NC;
1508
1509 if ((OpFlags & AArch64II::MO_GOT) != 0) {
1510 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1511 .addGlobalAddress(GV, 0, OpFlags);
1512 if (Subtarget.isTargetILP32()) {
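// On ILP32 the GOT slot holds a 32-bit pointer: load it into the 32-bit
// sub-register and mark the full 64-bit register as implicitly defined
// (the write to the W register zeroes the upper 32 bits).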
1513 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1514 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1515 .addDef(Reg32, RegState::Dead)
1516 .addUse(Reg, RegState::Kill)
1517 .addImm(0)
1518 .addMemOperand(*MI.memoperands_begin())
1519 .addDef(Reg, RegState::Implicit);
1520 } else {
1521 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1522 .addReg(Reg, RegState::Kill)
1523 .addImm(0)
1524 .addMemOperand(*MI.memoperands_begin());
1525 }
1526 } else if (TM.getCodeModel() == CodeModel::Large) {
1527 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
1528 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1529 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1530 .addImm(0);
1531 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1532 .addReg(Reg, RegState::Kill)
1533 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1534 .addImm(16);
1535 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1536 .addReg(Reg, RegState::Kill)
1537 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1538 .addImm(32);
1539 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1540 .addReg(Reg, RegState::Kill)
1541 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1542 .addImm(48);
1543 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1544 .addReg(Reg, RegState::Kill)
1545 .addImm(0)
1546 .addMemOperand(*MI.memoperands_begin());
1547 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1548 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1549 .addGlobalAddress(GV, 0, OpFlags);
1550 } else {
1551 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1552 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1553 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1554 if (Subtarget.isTargetILP32()) {
1555 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1556 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1557 .addDef(Reg32, RegState::Dead)
1558 .addUse(Reg, RegState::Kill)
1559 .addGlobalAddress(GV, 0, LoFlags)
1560 .addMemOperand(*MI.memoperands_begin())
1561 .addDef(Reg, RegState::Implicit);
1562 } else {
1563 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1564 .addReg(Reg, RegState::Kill)
1565 .addGlobalAddress(GV, 0, LoFlags)
1566 .addMemOperand(*MI.memoperands_begin());
1567 }
1568 }
1569
1570 MBB.erase(MI);
1571
1572 return true;
1573 }
1574
1575 // Return true if this instruction simply sets its single destination register
1576 // to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1578 switch (MI.getOpcode()) {
1579 default:
1580 break;
1581 case AArch64::MOVZWi:
1582 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1583 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1584 assert(MI.getDesc().getNumOperands() == 3 &&
1585 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1586 return true;
1587 }
1588 break;
1589 case AArch64::ANDWri: // and Rd, Rzr, #imm
1590 return MI.getOperand(1).getReg() == AArch64::WZR;
1591 case AArch64::ANDXri:
1592 return MI.getOperand(1).getReg() == AArch64::XZR;
1593 case TargetOpcode::COPY:
1594 return MI.getOperand(1).getReg() == AArch64::WZR;
1595 }
1596 return false;
1597 }
1598
1599 // Return true if this instruction simply renames a general register without
1600 // modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1602 switch (MI.getOpcode()) {
1603 default:
1604 break;
1605 case TargetOpcode::COPY: {
// GPR copies will be lowered to ORRWrs/ORRXrs.
1607 Register DstReg = MI.getOperand(0).getReg();
1608 return (AArch64::GPR32RegClass.contains(DstReg) ||
1609 AArch64::GPR64RegClass.contains(DstReg));
1610 }
1611 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1612 if (MI.getOperand(1).getReg() == AArch64::XZR) {
1613 assert(MI.getDesc().getNumOperands() == 4 &&
1614 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1615 return true;
1616 }
1617 break;
1618 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1619 if (MI.getOperand(2).getImm() == 0) {
1620 assert(MI.getDesc().getNumOperands() == 4 &&
1621 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1622 return true;
1623 }
1624 break;
1625 }
1626 return false;
1627 }
1628
1629 // Return true if this instruction simply renames a general register without
1630 // modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1632 switch (MI.getOpcode()) {
1633 default:
1634 break;
1635 case TargetOpcode::COPY: {
// FPR64 copies will be lowered to ORR.16b.
1637 Register DstReg = MI.getOperand(0).getReg();
1638 return (AArch64::FPR64RegClass.contains(DstReg) ||
1639 AArch64::FPR128RegClass.contains(DstReg));
1640 }
1641 case AArch64::ORRv16i8:
1642 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1643 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1644 "invalid ORRv16i8 operands");
1645 return true;
1646 }
1647 break;
1648 }
1649 return false;
1650 }
1651
1652 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1653 int &FrameIndex) const {
1654 switch (MI.getOpcode()) {
1655 default:
1656 break;
1657 case AArch64::LDRWui:
1658 case AArch64::LDRXui:
1659 case AArch64::LDRBui:
1660 case AArch64::LDRHui:
1661 case AArch64::LDRSui:
1662 case AArch64::LDRDui:
1663 case AArch64::LDRQui:
1664 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1665 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1666 FrameIndex = MI.getOperand(1).getIndex();
1667 return MI.getOperand(0).getReg();
1668 }
1669 break;
1670 }
1671
1672 return 0;
1673 }
1674
1675 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1676 int &FrameIndex) const {
1677 switch (MI.getOpcode()) {
1678 default:
1679 break;
1680 case AArch64::STRWui:
1681 case AArch64::STRXui:
1682 case AArch64::STRBui:
1683 case AArch64::STRHui:
1684 case AArch64::STRSui:
1685 case AArch64::STRDui:
1686 case AArch64::STRQui:
1687 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1688 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1689 FrameIndex = MI.getOperand(1).getIndex();
1690 return MI.getOperand(0).getReg();
1691 }
1692 break;
1693 }
1694 return 0;
1695 }
1696
1697 /// Check all MachineMemOperands for a hint to suppress pairing.
1698 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1699 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1700 return MMO->getFlags() & MOSuppressPair;
1701 });
1702 }
1703
1704 /// Set a flag on the first MachineMemOperand to suppress pairing.
1705 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1706 if (MI.memoperands_empty())
1707 return;
1708 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1709 }
1710
1711 /// Check all MachineMemOperands for a hint that the load/store is strided.
1712 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1713 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1714 return MMO->getFlags() & MOStridedAccess;
1715 });
1716 }
1717
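// Return true for the unscaled LDUR/STUR-style opcodes, which take a signed
// 9-bit byte offset rather than a scaled unsigned immediate.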
1718 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1719 switch (Opc) {
1720 default:
1721 return false;
1722 case AArch64::STURSi:
1723 case AArch64::STURDi:
1724 case AArch64::STURQi:
1725 case AArch64::STURBBi:
1726 case AArch64::STURHHi:
1727 case AArch64::STURWi:
1728 case AArch64::STURXi:
1729 case AArch64::LDURSi:
1730 case AArch64::LDURDi:
1731 case AArch64::LDURQi:
1732 case AArch64::LDURWi:
1733 case AArch64::LDURXi:
1734 case AArch64::LDURSWi:
1735 case AArch64::LDURHHi:
1736 case AArch64::LDURBBi:
1737 case AArch64::LDURSBWi:
1738 case AArch64::LDURSHWi:
1739 return true;
1740 }
1741 }
1742
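// Map a scaled load/store opcode to its unscaled (signed 9-bit immediate)
// counterpart, or return None when no such form exists.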
1743 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
1744 switch (Opc) {
1745 default: return {};
1746 case AArch64::PRFMui: return AArch64::PRFUMi;
1747 case AArch64::LDRXui: return AArch64::LDURXi;
1748 case AArch64::LDRWui: return AArch64::LDURWi;
1749 case AArch64::LDRBui: return AArch64::LDURBi;
1750 case AArch64::LDRHui: return AArch64::LDURHi;
1751 case AArch64::LDRSui: return AArch64::LDURSi;
1752 case AArch64::LDRDui: return AArch64::LDURDi;
1753 case AArch64::LDRQui: return AArch64::LDURQi;
1754 case AArch64::LDRBBui: return AArch64::LDURBBi;
1755 case AArch64::LDRHHui: return AArch64::LDURHHi;
1756 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
1757 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
1758 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
1759 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
1760 case AArch64::LDRSWui: return AArch64::LDURSWi;
1761 case AArch64::STRXui: return AArch64::STURXi;
1762 case AArch64::STRWui: return AArch64::STURWi;
1763 case AArch64::STRBui: return AArch64::STURBi;
1764 case AArch64::STRHui: return AArch64::STURHi;
1765 case AArch64::STRSui: return AArch64::STURSi;
1766 case AArch64::STRDui: return AArch64::STURDi;
1767 case AArch64::STRQui: return AArch64::STURQi;
1768 case AArch64::STRBBui: return AArch64::STURBBi;
1769 case AArch64::STRHHui: return AArch64::STURHHi;
1770 }
1771 }
1772
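// Return the operand index of the immediate offset operand: paired
// instructions, LDG and STGP keep it at index 3, everything else at index 2.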
1773 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
1774 switch (Opc) {
1775 default:
1776 return 2;
1777 case AArch64::LDPXi:
1778 case AArch64::LDPDi:
1779 case AArch64::STPXi:
1780 case AArch64::STPDi:
1781 case AArch64::LDNPXi:
1782 case AArch64::LDNPDi:
1783 case AArch64::STNPXi:
1784 case AArch64::STNPDi:
1785 case AArch64::LDPQi:
1786 case AArch64::STPQi:
1787 case AArch64::LDNPQi:
1788 case AArch64::STNPQi:
1789 case AArch64::LDPWi:
1790 case AArch64::LDPSi:
1791 case AArch64::STPWi:
1792 case AArch64::STPSi:
1793 case AArch64::LDNPWi:
1794 case AArch64::LDNPSi:
1795 case AArch64::STNPWi:
1796 case AArch64::STNPSi:
1797 case AArch64::LDG:
1798 case AArch64::STGPi:
1799 return 3;
1800 case AArch64::ADDG:
1801 case AArch64::STGOffset:
1802 return 2;
1803 }
1804 }
1805
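// Return true for the single-register loads/stores (scaled or unscaled) that
// the load/store optimizer may combine into LDP/STP pairs.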
1806 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1807 switch (MI.getOpcode()) {
1808 default:
1809 return false;
1810 // Scaled instructions.
1811 case AArch64::STRSui:
1812 case AArch64::STRDui:
1813 case AArch64::STRQui:
1814 case AArch64::STRXui:
1815 case AArch64::STRWui:
1816 case AArch64::LDRSui:
1817 case AArch64::LDRDui:
1818 case AArch64::LDRQui:
1819 case AArch64::LDRXui:
1820 case AArch64::LDRWui:
1821 case AArch64::LDRSWui:
1822 // Unscaled instructions.
1823 case AArch64::STURSi:
1824 case AArch64::STURDi:
1825 case AArch64::STURQi:
1826 case AArch64::STURWi:
1827 case AArch64::STURXi:
1828 case AArch64::LDURSi:
1829 case AArch64::LDURDi:
1830 case AArch64::LDURQi:
1831 case AArch64::LDURWi:
1832 case AArch64::LDURXi:
1833 case AArch64::LDURSWi:
1834 return true;
1835 }
1836 }
1837
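// Map an ADD/SUB/AND/BIC opcode to its flag-setting (S-suffixed) form, e.g.
// ADDWri -> ADDSWri, and report through Is64Bit whether it is a 64-bit
// operation.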
1838 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1839 bool &Is64Bit) {
1840 switch (Opc) {
1841 default:
1842 llvm_unreachable("Opcode has no flag setting equivalent!");
1843 // 32-bit cases:
1844 case AArch64::ADDWri:
1845 Is64Bit = false;
1846 return AArch64::ADDSWri;
1847 case AArch64::ADDWrr:
1848 Is64Bit = false;
1849 return AArch64::ADDSWrr;
1850 case AArch64::ADDWrs:
1851 Is64Bit = false;
1852 return AArch64::ADDSWrs;
1853 case AArch64::ADDWrx:
1854 Is64Bit = false;
1855 return AArch64::ADDSWrx;
1856 case AArch64::ANDWri:
1857 Is64Bit = false;
1858 return AArch64::ANDSWri;
1859 case AArch64::ANDWrr:
1860 Is64Bit = false;
1861 return AArch64::ANDSWrr;
1862 case AArch64::ANDWrs:
1863 Is64Bit = false;
1864 return AArch64::ANDSWrs;
1865 case AArch64::BICWrr:
1866 Is64Bit = false;
1867 return AArch64::BICSWrr;
1868 case AArch64::BICWrs:
1869 Is64Bit = false;
1870 return AArch64::BICSWrs;
1871 case AArch64::SUBWri:
1872 Is64Bit = false;
1873 return AArch64::SUBSWri;
1874 case AArch64::SUBWrr:
1875 Is64Bit = false;
1876 return AArch64::SUBSWrr;
1877 case AArch64::SUBWrs:
1878 Is64Bit = false;
1879 return AArch64::SUBSWrs;
1880 case AArch64::SUBWrx:
1881 Is64Bit = false;
1882 return AArch64::SUBSWrx;
1883 // 64-bit cases:
1884 case AArch64::ADDXri:
1885 Is64Bit = true;
1886 return AArch64::ADDSXri;
1887 case AArch64::ADDXrr:
1888 Is64Bit = true;
1889 return AArch64::ADDSXrr;
1890 case AArch64::ADDXrs:
1891 Is64Bit = true;
1892 return AArch64::ADDSXrs;
1893 case AArch64::ADDXrx:
1894 Is64Bit = true;
1895 return AArch64::ADDSXrx;
1896 case AArch64::ANDXri:
1897 Is64Bit = true;
1898 return AArch64::ANDSXri;
1899 case AArch64::ANDXrr:
1900 Is64Bit = true;
1901 return AArch64::ANDSXrr;
1902 case AArch64::ANDXrs:
1903 Is64Bit = true;
1904 return AArch64::ANDSXrs;
1905 case AArch64::BICXrr:
1906 Is64Bit = true;
1907 return AArch64::BICSXrr;
1908 case AArch64::BICXrs:
1909 Is64Bit = true;
1910 return AArch64::BICSXrs;
1911 case AArch64::SUBXri:
1912 Is64Bit = true;
1913 return AArch64::SUBSXri;
1914 case AArch64::SUBXrr:
1915 Is64Bit = true;
1916 return AArch64::SUBSXrr;
1917 case AArch64::SUBXrs:
1918 Is64Bit = true;
1919 return AArch64::SUBSXrs;
1920 case AArch64::SUBXrx:
1921 Is64Bit = true;
1922 return AArch64::SUBSXrx;
1923 }
1924 }
1925
1926 // Is this a candidate for ld/st merging or pairing? For example, we don't
1927 // touch volatiles or load/stores that have a hint to avoid pair formation.
1928 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
1929 // If this is a volatile load/store, don't mess with it.
1930 if (MI.hasOrderedMemoryRef())
1931 return false;
1932
1933 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1934 assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
1935 "Expected a reg or frame index operand.");
1936 if (!MI.getOperand(2).isImm())
1937 return false;
1938
1939 // Can't merge/pair if the instruction modifies the base register.
1940 // e.g., ldr x0, [x0]
1941 // This case will never occur with an FI base.
1942 if (MI.getOperand(1).isReg()) {
1943 Register BaseReg = MI.getOperand(1).getReg();
1944 const TargetRegisterInfo *TRI = &getRegisterInfo();
1945 if (MI.modifiesRegister(BaseReg, TRI))
1946 return false;
1947 }
1948
1949 // Check if this load/store has a hint to avoid pair formation.
1950 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1951 if (isLdStPairSuppressed(MI))
1952 return false;
1953
1954 // Do not pair any callee-save store/reload instructions in the
1955 // prologue/epilogue if the CFI information encoded the operations as separate
1956 // instructions, as that will cause the size of the actual prologue to mismatch
1957 // with the prologue size recorded in the Windows CFI.
1958 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
1959 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
1960 MI.getMF()->getFunction().needsUnwindTableEntry();
1961 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
1962 MI.getFlag(MachineInstr::FrameDestroy)))
1963 return false;
1964
1965 // On some CPUs quad load/store pairs are slower than two single load/stores.
1966 if (Subtarget.isPaired128Slow()) {
1967 switch (MI.getOpcode()) {
1968 default:
1969 break;
1970 case AArch64::LDURQi:
1971 case AArch64::STURQi:
1972 case AArch64::LDRQui:
1973 case AArch64::STRQui:
1974 return false;
1975 }
1976 }
1977
1978 return true;
1979 }
1980
1981 bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
1982 const MachineOperand *&BaseOp,
1983 int64_t &Offset,
1984 const TargetRegisterInfo *TRI) const {
1985 if (!LdSt.mayLoadOrStore())
1986 return false;
1987
1988 unsigned Width;
1989 return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
1990 }
1991
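// Extract the base operand, the byte offset and the access width of a simple
// reg/FI + immediate load or store. Returns false for addressing modes that
// are not handled here.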
1992 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
1993 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
1994 unsigned &Width, const TargetRegisterInfo *TRI) const {
1995 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1996 // Handle only loads/stores with base register followed by immediate offset.
1997 if (LdSt.getNumExplicitOperands() == 3) {
1998 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1999 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2000 !LdSt.getOperand(2).isImm())
2001 return false;
2002 } else if (LdSt.getNumExplicitOperands() == 4) {
2003 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2004 if (!LdSt.getOperand(1).isReg() ||
2005 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
2006 !LdSt.getOperand(3).isImm())
2007 return false;
2008 } else
2009 return false;
2010
2011 // Get the scaling factor for the instruction and set the width for the
2012 // instruction.
2013 unsigned Scale = 0;
2014 int64_t Dummy1, Dummy2;
2015
2016 // If this returns false, then it's an instruction we don't want to handle.
2017 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2018 return false;
2019
2020 // Compute the offset. Offset is calculated as the immediate operand
2021 // multiplied by the scaling factor. Unscaled instructions have scaling factor
2022 // set to 1.
2023 if (LdSt.getNumExplicitOperands() == 3) {
2024 BaseOp = &LdSt.getOperand(1);
2025 Offset = LdSt.getOperand(2).getImm() * Scale;
2026 } else {
2027 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2028 BaseOp = &LdSt.getOperand(2);
2029 Offset = LdSt.getOperand(3).getImm() * Scale;
2030 }
2031
2032 if (!BaseOp->isReg() && !BaseOp->isFI())
2033 return false;
2034
2035 return true;
2036 }
2037
2038 MachineOperand &
2039 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2040 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2041 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2042 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2043 return OfsOp;
2044 }
2045
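// Describe the addressing properties of a memory opcode: Scale is the byte
// multiplier applied to the immediate, Width is the number of bytes accessed,
// and [MinOffset, MaxOffset] is the legal range of the encoded immediate.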
2046 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
2047 unsigned &Width, int64_t &MinOffset,
2048 int64_t &MaxOffset) {
2049 switch (Opcode) {
2050 // Not a memory operation or something we want to handle.
2051 default:
2052 Scale = Width = 0;
2053 MinOffset = MaxOffset = 0;
2054 return false;
2055 case AArch64::STRWpost:
2056 case AArch64::LDRWpost:
2057 Width = 32;
2058 Scale = 4;
2059 MinOffset = -256;
2060 MaxOffset = 255;
2061 break;
2062 case AArch64::LDURQi:
2063 case AArch64::STURQi:
2064 Width = 16;
2065 Scale = 1;
2066 MinOffset = -256;
2067 MaxOffset = 255;
2068 break;
2069 case AArch64::PRFUMi:
2070 case AArch64::LDURXi:
2071 case AArch64::LDURDi:
2072 case AArch64::STURXi:
2073 case AArch64::STURDi:
2074 Width = 8;
2075 Scale = 1;
2076 MinOffset = -256;
2077 MaxOffset = 255;
2078 break;
2079 case AArch64::LDURWi:
2080 case AArch64::LDURSi:
2081 case AArch64::LDURSWi:
2082 case AArch64::STURWi:
2083 case AArch64::STURSi:
2084 Width = 4;
2085 Scale = 1;
2086 MinOffset = -256;
2087 MaxOffset = 255;
2088 break;
2089 case AArch64::LDURHi:
2090 case AArch64::LDURHHi:
2091 case AArch64::LDURSHXi:
2092 case AArch64::LDURSHWi:
2093 case AArch64::STURHi:
2094 case AArch64::STURHHi:
2095 Width = 2;
2096 Scale = 1;
2097 MinOffset = -256;
2098 MaxOffset = 255;
2099 break;
2100 case AArch64::LDURBi:
2101 case AArch64::LDURBBi:
2102 case AArch64::LDURSBXi:
2103 case AArch64::LDURSBWi:
2104 case AArch64::STURBi:
2105 case AArch64::STURBBi:
2106 Width = 1;
2107 Scale = 1;
2108 MinOffset = -256;
2109 MaxOffset = 255;
2110 break;
2111 case AArch64::LDPQi:
2112 case AArch64::LDNPQi:
2113 case AArch64::STPQi:
2114 case AArch64::STNPQi:
2115 Scale = 16;
2116 Width = 32;
2117 MinOffset = -64;
2118 MaxOffset = 63;
2119 break;
2120 case AArch64::LDRQui:
2121 case AArch64::STRQui:
2122 Scale = Width = 16;
2123 MinOffset = 0;
2124 MaxOffset = 4095;
2125 break;
2126 case AArch64::LDPXi:
2127 case AArch64::LDPDi:
2128 case AArch64::LDNPXi:
2129 case AArch64::LDNPDi:
2130 case AArch64::STPXi:
2131 case AArch64::STPDi:
2132 case AArch64::STNPXi:
2133 case AArch64::STNPDi:
2134 Scale = 8;
2135 Width = 16;
2136 MinOffset = -64;
2137 MaxOffset = 63;
2138 break;
2139 case AArch64::PRFMui:
2140 case AArch64::LDRXui:
2141 case AArch64::LDRDui:
2142 case AArch64::STRXui:
2143 case AArch64::STRDui:
2144 Scale = Width = 8;
2145 MinOffset = 0;
2146 MaxOffset = 4095;
2147 break;
2148 case AArch64::LDPWi:
2149 case AArch64::LDPSi:
2150 case AArch64::LDNPWi:
2151 case AArch64::LDNPSi:
2152 case AArch64::STPWi:
2153 case AArch64::STPSi:
2154 case AArch64::STNPWi:
2155 case AArch64::STNPSi:
2156 Scale = 4;
2157 Width = 8;
2158 MinOffset = -64;
2159 MaxOffset = 63;
2160 break;
2161 case AArch64::LDRWui:
2162 case AArch64::LDRSui:
2163 case AArch64::LDRSWui:
2164 case AArch64::STRWui:
2165 case AArch64::STRSui:
2166 Scale = Width = 4;
2167 MinOffset = 0;
2168 MaxOffset = 4095;
2169 break;
2170 case AArch64::LDRHui:
2171 case AArch64::LDRHHui:
2172 case AArch64::LDRSHWui:
2173 case AArch64::LDRSHXui:
2174 case AArch64::STRHui:
2175 case AArch64::STRHHui:
2176 Scale = Width = 2;
2177 MinOffset = 0;
2178 MaxOffset = 4095;
2179 break;
2180 case AArch64::LDRBui:
2181 case AArch64::LDRBBui:
2182 case AArch64::LDRSBWui:
2183 case AArch64::LDRSBXui:
2184 case AArch64::STRBui:
2185 case AArch64::STRBBui:
2186 Scale = Width = 1;
2187 MinOffset = 0;
2188 MaxOffset = 4095;
2189 break;
2190 case AArch64::ADDG:
2191 Scale = 16;
2192 Width = 0;
2193 MinOffset = 0;
2194 MaxOffset = 63;
2195 break;
2196 case AArch64::TAGPstack:
2197 Scale = 16;
2198 Width = 0;
2199 // TAGP with a negative offset turns into SUBP, which has a maximum offset
2200 // of 63 (not 64!).
2201 MinOffset = -63;
2202 MaxOffset = 63;
2203 break;
2204 case AArch64::LDG:
2205 case AArch64::STGOffset:
2206 case AArch64::STZGOffset:
2207 Scale = Width = 16;
2208 MinOffset = -256;
2209 MaxOffset = 255;
2210 break;
2211 case AArch64::LDR_PXI:
2212 case AArch64::STR_PXI:
2213 Scale = Width = 2;
2214 MinOffset = -256;
2215 MaxOffset = 255;
2216 break;
2217 case AArch64::LDR_ZXI:
2218 case AArch64::STR_ZXI:
2219 Scale = Width = 16;
2220 MinOffset = -256;
2221 MaxOffset = 255;
2222 break;
2223 case AArch64::ST2GOffset:
2224 case AArch64::STZ2GOffset:
2225 Scale = 16;
2226 Width = 32;
2227 MinOffset = -256;
2228 MaxOffset = 255;
2229 break;
2230 case AArch64::STGPi:
2231 Scale = Width = 16;
2232 MinOffset = -64;
2233 MaxOffset = 63;
2234 break;
2235 }
2236
2237 return true;
2238 }
2239
2240 // Access size in bytes for a load/store opcode; used to scale unscaled offsets.
2241 int AArch64InstrInfo::getMemScale(unsigned Opc) {
2242 switch (Opc) {
2243 default:
2244 llvm_unreachable("Opcode has unknown scale!");
2245 case AArch64::LDRBBui:
2246 case AArch64::LDURBBi:
2247 case AArch64::LDRSBWui:
2248 case AArch64::LDURSBWi:
2249 case AArch64::STRBBui:
2250 case AArch64::STURBBi:
2251 return 1;
2252 case AArch64::LDRHHui:
2253 case AArch64::LDURHHi:
2254 case AArch64::LDRSHWui:
2255 case AArch64::LDURSHWi:
2256 case AArch64::STRHHui:
2257 case AArch64::STURHHi:
2258 return 2;
2259 case AArch64::LDRSui:
2260 case AArch64::LDURSi:
2261 case AArch64::LDRSWui:
2262 case AArch64::LDURSWi:
2263 case AArch64::LDRWui:
2264 case AArch64::LDURWi:
2265 case AArch64::STRSui:
2266 case AArch64::STURSi:
2267 case AArch64::STRWui:
2268 case AArch64::STURWi:
2269 case AArch64::LDPSi:
2270 case AArch64::LDPSWi:
2271 case AArch64::LDPWi:
2272 case AArch64::STPSi:
2273 case AArch64::STPWi:
2274 return 4;
2275 case AArch64::LDRDui:
2276 case AArch64::LDURDi:
2277 case AArch64::LDRXui:
2278 case AArch64::LDURXi:
2279 case AArch64::STRDui:
2280 case AArch64::STURDi:
2281 case AArch64::STRXui:
2282 case AArch64::STURXi:
2283 case AArch64::LDPDi:
2284 case AArch64::LDPXi:
2285 case AArch64::STPDi:
2286 case AArch64::STPXi:
2287 return 8;
2288 case AArch64::LDRQui:
2289 case AArch64::LDURQi:
2290 case AArch64::STRQui:
2291 case AArch64::STURQi:
2292 case AArch64::LDPQi:
2293 case AArch64::STPQi:
2294 case AArch64::STGOffset:
2295 case AArch64::STZGOffset:
2296 case AArch64::ST2GOffset:
2297 case AArch64::STZ2GOffset:
2298 case AArch64::STGPi:
2299 return 16;
2300 }
2301 }
2302
2303 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2304 // scaled.
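// For example, an LDURXi byte offset of 16 (access size 8) becomes the
// element offset 2 that the equivalent LDPXi expects.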
2305 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2306 int Scale = AArch64InstrInfo::getMemScale(Opc);
2307
2308 // If the byte-offset isn't a multiple of the stride, we can't scale this
2309 // offset.
2310 if (Offset % Scale != 0)
2311 return false;
2312
2313 // Convert the byte-offset used by unscaled into an "element" offset used
2314 // by the scaled pair load/store instructions.
2315 Offset /= Scale;
2316 return true;
2317 }
2318
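// Identical opcodes can always be paired; in addition, the zero-extending
// 32-bit loads (LDRWui/LDURWi) may pair with their sign-extending
// counterparts (LDRSWui/LDURSWi).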
2319 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2320 if (FirstOpc == SecondOpc)
2321 return true;
2322 // We can also pair sign-ext and zero-ext instructions.
2323 switch (FirstOpc) {
2324 default:
2325 return false;
2326 case AArch64::LDRWui:
2327 case AArch64::LDURWi:
2328 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2329 case AArch64::LDRSWui:
2330 case AArch64::LDURSWi:
2331 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2332 }
2333 // These instructions can't be paired based on their opcodes.
2334 return false;
2335 }
2336
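// Decide whether two frame-index accesses should be clustered: for fixed
// stack objects the scaled object offsets plus instruction offsets must be
// consecutive, otherwise the accesses must use the same frame index.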
2337 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2338 int64_t Offset1, unsigned Opcode1, int FI2,
2339 int64_t Offset2, unsigned Opcode2) {
2340 // Accesses through fixed stack object frame indices may access a different
2341 // fixed stack slot. Check that the object offsets + offsets match.
2342 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2343 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2344 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2345 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2346 // Convert to scaled object offsets.
2347 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
2348 if (ObjectOffset1 % Scale1 != 0)
2349 return false;
2350 ObjectOffset1 /= Scale1;
2351 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
2352 if (ObjectOffset2 % Scale2 != 0)
2353 return false;
2354 ObjectOffset2 /= Scale2;
2355 ObjectOffset1 += Offset1;
2356 ObjectOffset2 += Offset2;
2357 return ObjectOffset1 + 1 == ObjectOffset2;
2358 }
2359
2360 return FI1 == FI2;
2361 }
2362
2363 /// Detect opportunities for ldp/stp formation.
2364 ///
2365 /// Only called for LdSt for which getMemOperandWithOffset returns true.
2366 bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
2367 const MachineOperand &BaseOp2,
2368 unsigned NumLoads) const {
2369 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2370 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2371 if (BaseOp1.getType() != BaseOp2.getType())
2372 return false;
2373
2374 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2375 "Only base registers and frame indices are supported.");
2376
2377 // Check for both base regs and base FI.
2378 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2379 return false;
2380
2381 // Only cluster up to a single pair.
2382 if (NumLoads > 1)
2383 return false;
2384
2385 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2386 return false;
2387
2388 // Can we pair these instructions based on their opcodes?
2389 unsigned FirstOpc = FirstLdSt.getOpcode();
2390 unsigned SecondOpc = SecondLdSt.getOpcode();
2391 if (!canPairLdStOpc(FirstOpc, SecondOpc))
2392 return false;
2393
2394 // Can't merge volatiles or load/stores that have a hint to avoid pair
2395 // formation, for example.
2396 if (!isCandidateToMergeOrPair(FirstLdSt) ||
2397 !isCandidateToMergeOrPair(SecondLdSt))
2398 return false;
2399
2400 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2401 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2402 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2403 return false;
2404
2405 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2406 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2407 return false;
2408
2409 // Pairwise instructions have a 7-bit signed offset field.
2410 if (Offset1 > 63 || Offset1 < -64)
2411 return false;
2412
2413 // The caller should already have ordered First/SecondLdSt by offset.
2414 // Note: except for non-equal frame index bases
2415 if (BaseOp1.isFI()) {
2416 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2417 "Caller should have ordered offsets.");
2418
2419 const MachineFrameInfo &MFI =
2420 FirstLdSt.getParent()->getParent()->getFrameInfo();
2421 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2422 BaseOp2.getIndex(), Offset2, SecondOpc);
2423 }
2424
2425 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2426
2427 return Offset1 + 1 == Offset2;
2428 }
2429
2430 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2431 unsigned Reg, unsigned SubIdx,
2432 unsigned State,
2433 const TargetRegisterInfo *TRI) {
2434 if (!SubIdx)
2435 return MIB.addReg(Reg, State);
2436
2437 if (Register::isPhysicalRegister(Reg))
2438 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2439 return MIB.addReg(Reg, State, SubIdx);
2440 }
2441
2442 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2443 unsigned NumRegs) {
2444 // We really want the positive remainder mod 32 here; that happens to be
2445 // easily obtainable with a mask.
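// For example, copying the tuple D0_D1 into D1_D2 sub-register by
// sub-register in ascending order would overwrite D1 before it is read, so
// copyPhysRegTuple copies such tuples in descending order instead.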
2446 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2447 }
2448
2449 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2450 MachineBasicBlock::iterator I,
2451 const DebugLoc &DL, MCRegister DestReg,
2452 MCRegister SrcReg, bool KillSrc,
2453 unsigned Opcode,
2454 ArrayRef<unsigned> Indices) const {
2455 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2456 const TargetRegisterInfo *TRI = &getRegisterInfo();
2457 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2458 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2459 unsigned NumRegs = Indices.size();
2460
2461 int SubReg = 0, End = NumRegs, Incr = 1;
2462 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2463 SubReg = NumRegs - 1;
2464 End = -1;
2465 Incr = -1;
2466 }
2467
2468 for (; SubReg != End; SubReg += Incr) {
2469 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2470 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2471 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2472 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2473 }
2474 }
2475
2476 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2477 MachineBasicBlock::iterator I,
2478 DebugLoc DL, unsigned DestReg,
2479 unsigned SrcReg, bool KillSrc,
2480 unsigned Opcode, unsigned ZeroReg,
2481 llvm::ArrayRef<unsigned> Indices) const {
2482 const TargetRegisterInfo *TRI = &getRegisterInfo();
2483 unsigned NumRegs = Indices.size();
2484
2485 #ifndef NDEBUG
2486 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2487 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2488 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2489 "GPR reg sequences should not be able to overlap");
2490 #endif
2491
2492 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2493 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2494 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2495 MIB.addReg(ZeroReg);
2496 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2497 MIB.addImm(0);
2498 }
2499 }
2500
2501 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2502 MachineBasicBlock::iterator I,
2503 const DebugLoc &DL, MCRegister DestReg,
2504 MCRegister SrcReg, bool KillSrc) const {
2505 if (AArch64::GPR32spRegClass.contains(DestReg) &&
2506 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2507 const TargetRegisterInfo *TRI = &getRegisterInfo();
2508
2509 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2510 // If either operand is WSP, expand to ADD #0.
2511 if (Subtarget.hasZeroCycleRegMove()) {
2512 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2513 MCRegister DestRegX = TRI->getMatchingSuperReg(
2514 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2515 MCRegister SrcRegX = TRI->getMatchingSuperReg(
2516 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2517 // This instruction is reading and writing X registers. This may upset
2518 // the register scavenger and machine verifier, so we need to indicate
2519 // that we are reading an undefined value from SrcRegX, but a proper
2520 // value from SrcReg.
2521 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2522 .addReg(SrcRegX, RegState::Undef)
2523 .addImm(0)
2524 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2525 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2526 } else {
2527 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2528 .addReg(SrcReg, getKillRegState(KillSrc))
2529 .addImm(0)
2530 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2531 }
2532 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2533 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2534 .addImm(0)
2535 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2536 } else {
2537 if (Subtarget.hasZeroCycleRegMove()) {
2538 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2539 MCRegister DestRegX = TRI->getMatchingSuperReg(
2540 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2541 MCRegister SrcRegX = TRI->getMatchingSuperReg(
2542 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2543 // This instruction is reading and writing X registers. This may upset
2544 // the register scavenger and machine verifier, so we need to indicate
2545 // that we are reading an undefined value from SrcRegX, but a proper
2546 // value from SrcReg.
2547 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2548 .addReg(AArch64::XZR)
2549 .addReg(SrcRegX, RegState::Undef)
2550 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2551 } else {
2552 // Otherwise, expand to ORR WZR.
2553 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2554 .addReg(AArch64::WZR)
2555 .addReg(SrcReg, getKillRegState(KillSrc));
2556 }
2557 }
2558 return;
2559 }
2560
2561 // Copy a Predicate register by ORRing with itself.
2562 if (AArch64::PPRRegClass.contains(DestReg) &&
2563 AArch64::PPRRegClass.contains(SrcReg)) {
2564 assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2565 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
2566 .addReg(SrcReg) // Pg
2567 .addReg(SrcReg)
2568 .addReg(SrcReg, getKillRegState(KillSrc));
2569 return;
2570 }
2571
2572 // Copy a Z register by ORRing with itself.
2573 if (AArch64::ZPRRegClass.contains(DestReg) &&
2574 AArch64::ZPRRegClass.contains(SrcReg)) {
2575 assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2576 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
2577 .addReg(SrcReg)
2578 .addReg(SrcReg, getKillRegState(KillSrc));
2579 return;
2580 }
2581
2582 if (AArch64::GPR64spRegClass.contains(DestReg) &&
2583 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2584 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2585 // If either operand is SP, expand to ADD #0.
2586 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2587 .addReg(SrcReg, getKillRegState(KillSrc))
2588 .addImm(0)
2589 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2590 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2591 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2592 .addImm(0)
2593 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2594 } else {
2595 // Otherwise, expand to ORR XZR.
2596 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2597 .addReg(AArch64::XZR)
2598 .addReg(SrcReg, getKillRegState(KillSrc));
2599 }
2600 return;
2601 }
2602
2603 // Copy a DDDD register quad by copying the individual sub-registers.
2604 if (AArch64::DDDDRegClass.contains(DestReg) &&
2605 AArch64::DDDDRegClass.contains(SrcReg)) {
2606 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2607 AArch64::dsub2, AArch64::dsub3};
2608 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2609 Indices);
2610 return;
2611 }
2612
2613 // Copy a DDD register triple by copying the individual sub-registers.
2614 if (AArch64::DDDRegClass.contains(DestReg) &&
2615 AArch64::DDDRegClass.contains(SrcReg)) {
2616 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2617 AArch64::dsub2};
2618 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2619 Indices);
2620 return;
2621 }
2622
2623 // Copy a DD register pair by copying the individual sub-registers.
2624 if (AArch64::DDRegClass.contains(DestReg) &&
2625 AArch64::DDRegClass.contains(SrcReg)) {
2626 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2627 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2628 Indices);
2629 return;
2630 }
2631
2632 // Copy a QQQQ register quad by copying the individual sub-registers.
2633 if (AArch64::QQQQRegClass.contains(DestReg) &&
2634 AArch64::QQQQRegClass.contains(SrcReg)) {
2635 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2636 AArch64::qsub2, AArch64::qsub3};
2637 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2638 Indices);
2639 return;
2640 }
2641
2642 // Copy a QQQ register triple by copying the individual sub-registers.
2643 if (AArch64::QQQRegClass.contains(DestReg) &&
2644 AArch64::QQQRegClass.contains(SrcReg)) {
2645 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2646 AArch64::qsub2};
2647 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2648 Indices);
2649 return;
2650 }
2651
2652 // Copy a QQ register pair by copying the individual sub-registers.
2653 if (AArch64::QQRegClass.contains(DestReg) &&
2654 AArch64::QQRegClass.contains(SrcReg)) {
2655 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2656 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2657 Indices);
2658 return;
2659 }
2660
2661 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2662 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2663 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2664 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2665 AArch64::XZR, Indices);
2666 return;
2667 }
2668
2669 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2670 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2671 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2672 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2673 AArch64::WZR, Indices);
2674 return;
2675 }
2676
2677 if (AArch64::FPR128RegClass.contains(DestReg) &&
2678 AArch64::FPR128RegClass.contains(SrcReg)) {
2679 if (Subtarget.hasNEON()) {
2680 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2681 .addReg(SrcReg)
2682 .addReg(SrcReg, getKillRegState(KillSrc));
2683 } else {
2684 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2685 .addReg(AArch64::SP, RegState::Define)
2686 .addReg(SrcReg, getKillRegState(KillSrc))
2687 .addReg(AArch64::SP)
2688 .addImm(-16);
2689 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2690 .addReg(AArch64::SP, RegState::Define)
2691 .addReg(DestReg, RegState::Define)
2692 .addReg(AArch64::SP)
2693 .addImm(16);
2694 }
2695 return;
2696 }
2697
2698 if (AArch64::FPR64RegClass.contains(DestReg) &&
2699 AArch64::FPR64RegClass.contains(SrcReg)) {
2700 if (Subtarget.hasNEON()) {
2701 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2702 &AArch64::FPR128RegClass);
2703 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2704 &AArch64::FPR128RegClass);
2705 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2706 .addReg(SrcReg)
2707 .addReg(SrcReg, getKillRegState(KillSrc));
2708 } else {
2709 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2710 .addReg(SrcReg, getKillRegState(KillSrc));
2711 }
2712 return;
2713 }
2714
2715 if (AArch64::FPR32RegClass.contains(DestReg) &&
2716 AArch64::FPR32RegClass.contains(SrcReg)) {
2717 if (Subtarget.hasNEON()) {
2718 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2719 &AArch64::FPR128RegClass);
2720 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2721 &AArch64::FPR128RegClass);
2722 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2723 .addReg(SrcReg)
2724 .addReg(SrcReg, getKillRegState(KillSrc));
2725 } else {
2726 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2727 .addReg(SrcReg, getKillRegState(KillSrc));
2728 }
2729 return;
2730 }
2731
2732 if (AArch64::FPR16RegClass.contains(DestReg) &&
2733 AArch64::FPR16RegClass.contains(SrcReg)) {
2734 if (Subtarget.hasNEON()) {
2735 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2736 &AArch64::FPR128RegClass);
2737 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2738 &AArch64::FPR128RegClass);
2739 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2740 .addReg(SrcReg)
2741 .addReg(SrcReg, getKillRegState(KillSrc));
2742 } else {
2743 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2744 &AArch64::FPR32RegClass);
2745 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2746 &AArch64::FPR32RegClass);
2747 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2748 .addReg(SrcReg, getKillRegState(KillSrc));
2749 }
2750 return;
2751 }
2752
2753 if (AArch64::FPR8RegClass.contains(DestReg) &&
2754 AArch64::FPR8RegClass.contains(SrcReg)) {
2755 if (Subtarget.hasNEON()) {
2756 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2757 &AArch64::FPR128RegClass);
2758 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2759 &AArch64::FPR128RegClass);
2760 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2761 .addReg(SrcReg)
2762 .addReg(SrcReg, getKillRegState(KillSrc));
2763 } else {
2764 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2765 &AArch64::FPR32RegClass);
2766 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2767 &AArch64::FPR32RegClass);
2768 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2769 .addReg(SrcReg, getKillRegState(KillSrc));
2770 }
2771 return;
2772 }
2773
2774 // Copies between GPR64 and FPR64.
2775 if (AArch64::FPR64RegClass.contains(DestReg) &&
2776 AArch64::GPR64RegClass.contains(SrcReg)) {
2777 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2778 .addReg(SrcReg, getKillRegState(KillSrc));
2779 return;
2780 }
2781 if (AArch64::GPR64RegClass.contains(DestReg) &&
2782 AArch64::FPR64RegClass.contains(SrcReg)) {
2783 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2784 .addReg(SrcReg, getKillRegState(KillSrc));
2785 return;
2786 }
2787 // Copies between GPR32 and FPR32.
2788 if (AArch64::FPR32RegClass.contains(DestReg) &&
2789 AArch64::GPR32RegClass.contains(SrcReg)) {
2790 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2791 .addReg(SrcReg, getKillRegState(KillSrc));
2792 return;
2793 }
2794 if (AArch64::GPR32RegClass.contains(DestReg) &&
2795 AArch64::FPR32RegClass.contains(SrcReg)) {
2796 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2797 .addReg(SrcReg, getKillRegState(KillSrc));
2798 return;
2799 }
2800
2801 if (DestReg == AArch64::NZCV) {
2802 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2803 BuildMI(MBB, I, DL, get(AArch64::MSR))
2804 .addImm(AArch64SysReg::NZCV)
2805 .addReg(SrcReg, getKillRegState(KillSrc))
2806 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2807 return;
2808 }
2809
2810 if (SrcReg == AArch64::NZCV) {
2811 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2812 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2813 .addImm(AArch64SysReg::NZCV)
2814 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2815 return;
2816 }
2817
2818 llvm_unreachable("unimplemented reg-to-reg copy");
2819 }
2820
2821 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2822 MachineBasicBlock &MBB,
2823 MachineBasicBlock::iterator InsertBefore,
2824 const MCInstrDesc &MCID,
2825 unsigned SrcReg, bool IsKill,
2826 unsigned SubIdx0, unsigned SubIdx1, int FI,
2827 MachineMemOperand *MMO) {
2828 unsigned SrcReg0 = SrcReg;
2829 unsigned SrcReg1 = SrcReg;
2830 if (Register::isPhysicalRegister(SrcReg)) {
2831 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2832 SubIdx0 = 0;
2833 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2834 SubIdx1 = 0;
2835 }
2836 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2837 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2838 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2839 .addFrameIndex(FI)
2840 .addImm(0)
2841 .addMemOperand(MMO);
2842 }
2843
2844 void AArch64InstrInfo::storeRegToStackSlot(
2845 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2846 bool isKill, int FI, const TargetRegisterClass *RC,
2847 const TargetRegisterInfo *TRI) const {
2848 MachineFunction &MF = *MBB.getParent();
2849 MachineFrameInfo &MFI = MF.getFrameInfo();
2850 unsigned Align = MFI.getObjectAlignment(FI);
2851
2852 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2853 MachineMemOperand *MMO = MF.getMachineMemOperand(
2854 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2855 unsigned Opc = 0;
2856 bool Offset = true;
2857 switch (TRI->getSpillSize(*RC)) {
2858 case 1:
2859 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2860 Opc = AArch64::STRBui;
2861 break;
2862 case 2:
2863 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2864 Opc = AArch64::STRHui;
2865 break;
2866 case 4:
2867 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2868 Opc = AArch64::STRWui;
2869 if (Register::isVirtualRegister(SrcReg))
2870 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2871 else
2872 assert(SrcReg != AArch64::WSP);
2873 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2874 Opc = AArch64::STRSui;
2875 break;
2876 case 8:
2877 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2878 Opc = AArch64::STRXui;
2879 if (Register::isVirtualRegister(SrcReg))
2880 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2881 else
2882 assert(SrcReg != AArch64::SP);
2883 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2884 Opc = AArch64::STRDui;
2885 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2886 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2887 get(AArch64::STPWi), SrcReg, isKill,
2888 AArch64::sube32, AArch64::subo32, FI, MMO);
2889 return;
2890 }
2891 break;
2892 case 16:
2893 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2894 Opc = AArch64::STRQui;
2895 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2896 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2897 Opc = AArch64::ST1Twov1d;
2898 Offset = false;
2899 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2900 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2901 get(AArch64::STPXi), SrcReg, isKill,
2902 AArch64::sube64, AArch64::subo64, FI, MMO);
2903 return;
2904 }
2905 break;
2906 case 24:
2907 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2908 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2909 Opc = AArch64::ST1Threev1d;
2910 Offset = false;
2911 }
2912 break;
2913 case 32:
2914 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2915 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2916 Opc = AArch64::ST1Fourv1d;
2917 Offset = false;
2918 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2919 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2920 Opc = AArch64::ST1Twov2d;
2921 Offset = false;
2922 }
2923 break;
2924 case 48:
2925 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2926 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2927 Opc = AArch64::ST1Threev2d;
2928 Offset = false;
2929 }
2930 break;
2931 case 64:
2932 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2933 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2934 Opc = AArch64::ST1Fourv2d;
2935 Offset = false;
2936 }
2937 break;
2938 }
2939 unsigned StackID = TargetStackID::Default;
2940 if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
2941 assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
2942 Opc = AArch64::STR_PXI;
2943 StackID = TargetStackID::SVEVector;
2944 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
2945 assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
2946 Opc = AArch64::STR_ZXI;
2947 StackID = TargetStackID::SVEVector;
2948 }
2949 assert(Opc && "Unknown register class");
2950 MFI.setStackID(FI, StackID);
2951
2952 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2953 .addReg(SrcReg, getKillRegState(isKill))
2954 .addFrameIndex(FI);
2955
2956 if (Offset)
2957 MI.addImm(0);
2958 MI.addMemOperand(MMO);
2959 }
2960
2961 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
2962 MachineBasicBlock &MBB,
2963 MachineBasicBlock::iterator InsertBefore,
2964 const MCInstrDesc &MCID,
2965 unsigned DestReg, unsigned SubIdx0,
2966 unsigned SubIdx1, int FI,
2967 MachineMemOperand *MMO) {
2968 unsigned DestReg0 = DestReg;
2969 unsigned DestReg1 = DestReg;
2970 bool IsUndef = true;
2971 if (Register::isPhysicalRegister(DestReg)) {
2972 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
2973 SubIdx0 = 0;
2974 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
2975 SubIdx1 = 0;
2976 IsUndef = false;
2977 }
2978 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2979 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
2980 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
2981 .addFrameIndex(FI)
2982 .addImm(0)
2983 .addMemOperand(MMO);
2984 }
2985
2986 void AArch64InstrInfo::loadRegFromStackSlot(
2987 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2988 int FI, const TargetRegisterClass *RC,
2989 const TargetRegisterInfo *TRI) const {
2990 MachineFunction &MF = *MBB.getParent();
2991 MachineFrameInfo &MFI = MF.getFrameInfo();
2992 unsigned Align = MFI.getObjectAlignment(FI);
2993 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2994 MachineMemOperand *MMO = MF.getMachineMemOperand(
2995 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2996
2997 unsigned Opc = 0;
2998 bool Offset = true;
2999 switch (TRI->getSpillSize(*RC)) {
3000 case 1:
3001 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3002 Opc = AArch64::LDRBui;
3003 break;
3004 case 2:
3005 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3006 Opc = AArch64::LDRHui;
3007 break;
3008 case 4:
3009 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3010 Opc = AArch64::LDRWui;
3011 if (Register::isVirtualRegister(DestReg))
3012 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
3013 else
3014 assert(DestReg != AArch64::WSP);
3015 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3016 Opc = AArch64::LDRSui;
3017 break;
3018 case 8:
3019 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3020 Opc = AArch64::LDRXui;
3021 if (Register::isVirtualRegister(DestReg))
3022 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
3023 else
3024 assert(DestReg != AArch64::SP);
3025 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3026 Opc = AArch64::LDRDui;
3027 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3028 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3029 get(AArch64::LDPWi), DestReg, AArch64::sube32,
3030 AArch64::subo32, FI, MMO);
3031 return;
3032 }
3033 break;
3034 case 16:
3035 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3036 Opc = AArch64::LDRQui;
3037 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3038 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3039 Opc = AArch64::LD1Twov1d;
3040 Offset = false;
3041 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3042 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3043 get(AArch64::LDPXi), DestReg, AArch64::sube64,
3044 AArch64::subo64, FI, MMO);
3045 return;
3046 }
3047 break;
3048 case 24:
3049 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3050 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3051 Opc = AArch64::LD1Threev1d;
3052 Offset = false;
3053 }
3054 break;
3055 case 32:
3056 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3057 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3058 Opc = AArch64::LD1Fourv1d;
3059 Offset = false;
3060 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3061 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3062 Opc = AArch64::LD1Twov2d;
3063 Offset = false;
3064 }
3065 break;
3066 case 48:
3067 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3068 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3069 Opc = AArch64::LD1Threev2d;
3070 Offset = false;
3071 }
3072 break;
3073 case 64:
3074 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3075 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3076 Opc = AArch64::LD1Fourv2d;
3077 Offset = false;
3078 }
3079 break;
3080 }
3081
3082 unsigned StackID = TargetStackID::Default;
3083 if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3084 assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3085 Opc = AArch64::LDR_PXI;
3086 StackID = TargetStackID::SVEVector;
3087 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3088 assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3089 Opc = AArch64::LDR_ZXI;
3090 StackID = TargetStackID::SVEVector;
3091 }
3092 assert(Opc && "Unknown register class");
3093 MFI.setStackID(FI, StackID);
3094
3095 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3096 .addReg(DestReg, getDefRegState(true))
3097 .addFrameIndex(FI);
3098 if (Offset)
3099 MI.addImm(0);
3100 MI.addMemOperand(MMO);
3101 }
3102
3103 // Helper function to emit a frame offset adjustment from a given
3104 // pointer (SrcReg), stored into DestReg. This function is explicit
3105 // in that it requires the opcode.
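// For the scalar ADD/SUB opcodes the offset is emitted in chunks that fit the
// 12-bit immediate, optionally shifted left by 12.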
3106 static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
3107 MachineBasicBlock::iterator MBBI,
3108 const DebugLoc &DL, unsigned DestReg,
3109 unsigned SrcReg, int64_t Offset, unsigned Opc,
3110 const TargetInstrInfo *TII,
3111 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
3112 bool *HasWinCFI) {
3113 int Sign = 1;
3114 unsigned MaxEncoding, ShiftSize;
3115 switch (Opc) {
3116 case AArch64::ADDXri:
3117 case AArch64::ADDSXri:
3118 case AArch64::SUBXri:
3119 case AArch64::SUBSXri:
3120 MaxEncoding = 0xfff;
3121 ShiftSize = 12;
3122 break;
3123 case AArch64::ADDVL_XXI:
3124 case AArch64::ADDPL_XXI:
3125 MaxEncoding = 31;
3126 ShiftSize = 0;
3127 if (Offset < 0) {
3128 MaxEncoding = 32;
3129 Sign = -1;
3130 Offset = -Offset;
3131 }
3132 break;
3133 default:
3134 llvm_unreachable("Unsupported opcode");
3135 }
3136
3137 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
3138 // scratch register. If DestReg is a virtual register, use it as the
3139 // scratch register; otherwise, create a new virtual register (to be
3140 // replaced by the scavenger at the end of PEI). That case can be optimized
3141 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
3142 // register can be loaded with offset%8 and the add/sub can use an extending
3143 // instruction with LSL#3.
3144 // Currently the function handles any offsets but generates a poor sequence
3145 // of code.
3146 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
3147
3148 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
3149 do {
3150 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
3151 unsigned LocalShiftSize = 0;
3152 if (ThisVal > MaxEncoding) {
3153 ThisVal = ThisVal >> ShiftSize;
3154 LocalShiftSize = ShiftSize;
3155 }
3156 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3157 "Encoding cannot handle value that big");
3158 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3159 .addReg(SrcReg)
3160 .addImm(Sign * (int)ThisVal);
3161 if (ShiftSize)
3162 MBI = MBI.addImm(
3163 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
3164 MBI = MBI.setMIFlag(Flag);
3165
3166 if (NeedsWinCFI) {
3167 assert(Sign == 1 && "SEH directives should always have a positive sign");
3168 int Imm = (int)(ThisVal << LocalShiftSize);
3169 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
3170 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
3171 if (HasWinCFI)
3172 *HasWinCFI = true;
3173 if (Imm == 0)
3174 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
3175 else
3176 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
3177 .addImm(Imm)
3178 .setMIFlag(Flag);
3179 assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to "
3180 "emit a single SEH directive");
3181 } else if (DestReg == AArch64::SP) {
3182 if (HasWinCFI)
3183 *HasWinCFI = true;
3184 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
3185 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
3186 .addImm(Imm)
3187 .setMIFlag(Flag);
3188 }
3189 if (HasWinCFI)
3190 *HasWinCFI = true;
3191 }
3192
3193 SrcReg = DestReg;
3194 Offset -= ThisVal << LocalShiftSize;
3195 } while (Offset);
3196 }
3197
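// Materialize DestReg = SrcReg + Offset, where Offset may combine a byte part
// with scalable SVE parts; the byte part is emitted with ADD/SUB and the
// scalable parts with ADDVL/ADDPL.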
3198 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
3199 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
3200 unsigned DestReg, unsigned SrcReg,
3201 StackOffset Offset, const TargetInstrInfo *TII,
3202 MachineInstr::MIFlag Flag, bool SetNZCV,
3203 bool NeedsWinCFI, bool *HasWinCFI) {
3204 int64_t Bytes, NumPredicateVectors, NumDataVectors;
3205 Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
3206
3207 // First emit non-scalable frame offsets, or a simple 'mov'.
3208 if (Bytes || (!Offset && SrcReg != DestReg)) {
3209 assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
3210 "SP increment/decrement not 16-byte aligned");
3211 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
3212 if (Bytes < 0) {
3213 Bytes = -Bytes;
3214 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
3215 }
3216 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
3217 NeedsWinCFI, HasWinCFI);
3218 SrcReg = DestReg;
3219 }
3220
3221 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
3222 "SetNZCV not supported with SVE vectors");
3223 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
3224 "WinCFI not supported with SVE vectors");
3225
3226 if (NumDataVectors) {
3227 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
3228 AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3229 SrcReg = DestReg;
3230 }
3231
3232 if (NumPredicateVectors) {
3233 assert(DestReg != AArch64::SP && "Unaligned access to SP");
3234 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
3235 AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3236 }
3237 }
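// Typical use (a sketch under assumed arguments, not a call copied from this
// repository): a frame-lowering pass that needs "sp = sp - 48" in the prologue
// could write
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
//                   StackOffset(-48, MVT::i8), TII, MachineInstr::FrameSetup);
// which emits a single "sub sp, sp, #48" via emitFrameOffsetAdj above.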
3238
3239 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3240 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3241 MachineBasicBlock::iterator InsertPt, int FrameIndex,
3242 LiveIntervals *LIS, VirtRegMap *VRM) const {
3243 // This is a bit of a hack. Consider this instruction:
3244 //
3245 // %0 = COPY %sp; GPR64all:%0
3246 //
3247 // We explicitly chose GPR64all for the virtual register so such a copy might
3248 // be eliminated by RegisterCoalescer. However, that may not be possible, and
3249 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3250 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3251 //
3252 // To prevent that, we are going to constrain the %0 register class here.
3253 //
3254 // <rdar://problem/11522048>
3255 //
3256 if (MI.isFullCopy()) {
3257 Register DstReg = MI.getOperand(0).getReg();
3258 Register SrcReg = MI.getOperand(1).getReg();
3259 if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
3260 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3261 return nullptr;
3262 }
3263 if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
3264 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3265 return nullptr;
3266 }
3267 }
3268
3269 // Handle the case where a copy is being spilled or filled but the source
3270 // and destination register classes don't match. For example:
3271 //
3272 // %0 = COPY %xzr; GPR64common:%0
3273 //
3274 // In this case we can still safely fold away the COPY and generate the
3275 // following spill code:
3276 //
3277 // STRXui %xzr, %stack.0
3278 //
3279 // This also eliminates spilled cross register class COPYs (e.g. between x and
3280 // d regs) of the same size. For example:
3281 //
3282 // %0 = COPY %1; GPR64:%0, FPR64:%1
3283 //
3284 // will be filled as
3285 //
3286 // LDRDui %0, fi<#0>
3287 //
3288 // instead of
3289 //
3290 // LDRXui %Temp, fi<#0>
3291 // %0 = FMOV %Temp
3292 //
3293 if (MI.isCopy() && Ops.size() == 1 &&
3294 // Make sure we're only folding the explicit COPY defs/uses.
3295 (Ops[0] == 0 || Ops[0] == 1)) {
3296 bool IsSpill = Ops[0] == 0;
3297 bool IsFill = !IsSpill;
3298 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3299 const MachineRegisterInfo &MRI = MF.getRegInfo();
3300 MachineBasicBlock &MBB = *MI.getParent();
3301 const MachineOperand &DstMO = MI.getOperand(0);
3302 const MachineOperand &SrcMO = MI.getOperand(1);
3303 Register DstReg = DstMO.getReg();
3304 Register SrcReg = SrcMO.getReg();
3305 // This is slightly expensive to compute for physical regs since
3306 // getMinimalPhysRegClass is slow.
3307 auto getRegClass = [&](unsigned Reg) {
3308 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
3309 : TRI.getMinimalPhysRegClass(Reg);
3310 };
3311
3312 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3313 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3314 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3315 "Mismatched register size in non subreg COPY");
3316 if (IsSpill)
3317 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3318 getRegClass(SrcReg), &TRI);
3319 else
3320 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3321 getRegClass(DstReg), &TRI);
3322 return &*--InsertPt;
3323 }
3324
3325 // Handle cases like spilling def of:
3326 //
3327 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3328 //
3329 // where the physical register source can be widened and stored to the full
3330 // virtual reg destination stack slot, in this case producing:
3331 //
3332 // STRXui %xzr, %stack.0
3333 //
3334 if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
3335 assert(SrcMO.getSubReg() == 0 &&
3336 "Unexpected subreg on physical register");
3337 const TargetRegisterClass *SpillRC;
3338 unsigned SpillSubreg;
3339 switch (DstMO.getSubReg()) {
3340 default:
3341 SpillRC = nullptr;
3342 break;
3343 case AArch64::sub_32:
3344 case AArch64::ssub:
3345 if (AArch64::GPR32RegClass.contains(SrcReg)) {
3346 SpillRC = &AArch64::GPR64RegClass;
3347 SpillSubreg = AArch64::sub_32;
3348 } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3349 SpillRC = &AArch64::FPR64RegClass;
3350 SpillSubreg = AArch64::ssub;
3351 } else
3352 SpillRC = nullptr;
3353 break;
3354 case AArch64::dsub:
3355 if (AArch64::FPR64RegClass.contains(SrcReg)) {
3356 SpillRC = &AArch64::FPR128RegClass;
3357 SpillSubreg = AArch64::dsub;
3358 } else
3359 SpillRC = nullptr;
3360 break;
3361 }
3362
3363 if (SpillRC)
3364 if (unsigned WidenedSrcReg =
3365 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3366 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3367 FrameIndex, SpillRC, &TRI);
3368 return &*--InsertPt;
3369 }
3370 }
3371
3372 // Handle cases like filling use of:
3373 //
3374 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3375 //
3376 // where we can load the full virtual reg source stack slot into the subreg
3377 // destination, in this case producing:
3378 //
3379 // LDRWui %0:sub_32<def,read-undef>, %stack.0
3380 //
3381 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3382 const TargetRegisterClass *FillRC;
3383 switch (DstMO.getSubReg()) {
3384 default:
3385 FillRC = nullptr;
3386 break;
3387 case AArch64::sub_32:
3388 FillRC = &AArch64::GPR32RegClass;
3389 break;
3390 case AArch64::ssub:
3391 FillRC = &AArch64::FPR32RegClass;
3392 break;
3393 case AArch64::dsub:
3394 FillRC = &AArch64::FPR64RegClass;
3395 break;
3396 }
3397
3398 if (FillRC) {
3399 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3400 TRI.getRegSizeInBits(*FillRC) &&
3401 "Mismatched regclass size on folded subreg COPY");
3402 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3403 MachineInstr &LoadMI = *--InsertPt;
3404 MachineOperand &LoadDst = LoadMI.getOperand(0);
3405 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3406 LoadDst.setSubReg(DstMO.getSubReg());
3407 LoadDst.setIsUndef();
3408 return &LoadMI;
3409 }
3410 }
3411 }
3412
3413 // Cannot fold.
3414 return nullptr;
3415 }
3416
3417 static bool isSVEScaledImmInstruction(unsigned Opcode) {
3418 switch (Opcode) {
3419 case AArch64::LDR_ZXI:
3420 case AArch64::STR_ZXI:
3421 case AArch64::LDR_PXI:
3422 case AArch64::STR_PXI:
3423 return true;
3424 default:
3425 return false;
3426 }
3427 }
3428
3429 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
3430 StackOffset &SOffset,
3431 bool *OutUseUnscaledOp,
3432 unsigned *OutUnscaledOp,
3433 int64_t *EmittableOffset) {
3434 // Set output values in case of early exit.
3435 if (EmittableOffset)
3436 *EmittableOffset = 0;
3437 if (OutUseUnscaledOp)
3438 *OutUseUnscaledOp = false;
3439 if (OutUnscaledOp)
3440 *OutUnscaledOp = 0;
3441
3442 // Exit early for structured vector spills/fills as they can't take an
3443 // immediate offset.
3444 switch (MI.getOpcode()) {
3445 default:
3446 break;
3447 case AArch64::LD1Twov2d:
3448 case AArch64::LD1Threev2d:
3449 case AArch64::LD1Fourv2d:
3450 case AArch64::LD1Twov1d:
3451 case AArch64::LD1Threev1d:
3452 case AArch64::LD1Fourv1d:
3453 case AArch64::ST1Twov2d:
3454 case AArch64::ST1Threev2d:
3455 case AArch64::ST1Fourv2d:
3456 case AArch64::ST1Twov1d:
3457 case AArch64::ST1Threev1d:
3458 case AArch64::ST1Fourv1d:
3459 case AArch64::IRG:
3460 case AArch64::IRGstack:
3461 return AArch64FrameOffsetCannotUpdate;
3462 }
3463
3464 // Get the min/max offset and the scale.
3465 unsigned Scale, Width;
3466 int64_t MinOff, MaxOff;
3467 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
3468 MaxOff))
3469 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3470
3471 // Construct the complete offset.
3472 bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
3473 int64_t Offset =
3474 IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
3475
3476 const MachineOperand &ImmOpnd =
3477 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
3478 Offset += ImmOpnd.getImm() * Scale;
3479
3480 // If the offset doesn't match the scale, we rewrite the instruction to
3481 // use the unscaled instruction instead. Likewise, if we have a negative
3482 // offset and there is an unscaled op to use.
3483 Optional<unsigned> UnscaledOp =
3484 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
3485 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
3486 if (useUnscaledOp &&
3487 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
3488 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3489
3490 int64_t Remainder = Offset % Scale;
3491 assert(!(Remainder && useUnscaledOp) &&
3492 "Cannot have remainder when using unscaled op");
3493
3494 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
3495 int64_t NewOffset = Offset / Scale;
3496 if (MinOff <= NewOffset && NewOffset <= MaxOff)
3497 Offset = Remainder;
3498 else {
3499 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
3500 Offset = Offset - NewOffset * Scale + Remainder;
3501 }
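// Illustration (a sketch): for an LDRXui (Scale = 8, MaxOff = 4095) whose
// total byte offset works out to 65536, NewOffset = 8192 is clamped to 4095,
// so 4095 * 8 = 32760 bytes become emittable and the remaining 32776 bytes are
// left behind in the residual Offset handed back through SOffset below.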
3502
3503 if (EmittableOffset)
3504 *EmittableOffset = NewOffset;
3505 if (OutUseUnscaledOp)
3506 *OutUseUnscaledOp = useUnscaledOp;
3507 if (OutUnscaledOp && UnscaledOp)
3508 *OutUnscaledOp = *UnscaledOp;
3509
3510 if (IsMulVL)
3511 SOffset = StackOffset(Offset, MVT::nxv1i8) +
3512 StackOffset(SOffset.getBytes(), MVT::i8);
3513 else
3514 SOffset = StackOffset(Offset, MVT::i8) +
3515 StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
3516 return AArch64FrameOffsetCanUpdate |
3517 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
3518 }
3519
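// Brief summary (added for navigation): rewrite the frame-index operand at
// FrameRegIdx in MI to be relative to FrameReg plus Offset. On return, Offset
// holds whatever part of the adjustment could not be folded into MI; the
// result is true only when no residual offset is left for the caller.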
3520 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3521 unsigned FrameReg, StackOffset &Offset,
3522 const AArch64InstrInfo *TII) {
3523 unsigned Opcode = MI.getOpcode();
3524 unsigned ImmIdx = FrameRegIdx + 1;
3525
3526 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3527 Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
3528 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3529 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3530 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3531 MI.eraseFromParent();
3532 Offset = StackOffset();
3533 return true;
3534 }
3535
3536 int64_t NewOffset;
3537 unsigned UnscaledOp;
3538 bool UseUnscaledOp;
3539 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3540 &UnscaledOp, &NewOffset);
3541 if (Status & AArch64FrameOffsetCanUpdate) {
3542 if (Status & AArch64FrameOffsetIsLegal)
3543 // Replace the FrameIndex with FrameReg.
3544 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3545 if (UseUnscaledOp)
3546 MI.setDesc(TII->get(UnscaledOp));
3547
3548 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3549 return !Offset;
3550 }
3551
3552 return false;
3553 }
3554
3555 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3556 NopInst.setOpcode(AArch64::HINT);
3557 NopInst.addOperand(MCOperand::createImm(0));
3558 }
3559
3560 // AArch64 supports MachineCombiner.
3561 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3562
3563 // True when Opc sets the NZCV flags.
3564 static bool isCombineInstrSettingFlag(unsigned Opc) {
3565 switch (Opc) {
3566 case AArch64::ADDSWrr:
3567 case AArch64::ADDSWri:
3568 case AArch64::ADDSXrr:
3569 case AArch64::ADDSXri:
3570 case AArch64::SUBSWrr:
3571 case AArch64::SUBSXrr:
3572 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3573 case AArch64::SUBSWri:
3574 case AArch64::SUBSXri:
3575 return true;
3576 default:
3577 break;
3578 }
3579 return false;
3580 }
3581
3582 // 32b Opcodes that can be combined with a MUL
3583 static bool isCombineInstrCandidate32(unsigned Opc) {
3584 switch (Opc) {
3585 case AArch64::ADDWrr:
3586 case AArch64::ADDWri:
3587 case AArch64::SUBWrr:
3588 case AArch64::ADDSWrr:
3589 case AArch64::ADDSWri:
3590 case AArch64::SUBSWrr:
3591 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3592 case AArch64::SUBWri:
3593 case AArch64::SUBSWri:
3594 return true;
3595 default:
3596 break;
3597 }
3598 return false;
3599 }
3600
3601 // 64b Opcodes that can be combined with a MUL
3602 static bool isCombineInstrCandidate64(unsigned Opc) {
3603 switch (Opc) {
3604 case AArch64::ADDXrr:
3605 case AArch64::ADDXri:
3606 case AArch64::SUBXrr:
3607 case AArch64::ADDSXrr:
3608 case AArch64::ADDSXri:
3609 case AArch64::SUBSXrr:
3610 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3611 case AArch64::SUBXri:
3612 case AArch64::SUBSXri:
3613 case AArch64::ADDv8i8:
3614 case AArch64::ADDv16i8:
3615 case AArch64::ADDv4i16:
3616 case AArch64::ADDv8i16:
3617 case AArch64::ADDv2i32:
3618 case AArch64::ADDv4i32:
3619 case AArch64::SUBv8i8:
3620 case AArch64::SUBv16i8:
3621 case AArch64::SUBv4i16:
3622 case AArch64::SUBv8i16:
3623 case AArch64::SUBv2i32:
3624 case AArch64::SUBv4i32:
3625 return true;
3626 default:
3627 break;
3628 }
3629 return false;
3630 }
3631
3632 // FP Opcodes that can be combined with a FMUL
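// Forming an FMA changes the result of the separate multiply + add/sub (the
// intermediate product is no longer rounded), so these are only candidates
// when unsafe-fp-math or FPOpFusion::Fast is in effect, as checked below.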
3633 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3634 switch (Inst.getOpcode()) {
3635 default:
3636 break;
3637 case AArch64::FADDHrr:
3638 case AArch64::FADDSrr:
3639 case AArch64::FADDDrr:
3640 case AArch64::FADDv4f16:
3641 case AArch64::FADDv8f16:
3642 case AArch64::FADDv2f32:
3643 case AArch64::FADDv2f64:
3644 case AArch64::FADDv4f32:
3645 case AArch64::FSUBHrr:
3646 case AArch64::FSUBSrr:
3647 case AArch64::FSUBDrr:
3648 case AArch64::FSUBv4f16:
3649 case AArch64::FSUBv8f16:
3650 case AArch64::FSUBv2f32:
3651 case AArch64::FSUBv2f64:
3652 case AArch64::FSUBv4f32:
3653 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3654 return (Options.UnsafeFPMath ||
3655 Options.AllowFPOpFusion == FPOpFusion::Fast);
3656 }
3657 return false;
3658 }
3659
3660 // Opcodes that can be combined with a MUL
3661 static bool isCombineInstrCandidate(unsigned Opc) {
3662 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3663 }
3664
3665 //
3666 // Utility routine that checks if \param MO is defined by an
3667 // \param CombineOpc instruction in the basic block \param MBB
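// For example (an illustrative MIR sketch, not output from this pass):
//   %1:gpr32 = MADDWrrr %a, %b, $wzr     ; plain 32-bit multiply
//   %2:gpr32 = ADDWrr %c, %1
// canCombine(MBB, <the %1 operand of the ADD>, AArch64::MADDWrrr,
// AArch64::WZR, /*CheckZeroReg=*/true) returns true, since %1 has a single
// non-debug use and its third source operand is the zero register.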
3668 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3669 unsigned CombineOpc, unsigned ZeroReg = 0,
3670 bool CheckZeroReg = false) {
3671 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3672 MachineInstr *MI = nullptr;
3673
3674 if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
3675 MI = MRI.getUniqueVRegDef(MO.getReg());
3676 // And it needs to be in the trace (otherwise, it won't have a depth).
3677 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3678 return false;
3679 // It must only be used by the instruction we combine with.
3680 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3681 return false;
3682
3683 if (CheckZeroReg) {
3684 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3685 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3686 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3687 // The third input reg must be zero.
3688 if (MI->getOperand(3).getReg() != ZeroReg)
3689 return false;
3690 }
3691
3692 return true;
3693 }
3694
3695 //
3696 // Is \param MO defined by an integer multiply and can be combined?
3697 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3698 unsigned MulOpc, unsigned ZeroReg) {
3699 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3700 }
3701
3702 //
3703 // Is \param MO defined by a floating-point multiply and can be combined?
3704 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3705 unsigned MulOpc) {
3706 return canCombine(MBB, MO, MulOpc);
3707 }
3708
3709 // TODO: There are many more machine instruction opcodes to match:
3710 // 1. Other data types (integer, vectors)
3711 // 2. Other math / logic operations (xor, or)
3712 // 3. Other forms of the same operation (intrinsics and other variants)
3713 bool AArch64InstrInfo::isAssociativeAndCommutative(
3714 const MachineInstr &Inst) const {
3715 switch (Inst.getOpcode()) {
3716 case AArch64::FADDDrr:
3717 case AArch64::FADDSrr:
3718 case AArch64::FADDv2f32:
3719 case AArch64::FADDv2f64:
3720 case AArch64::FADDv4f32:
3721 case AArch64::FMULDrr:
3722 case AArch64::FMULSrr:
3723 case AArch64::FMULX32:
3724 case AArch64::FMULX64:
3725 case AArch64::FMULXv2f32:
3726 case AArch64::FMULXv2f64:
3727 case AArch64::FMULXv4f32:
3728 case AArch64::FMULv2f32:
3729 case AArch64::FMULv2f64:
3730 case AArch64::FMULv4f32:
3731 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3732 default:
3733 return false;
3734 }
3735 }
3736
3737 /// Find instructions that can be turned into madd.
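/// For instance (illustrative only):
///   %3 = MADDWrrr %0, %1, $wzr    ; a 32-bit MUL
///   %4 = ADDWrr %3, %2            ; matches MULADDW_OP1 (MUL feeds operand 1)
///   %5 = ADDWrr %2, %3            ; matches MULADDW_OP2 (MUL feeds operand 2)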
3738 static bool getMaddPatterns(MachineInstr &Root,
3739 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3740 unsigned Opc = Root.getOpcode();
3741 MachineBasicBlock &MBB = *Root.getParent();
3742 bool Found = false;
3743
3744 if (!isCombineInstrCandidate(Opc))
3745 return false;
3746 if (isCombineInstrSettingFlag(Opc)) {
3747 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3748 // When NZCV is live, bail out.
3749 if (Cmp_NZCV == -1)
3750 return false;
3751 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3752 // When the opcode can't change, bail out.
3753 // CHECKME: do we miss any cases for opcode conversion?
3754 if (NewOpc == Opc)
3755 return false;
3756 Opc = NewOpc;
3757 }
3758
3759 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
3760 MachineCombinerPattern Pattern) {
3761 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
3762 Patterns.push_back(Pattern);
3763 Found = true;
3764 }
3765 };
3766
3767 auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
3768 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
3769 Patterns.push_back(Pattern);
3770 Found = true;
3771 }
3772 };
3773
3774 typedef MachineCombinerPattern MCP;
3775
3776 switch (Opc) {
3777 default:
3778 break;
3779 case AArch64::ADDWrr:
3780 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3781 "ADDWrr does not have register operands");
3782 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
3783 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
3784 break;
3785 case AArch64::ADDXrr:
3786 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
3787 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
3788 break;
3789 case AArch64::SUBWrr:
3790 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
3791 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
3792 break;
3793 case AArch64::SUBXrr:
3794 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
3795 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
3796 break;
3797 case AArch64::ADDWri:
3798 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
3799 break;
3800 case AArch64::ADDXri:
3801 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
3802 break;
3803 case AArch64::SUBWri:
3804 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
3805 break;
3806 case AArch64::SUBXri:
3807 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
3808 break;
3809 case AArch64::ADDv8i8:
3810 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
3811 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
3812 break;
3813 case AArch64::ADDv16i8:
3814 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
3815 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
3816 break;
3817 case AArch64::ADDv4i16:
3818 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
3819 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
3820 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
3821 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
3822 break;
3823 case AArch64::ADDv8i16:
3824 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
3825 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
3826 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
3827 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
3828 break;
3829 case AArch64::ADDv2i32:
3830 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
3831 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
3832 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
3833 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
3834 break;
3835 case AArch64::ADDv4i32:
3836 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
3837 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
3838 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
3839 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
3840 break;
3841 case AArch64::SUBv8i8:
3842 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
3843 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
3844 break;
3845 case AArch64::SUBv16i8:
3846 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
3847 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
3848 break;
3849 case AArch64::SUBv4i16:
3850 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
3851 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
3852 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
3853 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
3854 break;
3855 case AArch64::SUBv8i16:
3856 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
3857 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
3858 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
3859 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
3860 break;
3861 case AArch64::SUBv2i32:
3862 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
3863 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
3864 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
3865 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
3866 break;
3867 case AArch64::SUBv4i32:
3868 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
3869 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
3870 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
3871 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
3872 break;
3873 }
3874 return Found;
3875 }
3876 /// Floating-Point Support
3877
3878 /// Find floating-point instructions that can be turned into a fused multiply-add.
3879 static bool getFMAPatterns(MachineInstr &Root,
3880 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3881
3882 if (!isCombineInstrCandidateFP(Root))
3883 return false;
3884
3885 MachineBasicBlock &MBB = *Root.getParent();
3886 bool Found = false;
3887
3888 auto Match = [&](int Opcode, int Operand,
3889 MachineCombinerPattern Pattern) -> bool {
3890 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
3891 Patterns.push_back(Pattern);
3892 return true;
3893 }
3894 return false;
3895 };
3896
3897 typedef MachineCombinerPattern MCP;
3898
3899 switch (Root.getOpcode()) {
3900 default:
3901 assert(false && "Unsupported FP instruction in combiner\n");
3902 break;
3903 case AArch64::FADDHrr:
3904 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3905 "FADDHrr does not have register operands");
3906
3907 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
3908 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
3909 break;
3910 case AArch64::FADDSrr:
3911 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3912 "FADDSrr does not have register operands");
3913
3914 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
3915 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
3916
3917 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
3918 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
3919 break;
3920 case AArch64::FADDDrr:
3921 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
3922 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
3923
3924 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
3925 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
3926 break;
3927 case AArch64::FADDv4f16:
3928 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
3929 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
3930
3931 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
3932 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
3933 break;
3934 case AArch64::FADDv8f16:
3935 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
3936 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
3937
3938 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
3939 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
3940 break;
3941 case AArch64::FADDv2f32:
3942 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
3943 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
3944
3945 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
3946 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
3947 break;
3948 case AArch64::FADDv2f64:
3949 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
3950 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
3951
3952 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
3953 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
3954 break;
3955 case AArch64::FADDv4f32:
3956 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
3957 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
3958
3959 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
3960 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
3961 break;
3962 case AArch64::FSUBHrr:
3963 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
3964 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
3965 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
3966 break;
3967 case AArch64::FSUBSrr:
3968 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
3969
3970 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
3971 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
3972
3973 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
3974 break;
3975 case AArch64::FSUBDrr:
3976 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
3977
3978 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
3979 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
3980
3981 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
3982 break;
3983 case AArch64::FSUBv4f16:
3984 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
3985 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
3986
3987 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
3988 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
3989 break;
3990 case AArch64::FSUBv8f16:
3991 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
3992 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
3993
3994 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
3995 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
3996 break;
3997 case AArch64::FSUBv2f32:
3998 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
3999 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
4000
4001 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
4002 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
4003 break;
4004 case AArch64::FSUBv2f64:
4005 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
4006 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
4007
4008 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
4009 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
4010 break;
4011 case AArch64::FSUBv4f32:
4012 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
4013 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
4014
4015 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
4016 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
4017 break;
4018 }
4019 return Found;
4020 }
4021
4022 /// Return true when a code sequence can improve throughput. It
4023 /// should be called only for instructions in loops.
4024 /// \param Pattern - combiner pattern
4025 bool AArch64InstrInfo::isThroughputPattern(
4026 MachineCombinerPattern Pattern) const {
4027 switch (Pattern) {
4028 default:
4029 break;
4030 case MachineCombinerPattern::FMULADDH_OP1:
4031 case MachineCombinerPattern::FMULADDH_OP2:
4032 case MachineCombinerPattern::FMULSUBH_OP1:
4033 case MachineCombinerPattern::FMULSUBH_OP2:
4034 case MachineCombinerPattern::FMULADDS_OP1:
4035 case MachineCombinerPattern::FMULADDS_OP2:
4036 case MachineCombinerPattern::FMULSUBS_OP1:
4037 case MachineCombinerPattern::FMULSUBS_OP2:
4038 case MachineCombinerPattern::FMULADDD_OP1:
4039 case MachineCombinerPattern::FMULADDD_OP2:
4040 case MachineCombinerPattern::FMULSUBD_OP1:
4041 case MachineCombinerPattern::FMULSUBD_OP2:
4042 case MachineCombinerPattern::FNMULSUBH_OP1:
4043 case MachineCombinerPattern::FNMULSUBS_OP1:
4044 case MachineCombinerPattern::FNMULSUBD_OP1:
4045 case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
4046 case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
4047 case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
4048 case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
4049 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4050 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4051 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4052 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4053 case MachineCombinerPattern::FMLAv4f16_OP2:
4054 case MachineCombinerPattern::FMLAv4f16_OP1:
4055 case MachineCombinerPattern::FMLAv8f16_OP1:
4056 case MachineCombinerPattern::FMLAv8f16_OP2:
4057 case MachineCombinerPattern::FMLAv2f32_OP2:
4058 case MachineCombinerPattern::FMLAv2f32_OP1:
4059 case MachineCombinerPattern::FMLAv2f64_OP1:
4060 case MachineCombinerPattern::FMLAv2f64_OP2:
4061 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4062 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4063 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4064 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4065 case MachineCombinerPattern::FMLAv4f32_OP1:
4066 case MachineCombinerPattern::FMLAv4f32_OP2:
4067 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4068 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4069 case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
4070 case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
4071 case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
4072 case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
4073 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4074 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4075 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4076 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4077 case MachineCombinerPattern::FMLSv4f16_OP1:
4078 case MachineCombinerPattern::FMLSv4f16_OP2:
4079 case MachineCombinerPattern::FMLSv8f16_OP1:
4080 case MachineCombinerPattern::FMLSv8f16_OP2:
4081 case MachineCombinerPattern::FMLSv2f32_OP2:
4082 case MachineCombinerPattern::FMLSv2f64_OP2:
4083 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4084 case MachineCombinerPattern::FMLSv4f32_OP2:
4085 case MachineCombinerPattern::MULADDv8i8_OP1:
4086 case MachineCombinerPattern::MULADDv8i8_OP2:
4087 case MachineCombinerPattern::MULADDv16i8_OP1:
4088 case MachineCombinerPattern::MULADDv16i8_OP2:
4089 case MachineCombinerPattern::MULADDv4i16_OP1:
4090 case MachineCombinerPattern::MULADDv4i16_OP2:
4091 case MachineCombinerPattern::MULADDv8i16_OP1:
4092 case MachineCombinerPattern::MULADDv8i16_OP2:
4093 case MachineCombinerPattern::MULADDv2i32_OP1:
4094 case MachineCombinerPattern::MULADDv2i32_OP2:
4095 case MachineCombinerPattern::MULADDv4i32_OP1:
4096 case MachineCombinerPattern::MULADDv4i32_OP2:
4097 case MachineCombinerPattern::MULSUBv8i8_OP1:
4098 case MachineCombinerPattern::MULSUBv8i8_OP2:
4099 case MachineCombinerPattern::MULSUBv16i8_OP1:
4100 case MachineCombinerPattern::MULSUBv16i8_OP2:
4101 case MachineCombinerPattern::MULSUBv4i16_OP1:
4102 case MachineCombinerPattern::MULSUBv4i16_OP2:
4103 case MachineCombinerPattern::MULSUBv8i16_OP1:
4104 case MachineCombinerPattern::MULSUBv8i16_OP2:
4105 case MachineCombinerPattern::MULSUBv2i32_OP1:
4106 case MachineCombinerPattern::MULSUBv2i32_OP2:
4107 case MachineCombinerPattern::MULSUBv4i32_OP1:
4108 case MachineCombinerPattern::MULSUBv4i32_OP2:
4109 case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4110 case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4111 case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4112 case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4113 case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4114 case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4115 case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4116 case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4117 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4118 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4119 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4120 case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4121 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4122 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4123 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4124 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4125 return true;
4126 } // end switch (Pattern)
4127 return false;
4128 }
4129 /// Return true when there is potentially a faster code sequence for an
4130 /// instruction chain ending in \p Root. All potential patterns are listed in
4131 /// the \p Pattern vector. Pattern should be sorted in priority order since the
4132 /// pattern evaluator stops checking as soon as it finds a faster sequence.
4133
4134 bool AArch64InstrInfo::getMachineCombinerPatterns(
4135 MachineInstr &Root,
4136 SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
4137 // Integer patterns
4138 if (getMaddPatterns(Root, Patterns))
4139 return true;
4140 // Floating point patterns
4141 if (getFMAPatterns(Root, Patterns))
4142 return true;
4143
4144 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
4145 }
4146
4147 enum class FMAInstKind { Default, Indexed, Accumulator };
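// Rough operand order produced for each kind (see genFusedMultiply below):
//   Default     -> (mul-op0, mul-op1, addend), e.g. MADD/FMADD
//   Indexed     -> (addend, mul-op0, mul-op1, lane imm), e.g. FMLAv..._indexed
//   Accumulator -> (addend, mul-op0, mul-op1), e.g. MLA / vector FMLA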
4148 /// genFusedMultiply - Generate fused multiply instructions.
4149 /// This function supports both integer and floating point instructions.
4150 /// A typical example:
4151 /// F|MUL I=A,B,0
4152 /// F|ADD R,I,C
4153 /// ==> F|MADD R,A,B,C
4154 /// \param MF Containing MachineFunction
4155 /// \param MRI Register information
4156 /// \param TII Target information
4157 /// \param Root is the F|ADD instruction
4158 /// \param [out] InsInstrs is a vector of machine instructions and will
4159 /// contain the generated madd instruction
4160 /// \param IdxMulOpd is index of operand in Root that is the result of
4161 /// the F|MUL. In the example above IdxMulOpd is 1.
4162 /// \param MaddOpc the opcode of the f|madd instruction
4163 /// \param RC Register class of operands
4164 /// \param kind the kind of fma instruction (addressing mode) to be generated
4165 /// \param ReplacedAddend is the result register from the instruction
4166 /// replacing the non-combined operand, if any.
4167 static MachineInstr *
4168 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4169 const TargetInstrInfo *TII, MachineInstr &Root,
4170 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4171 unsigned MaddOpc, const TargetRegisterClass *RC,
4172 FMAInstKind kind = FMAInstKind::Default,
4173 const Register *ReplacedAddend = nullptr) {
4174 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4175
4176 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4177 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4178 Register ResultReg = Root.getOperand(0).getReg();
4179 Register SrcReg0 = MUL->getOperand(1).getReg();
4180 bool Src0IsKill = MUL->getOperand(1).isKill();
4181 Register SrcReg1 = MUL->getOperand(2).getReg();
4182 bool Src1IsKill = MUL->getOperand(2).isKill();
4183
4184 unsigned SrcReg2;
4185 bool Src2IsKill;
4186 if (ReplacedAddend) {
4187 // If we just generated a new addend, we must be its only use.
4188 SrcReg2 = *ReplacedAddend;
4189 Src2IsKill = true;
4190 } else {
4191 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4192 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4193 }
4194
4195 if (Register::isVirtualRegister(ResultReg))
4196 MRI.constrainRegClass(ResultReg, RC);
4197 if (Register::isVirtualRegister(SrcReg0))
4198 MRI.constrainRegClass(SrcReg0, RC);
4199 if (Register::isVirtualRegister(SrcReg1))
4200 MRI.constrainRegClass(SrcReg1, RC);
4201 if (Register::isVirtualRegister(SrcReg2))
4202 MRI.constrainRegClass(SrcReg2, RC);
4203
4204 MachineInstrBuilder MIB;
4205 if (kind == FMAInstKind::Default)
4206 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4207 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4208 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4209 .addReg(SrcReg2, getKillRegState(Src2IsKill));
4210 else if (kind == FMAInstKind::Indexed)
4211 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4212 .addReg(SrcReg2, getKillRegState(Src2IsKill))
4213 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4214 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4215 .addImm(MUL->getOperand(3).getImm());
4216 else if (kind == FMAInstKind::Accumulator)
4217 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4218 .addReg(SrcReg2, getKillRegState(Src2IsKill))
4219 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4220 .addReg(SrcReg1, getKillRegState(Src1IsKill));
4221 else
4222 assert(false && "Invalid FMA instruction kind \n");
4223 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
4224 InsInstrs.push_back(MIB);
4225 return MUL;
4226 }
4227
4228 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
4229 /// instructions.
4230 ///
4231 /// \see genFusedMultiply
4232 static MachineInstr *genFusedMultiplyAcc(
4233 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4234 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4235 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4236 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4237 FMAInstKind::Accumulator);
4238 }
4239
4240 /// genNeg - Helper to generate an intermediate negation of the second operand
4241 /// of Root
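/// For example (sketch): for MULSUBv8i8_OP1 the combiner first emits
///   %neg:fpr64 = NEGv8i8 %c
/// via genNeg and then an MLAv8i8 with %neg as the accumulator, so the final
/// value is (-%c) + %a * %b, matching the original subtraction of %c from the
/// product.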
4242 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
4243 const TargetInstrInfo *TII, MachineInstr &Root,
4244 SmallVectorImpl<MachineInstr *> &InsInstrs,
4245 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
4246 unsigned MnegOpc, const TargetRegisterClass *RC) {
4247 Register NewVR = MRI.createVirtualRegister(RC);
4248 MachineInstrBuilder MIB =
4249 BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
4250 .add(Root.getOperand(2));
4251 InsInstrs.push_back(MIB);
4252
4253 assert(InstrIdxForVirtReg.empty());
4254 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4255
4256 return NewVR;
4257 }
4258
4259 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
4260 /// instructions with an additional negation of the accumulator
4261 static MachineInstr *genFusedMultiplyAccNeg(
4262 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4263 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4264 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4265 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4266 assert(IdxMulOpd == 1);
4267
4268 Register NewVR =
4269 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4270 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4271 FMAInstKind::Accumulator, &NewVR);
4272 }
4273
4274 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
4275 /// instructions.
4276 ///
4277 /// \see genFusedMultiply
4278 static MachineInstr *genFusedMultiplyIdx(
4279 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4280 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4281 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4282 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4283 FMAInstKind::Indexed);
4284 }
4285
4286 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply (indexed)
4287 /// instructions with an additional negation of the accumulator
4288 static MachineInstr *genFusedMultiplyIdxNeg(
4289 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4290 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4291 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4292 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4293 assert(IdxMulOpd == 1);
4294
4295 Register NewVR =
4296 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4297
4298 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4299 FMAInstKind::Indexed, &NewVR);
4300 }
4301
4302 /// genMaddR - Generate madd instruction and combine mul and add using
4303 /// an extra virtual register
4304 /// Example - an ADD intermediate needs to be stored in a register:
4305 /// MUL I=A,B,0
4306 /// ADD R,I,Imm
4307 /// ==> ORR V, ZR, Imm
4308 /// ==> MADD R,A,B,V
4309 /// \param MF Containing MachineFunction
4310 /// \param MRI Register information
4311 /// \param TII Target information
4312 /// \param Root is the ADD instruction
4313 /// \param [out] InsInstrs is a vector of machine instructions and will
4314 /// contain the generated madd instruction
4315 /// \param IdxMulOpd is index of operand in Root that is the result of
4316 /// the MUL. In the example above IdxMulOpd is 1.
4317 /// \param MaddOpc the opcode of the madd instruction
4318 /// \param VR is a virtual register that holds the value of an ADD operand
4319 /// (V in the example above).
4320 /// \param RC Register class of operands
4321 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4322 const TargetInstrInfo *TII, MachineInstr &Root,
4323 SmallVectorImpl<MachineInstr *> &InsInstrs,
4324 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4325 const TargetRegisterClass *RC) {
4326 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4327
4328 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4329 Register ResultReg = Root.getOperand(0).getReg();
4330 Register SrcReg0 = MUL->getOperand(1).getReg();
4331 bool Src0IsKill = MUL->getOperand(1).isKill();
4332 Register SrcReg1 = MUL->getOperand(2).getReg();
4333 bool Src1IsKill = MUL->getOperand(2).isKill();
4334
4335 if (Register::isVirtualRegister(ResultReg))
4336 MRI.constrainRegClass(ResultReg, RC);
4337 if (Register::isVirtualRegister(SrcReg0))
4338 MRI.constrainRegClass(SrcReg0, RC);
4339 if (Register::isVirtualRegister(SrcReg1))
4340 MRI.constrainRegClass(SrcReg1, RC);
4341 if (Register::isVirtualRegister(VR))
4342 MRI.constrainRegClass(VR, RC);
4343
4344 MachineInstrBuilder MIB =
4345 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4346 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4347 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4348 .addReg(VR);
4349 // Insert the MADD
4350 InsInstrs.push_back(MIB);
4351 return MUL;
4352 }
4353
4354 /// When getMachineCombinerPatterns() finds potential patterns,
4355 /// this function generates the instructions that could replace the
4356 /// original code sequence
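/// For example (sketch), for MachineCombinerPattern::MULADDW_OP2 a root
///   %3 = ADDWrr %2, %1        (where %1 = MADDWrrr %a, %b, $wzr)
/// is rewritten into a single
///   %3 = MADDWrrr %a, %b, %2
/// which genFusedMultiply appends to InsInstrs; the MachineCombiner then
/// decides whether the replacement is profitable before committing it.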
4357 void AArch64InstrInfo::genAlternativeCodeSequence(
4358 MachineInstr &Root, MachineCombinerPattern Pattern,
4359 SmallVectorImpl<MachineInstr *> &InsInstrs,
4360 SmallVectorImpl<MachineInstr *> &DelInstrs,
4361 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4362 MachineBasicBlock &MBB = *Root.getParent();
4363 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4364 MachineFunction &MF = *MBB.getParent();
4365 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4366
4367 MachineInstr *MUL;
4368 const TargetRegisterClass *RC;
4369 unsigned Opc;
4370 switch (Pattern) {
4371 default:
4372 // Reassociate instructions.
4373 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4374 DelInstrs, InstrIdxForVirtReg);
4375 return;
4376 case MachineCombinerPattern::MULADDW_OP1:
4377 case MachineCombinerPattern::MULADDX_OP1:
4378 // MUL I=A,B,0
4379 // ADD R,I,C
4380 // ==> MADD R,A,B,C
4381 // --- Create(MADD);
4382 if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4383 Opc = AArch64::MADDWrrr;
4384 RC = &AArch64::GPR32RegClass;
4385 } else {
4386 Opc = AArch64::MADDXrrr;
4387 RC = &AArch64::GPR64RegClass;
4388 }
4389 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4390 break;
4391 case MachineCombinerPattern::MULADDW_OP2:
4392 case MachineCombinerPattern::MULADDX_OP2:
4393 // MUL I=A,B,0
4394 // ADD R,C,I
4395 // ==> MADD R,A,B,C
4396 // --- Create(MADD);
4397 if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4398 Opc = AArch64::MADDWrrr;
4399 RC = &AArch64::GPR32RegClass;
4400 } else {
4401 Opc = AArch64::MADDXrrr;
4402 RC = &AArch64::GPR64RegClass;
4403 }
4404 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4405 break;
4406 case MachineCombinerPattern::MULADDWI_OP1:
4407 case MachineCombinerPattern::MULADDXI_OP1: {
4408 // MUL I=A,B,0
4409 // ADD R,I,Imm
4410 // ==> ORR V, ZR, Imm
4411 // ==> MADD R,A,B,V
4412 // --- Create(MADD);
4413 const TargetRegisterClass *OrrRC;
4414 unsigned BitSize, OrrOpc, ZeroReg;
4415 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4416 OrrOpc = AArch64::ORRWri;
4417 OrrRC = &AArch64::GPR32spRegClass;
4418 BitSize = 32;
4419 ZeroReg = AArch64::WZR;
4420 Opc = AArch64::MADDWrrr;
4421 RC = &AArch64::GPR32RegClass;
4422 } else {
4423 OrrOpc = AArch64::ORRXri;
4424 OrrRC = &AArch64::GPR64spRegClass;
4425 BitSize = 64;
4426 ZeroReg = AArch64::XZR;
4427 Opc = AArch64::MADDXrrr;
4428 RC = &AArch64::GPR64RegClass;
4429 }
4430 Register NewVR = MRI.createVirtualRegister(OrrRC);
4431 uint64_t Imm = Root.getOperand(2).getImm();
4432
4433 if (Root.getOperand(3).isImm()) {
4434 unsigned Val = Root.getOperand(3).getImm();
4435 Imm = Imm << Val;
4436 }
4437 uint64_t UImm = SignExtend64(Imm, BitSize);
4438 uint64_t Encoding;
4439 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4440 MachineInstrBuilder MIB1 =
4441 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4442 .addReg(ZeroReg)
4443 .addImm(Encoding);
4444 InsInstrs.push_back(MIB1);
4445 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4446 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4447 }
4448 break;
4449 }
4450 case MachineCombinerPattern::MULSUBW_OP1:
4451 case MachineCombinerPattern::MULSUBX_OP1: {
4452 // MUL I=A,B,0
4453 // SUB R,I, C
4454 // ==> SUB V, 0, C
4455 // ==> MADD R,A,B,V // = -C + A*B
4456 // --- Create(MADD);
4457 const TargetRegisterClass *SubRC;
4458 unsigned SubOpc, ZeroReg;
4459 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4460 SubOpc = AArch64::SUBWrr;
4461 SubRC = &AArch64::GPR32spRegClass;
4462 ZeroReg = AArch64::WZR;
4463 Opc = AArch64::MADDWrrr;
4464 RC = &AArch64::GPR32RegClass;
4465 } else {
4466 SubOpc = AArch64::SUBXrr;
4467 SubRC = &AArch64::GPR64spRegClass;
4468 ZeroReg = AArch64::XZR;
4469 Opc = AArch64::MADDXrrr;
4470 RC = &AArch64::GPR64RegClass;
4471 }
4472 Register NewVR = MRI.createVirtualRegister(SubRC);
4473 // SUB NewVR, 0, C
4474 MachineInstrBuilder MIB1 =
4475 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4476 .addReg(ZeroReg)
4477 .add(Root.getOperand(2));
4478 InsInstrs.push_back(MIB1);
4479 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4480 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4481 break;
4482 }
4483 case MachineCombinerPattern::MULSUBW_OP2:
4484 case MachineCombinerPattern::MULSUBX_OP2:
4485 // MUL I=A,B,0
4486 // SUB R,C,I
4487 // ==> MSUB R,A,B,C (computes C - A*B)
4488 // --- Create(MSUB);
4489 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4490 Opc = AArch64::MSUBWrrr;
4491 RC = &AArch64::GPR32RegClass;
4492 } else {
4493 Opc = AArch64::MSUBXrrr;
4494 RC = &AArch64::GPR64RegClass;
4495 }
4496 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4497 break;
4498 case MachineCombinerPattern::MULSUBWI_OP1:
4499 case MachineCombinerPattern::MULSUBXI_OP1: {
4500 // MUL I=A,B,0
4501 // SUB R,I, Imm
4502 // ==> ORR V, ZR, -Imm
4503 // ==> MADD R,A,B,V // = -Imm + A*B
4504 // --- Create(MADD);
4505 const TargetRegisterClass *OrrRC;
4506 unsigned BitSize, OrrOpc, ZeroReg;
4507 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4508 OrrOpc = AArch64::ORRWri;
4509 OrrRC = &AArch64::GPR32spRegClass;
4510 BitSize = 32;
4511 ZeroReg = AArch64::WZR;
4512 Opc = AArch64::MADDWrrr;
4513 RC = &AArch64::GPR32RegClass;
4514 } else {
4515 OrrOpc = AArch64::ORRXri;
4516 OrrRC = &AArch64::GPR64spRegClass;
4517 BitSize = 64;
4518 ZeroReg = AArch64::XZR;
4519 Opc = AArch64::MADDXrrr;
4520 RC = &AArch64::GPR64RegClass;
4521 }
4522 Register NewVR = MRI.createVirtualRegister(OrrRC);
4523 uint64_t Imm = Root.getOperand(2).getImm();
4524 if (Root.getOperand(3).isImm()) {
4525 unsigned Val = Root.getOperand(3).getImm();
4526 Imm = Imm << Val;
4527 }
4528 uint64_t UImm = SignExtend64(-Imm, BitSize);
4529 uint64_t Encoding;
4530 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4531 MachineInstrBuilder MIB1 =
4532 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4533 .addReg(ZeroReg)
4534 .addImm(Encoding);
4535 InsInstrs.push_back(MIB1);
4536 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4537 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4538 }
4539 break;
4540 }
4541
4542 case MachineCombinerPattern::MULADDv8i8_OP1:
4543 Opc = AArch64::MLAv8i8;
4544 RC = &AArch64::FPR64RegClass;
4545 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4546 break;
4547 case MachineCombinerPattern::MULADDv8i8_OP2:
4548 Opc = AArch64::MLAv8i8;
4549 RC = &AArch64::FPR64RegClass;
4550 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4551 break;
4552 case MachineCombinerPattern::MULADDv16i8_OP1:
4553 Opc = AArch64::MLAv16i8;
4554 RC = &AArch64::FPR128RegClass;
4555 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4556 break;
4557 case MachineCombinerPattern::MULADDv16i8_OP2:
4558 Opc = AArch64::MLAv16i8;
4559 RC = &AArch64::FPR128RegClass;
4560 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4561 break;
4562 case MachineCombinerPattern::MULADDv4i16_OP1:
4563 Opc = AArch64::MLAv4i16;
4564 RC = &AArch64::FPR64RegClass;
4565 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4566 break;
4567 case MachineCombinerPattern::MULADDv4i16_OP2:
4568 Opc = AArch64::MLAv4i16;
4569 RC = &AArch64::FPR64RegClass;
4570 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4571 break;
4572 case MachineCombinerPattern::MULADDv8i16_OP1:
4573 Opc = AArch64::MLAv8i16;
4574 RC = &AArch64::FPR128RegClass;
4575 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4576 break;
4577 case MachineCombinerPattern::MULADDv8i16_OP2:
4578 Opc = AArch64::MLAv8i16;
4579 RC = &AArch64::FPR128RegClass;
4580 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4581 break;
4582 case MachineCombinerPattern::MULADDv2i32_OP1:
4583 Opc = AArch64::MLAv2i32;
4584 RC = &AArch64::FPR64RegClass;
4585 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4586 break;
4587 case MachineCombinerPattern::MULADDv2i32_OP2:
4588 Opc = AArch64::MLAv2i32;
4589 RC = &AArch64::FPR64RegClass;
4590 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4591 break;
4592 case MachineCombinerPattern::MULADDv4i32_OP1:
4593 Opc = AArch64::MLAv4i32;
4594 RC = &AArch64::FPR128RegClass;
4595 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4596 break;
4597 case MachineCombinerPattern::MULADDv4i32_OP2:
4598 Opc = AArch64::MLAv4i32;
4599 RC = &AArch64::FPR128RegClass;
4600 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4601 break;
4602
4603 case MachineCombinerPattern::MULSUBv8i8_OP1:
4604 Opc = AArch64::MLAv8i8;
4605 RC = &AArch64::FPR64RegClass;
4606 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4607 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
4608 RC);
4609 break;
4610 case MachineCombinerPattern::MULSUBv8i8_OP2:
4611 Opc = AArch64::MLSv8i8;
4612 RC = &AArch64::FPR64RegClass;
4613 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4614 break;
4615 case MachineCombinerPattern::MULSUBv16i8_OP1:
4616 Opc = AArch64::MLAv16i8;
4617 RC = &AArch64::FPR128RegClass;
4618 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4619 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
4620 RC);
4621 break;
4622 case MachineCombinerPattern::MULSUBv16i8_OP2:
4623 Opc = AArch64::MLSv16i8;
4624 RC = &AArch64::FPR128RegClass;
4625 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4626 break;
4627 case MachineCombinerPattern::MULSUBv4i16_OP1:
4628 Opc = AArch64::MLAv4i16;
4629 RC = &AArch64::FPR64RegClass;
4630 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4631 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4632 RC);
4633 break;
4634 case MachineCombinerPattern::MULSUBv4i16_OP2:
4635 Opc = AArch64::MLSv4i16;
4636 RC = &AArch64::FPR64RegClass;
4637 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4638 break;
4639 case MachineCombinerPattern::MULSUBv8i16_OP1:
4640 Opc = AArch64::MLAv8i16;
4641 RC = &AArch64::FPR128RegClass;
4642 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4643 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4644 RC);
4645 break;
4646 case MachineCombinerPattern::MULSUBv8i16_OP2:
4647 Opc = AArch64::MLSv8i16;
4648 RC = &AArch64::FPR128RegClass;
4649 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4650 break;
4651 case MachineCombinerPattern::MULSUBv2i32_OP1:
4652 Opc = AArch64::MLAv2i32;
4653 RC = &AArch64::FPR64RegClass;
4654 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4655 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4656 RC);
4657 break;
4658 case MachineCombinerPattern::MULSUBv2i32_OP2:
4659 Opc = AArch64::MLSv2i32;
4660 RC = &AArch64::FPR64RegClass;
4661 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4662 break;
4663 case MachineCombinerPattern::MULSUBv4i32_OP1:
4664 Opc = AArch64::MLAv4i32;
4665 RC = &AArch64::FPR128RegClass;
4666 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4667 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4668 RC);
4669 break;
4670 case MachineCombinerPattern::MULSUBv4i32_OP2:
4671 Opc = AArch64::MLSv4i32;
4672 RC = &AArch64::FPR128RegClass;
4673 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4674 break;
4675
4676 case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4677 Opc = AArch64::MLAv4i16_indexed;
4678 RC = &AArch64::FPR64RegClass;
4679 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4680 break;
4681 case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4682 Opc = AArch64::MLAv4i16_indexed;
4683 RC = &AArch64::FPR64RegClass;
4684 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4685 break;
4686 case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4687 Opc = AArch64::MLAv8i16_indexed;
4688 RC = &AArch64::FPR128RegClass;
4689 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4690 break;
4691 case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4692 Opc = AArch64::MLAv8i16_indexed;
4693 RC = &AArch64::FPR128RegClass;
4694 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4695 break;
4696 case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4697 Opc = AArch64::MLAv2i32_indexed;
4698 RC = &AArch64::FPR64RegClass;
4699 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4700 break;
4701 case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4702 Opc = AArch64::MLAv2i32_indexed;
4703 RC = &AArch64::FPR64RegClass;
4704 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4705 break;
4706 case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4707 Opc = AArch64::MLAv4i32_indexed;
4708 RC = &AArch64::FPR128RegClass;
4709 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4710 break;
4711 case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4712 Opc = AArch64::MLAv4i32_indexed;
4713 RC = &AArch64::FPR128RegClass;
4714 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4715 break;
4716
4717 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4718 Opc = AArch64::MLAv4i16_indexed;
4719 RC = &AArch64::FPR64RegClass;
4720 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4721 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4722 RC);
4723 break;
4724 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4725 Opc = AArch64::MLSv4i16_indexed;
4726 RC = &AArch64::FPR64RegClass;
4727 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4728 break;
4729 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4730 Opc = AArch64::MLAv8i16_indexed;
4731 RC = &AArch64::FPR128RegClass;
4732 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4733 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4734 RC);
4735 break;
4736 case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4737 Opc = AArch64::MLSv8i16_indexed;
4738 RC = &AArch64::FPR128RegClass;
4739 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4740 break;
4741 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4742 Opc = AArch64::MLAv2i32_indexed;
4743 RC = &AArch64::FPR64RegClass;
4744 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4745 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4746 RC);
4747 break;
4748 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4749 Opc = AArch64::MLSv2i32_indexed;
4750 RC = &AArch64::FPR64RegClass;
4751 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4752 break;
4753 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4754 Opc = AArch64::MLAv4i32_indexed;
4755 RC = &AArch64::FPR128RegClass;
4756 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4757 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4758 RC);
4759 break;
4760 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4761 Opc = AArch64::MLSv4i32_indexed;
4762 RC = &AArch64::FPR128RegClass;
4763 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4764 break;
4765
4766 // Floating Point Support
4767 case MachineCombinerPattern::FMULADDH_OP1:
4768 Opc = AArch64::FMADDHrrr;
4769 RC = &AArch64::FPR16RegClass;
4770 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4771 break;
4772 case MachineCombinerPattern::FMULADDS_OP1:
4773 Opc = AArch64::FMADDSrrr;
4774 RC = &AArch64::FPR32RegClass;
4775 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4776 break;
4777 case MachineCombinerPattern::FMULADDD_OP1:
4778 Opc = AArch64::FMADDDrrr;
4779 RC = &AArch64::FPR64RegClass;
4780 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4781 break;
4782
4783 case MachineCombinerPattern::FMULADDH_OP2:
4784 Opc = AArch64::FMADDHrrr;
4785 RC = &AArch64::FPR16RegClass;
4786 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4787 break;
4788 case MachineCombinerPattern::FMULADDS_OP2:
4789 Opc = AArch64::FMADDSrrr;
4790 RC = &AArch64::FPR32RegClass;
4791 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4792 break;
4793 case MachineCombinerPattern::FMULADDD_OP2:
4794 Opc = AArch64::FMADDDrrr;
4795 RC = &AArch64::FPR64RegClass;
4796 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4797 break;
4798
4799 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4800 Opc = AArch64::FMLAv1i32_indexed;
4801 RC = &AArch64::FPR32RegClass;
4802 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4803 FMAInstKind::Indexed);
4804 break;
4805 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4806 Opc = AArch64::FMLAv1i32_indexed;
4807 RC = &AArch64::FPR32RegClass;
4808 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4809 FMAInstKind::Indexed);
4810 break;
4811
4812 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4813 Opc = AArch64::FMLAv1i64_indexed;
4814 RC = &AArch64::FPR64RegClass;
4815 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4816 FMAInstKind::Indexed);
4817 break;
4818 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4819 Opc = AArch64::FMLAv1i64_indexed;
4820 RC = &AArch64::FPR64RegClass;
4821 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4822 FMAInstKind::Indexed);
4823 break;
4824
4825 case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
4826 RC = &AArch64::FPR64RegClass;
4827 Opc = AArch64::FMLAv4i16_indexed;
4828 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4829 FMAInstKind::Indexed);
4830 break;
4831 case MachineCombinerPattern::FMLAv4f16_OP1:
4832 RC = &AArch64::FPR64RegClass;
4833 Opc = AArch64::FMLAv4f16;
4834 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4835 FMAInstKind::Accumulator);
4836 break;
4837 case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
4838 RC = &AArch64::FPR64RegClass;
4839 Opc = AArch64::FMLAv4i16_indexed;
4840 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4841 FMAInstKind::Indexed);
4842 break;
4843 case MachineCombinerPattern::FMLAv4f16_OP2:
4844 RC = &AArch64::FPR64RegClass;
4845 Opc = AArch64::FMLAv4f16;
4846 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4847 FMAInstKind::Accumulator);
4848 break;
4849
4850 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4851 case MachineCombinerPattern::FMLAv2f32_OP1:
4852 RC = &AArch64::FPR64RegClass;
4853 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4854 Opc = AArch64::FMLAv2i32_indexed;
4855 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4856 FMAInstKind::Indexed);
4857 } else {
4858 Opc = AArch64::FMLAv2f32;
4859 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4860 FMAInstKind::Accumulator);
4861 }
4862 break;
4863 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4864 case MachineCombinerPattern::FMLAv2f32_OP2:
4865 RC = &AArch64::FPR64RegClass;
4866 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4867 Opc = AArch64::FMLAv2i32_indexed;
4868 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4869 FMAInstKind::Indexed);
4870 } else {
4871 Opc = AArch64::FMLAv2f32;
4872 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4873 FMAInstKind::Accumulator);
4874 }
4875 break;
4876
4877 case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
4878 RC = &AArch64::FPR128RegClass;
4879 Opc = AArch64::FMLAv8i16_indexed;
4880 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4881 FMAInstKind::Indexed);
4882 break;
4883 case MachineCombinerPattern::FMLAv8f16_OP1:
4884 RC = &AArch64::FPR128RegClass;
4885 Opc = AArch64::FMLAv8f16;
4886 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4887 FMAInstKind::Accumulator);
4888 break;
4889 case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
4890 RC = &AArch64::FPR128RegClass;
4891 Opc = AArch64::FMLAv8i16_indexed;
4892 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4893 FMAInstKind::Indexed);
4894 break;
4895 case MachineCombinerPattern::FMLAv8f16_OP2:
4896 RC = &AArch64::FPR128RegClass;
4897 Opc = AArch64::FMLAv8f16;
4898 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4899 FMAInstKind::Accumulator);
4900 break;
4901
4902 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4903 case MachineCombinerPattern::FMLAv2f64_OP1:
4904 RC = &AArch64::FPR128RegClass;
4905 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4906 Opc = AArch64::FMLAv2i64_indexed;
4907 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4908 FMAInstKind::Indexed);
4909 } else {
4910 Opc = AArch64::FMLAv2f64;
4911 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4912 FMAInstKind::Accumulator);
4913 }
4914 break;
4915 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4916 case MachineCombinerPattern::FMLAv2f64_OP2:
4917 RC = &AArch64::FPR128RegClass;
4918 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4919 Opc = AArch64::FMLAv2i64_indexed;
4920 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4921 FMAInstKind::Indexed);
4922 } else {
4923 Opc = AArch64::FMLAv2f64;
4924 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4925 FMAInstKind::Accumulator);
4926 }
4927 break;
4928
4929 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4930 case MachineCombinerPattern::FMLAv4f32_OP1:
4931 RC = &AArch64::FPR128RegClass;
4932 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4933 Opc = AArch64::FMLAv4i32_indexed;
4934 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4935 FMAInstKind::Indexed);
4936 } else {
4937 Opc = AArch64::FMLAv4f32;
4938 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4939 FMAInstKind::Accumulator);
4940 }
4941 break;
4942
4943 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4944 case MachineCombinerPattern::FMLAv4f32_OP2:
4945 RC = &AArch64::FPR128RegClass;
4946 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4947 Opc = AArch64::FMLAv4i32_indexed;
4948 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4949 FMAInstKind::Indexed);
4950 } else {
4951 Opc = AArch64::FMLAv4f32;
4952 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4953 FMAInstKind::Accumulator);
4954 }
4955 break;
4956
4957 case MachineCombinerPattern::FMULSUBH_OP1:
4958 Opc = AArch64::FNMSUBHrrr;
4959 RC = &AArch64::FPR16RegClass;
4960 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4961 break;
4962 case MachineCombinerPattern::FMULSUBS_OP1:
4963 Opc = AArch64::FNMSUBSrrr;
4964 RC = &AArch64::FPR32RegClass;
4965 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4966 break;
4967 case MachineCombinerPattern::FMULSUBD_OP1:
4968 Opc = AArch64::FNMSUBDrrr;
4969 RC = &AArch64::FPR64RegClass;
4970 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4971 break;
4972
4973 case MachineCombinerPattern::FNMULSUBH_OP1:
4974 Opc = AArch64::FNMADDHrrr;
4975 RC = &AArch64::FPR16RegClass;
4976 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4977 break;
4978 case MachineCombinerPattern::FNMULSUBS_OP1:
4979 Opc = AArch64::FNMADDSrrr;
4980 RC = &AArch64::FPR32RegClass;
4981 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4982 break;
4983 case MachineCombinerPattern::FNMULSUBD_OP1:
4984 Opc = AArch64::FNMADDDrrr;
4985 RC = &AArch64::FPR64RegClass;
4986 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4987 break;
4988
4989 case MachineCombinerPattern::FMULSUBH_OP2:
4990 Opc = AArch64::FMSUBHrrr;
4991 RC = &AArch64::FPR16RegClass;
4992 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4993 break;
4994 case MachineCombinerPattern::FMULSUBS_OP2:
4995 Opc = AArch64::FMSUBSrrr;
4996 RC = &AArch64::FPR32RegClass;
4997 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4998 break;
4999 case MachineCombinerPattern::FMULSUBD_OP2:
5000 Opc = AArch64::FMSUBDrrr;
5001 RC = &AArch64::FPR64RegClass;
5002 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5003 break;
5004
5005 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
5006 Opc = AArch64::FMLSv1i32_indexed;
5007 RC = &AArch64::FPR32RegClass;
5008 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5009 FMAInstKind::Indexed);
5010 break;
5011
5012 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
5013 Opc = AArch64::FMLSv1i64_indexed;
5014 RC = &AArch64::FPR64RegClass;
5015 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5016 FMAInstKind::Indexed);
5017 break;
5018
5019 case MachineCombinerPattern::FMLSv4f16_OP1:
5020 case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
5021 RC = &AArch64::FPR64RegClass;
5022 Register NewVR = MRI.createVirtualRegister(RC);
5023 MachineInstrBuilder MIB1 =
5024 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
5025 .add(Root.getOperand(2));
5026 InsInstrs.push_back(MIB1);
5027 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5028 if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
5029 Opc = AArch64::FMLAv4f16;
5030 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5031 FMAInstKind::Accumulator, &NewVR);
5032 } else {
5033 Opc = AArch64::FMLAv4i16_indexed;
5034 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5035 FMAInstKind::Indexed, &NewVR);
5036 }
5037 break;
5038 }
5039 case MachineCombinerPattern::FMLSv4f16_OP2:
5040 RC = &AArch64::FPR64RegClass;
5041 Opc = AArch64::FMLSv4f16;
5042 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5043 FMAInstKind::Accumulator);
5044 break;
5045 case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
5046 RC = &AArch64::FPR64RegClass;
5047 Opc = AArch64::FMLSv4i16_indexed;
5048 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5049 FMAInstKind::Indexed);
5050 break;
5051
5052 case MachineCombinerPattern::FMLSv2f32_OP2:
5053 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
5054 RC = &AArch64::FPR64RegClass;
5055 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
5056 Opc = AArch64::FMLSv2i32_indexed;
5057 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5058 FMAInstKind::Indexed);
5059 } else {
5060 Opc = AArch64::FMLSv2f32;
5061 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5062 FMAInstKind::Accumulator);
5063 }
5064 break;
5065
5066 case MachineCombinerPattern::FMLSv8f16_OP1:
5067 case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
5068 RC = &AArch64::FPR128RegClass;
5069 Register NewVR = MRI.createVirtualRegister(RC);
5070 MachineInstrBuilder MIB1 =
5071 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
5072 .add(Root.getOperand(2));
5073 InsInstrs.push_back(MIB1);
5074 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5075 if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
5076 Opc = AArch64::FMLAv8f16;
5077 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5078 FMAInstKind::Accumulator, &NewVR);
5079 } else {
5080 Opc = AArch64::FMLAv8i16_indexed;
5081 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5082 FMAInstKind::Indexed, &NewVR);
5083 }
5084 break;
5085 }
5086 case MachineCombinerPattern::FMLSv8f16_OP2:
5087 RC = &AArch64::FPR128RegClass;
5088 Opc = AArch64::FMLSv8f16;
5089 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5090 FMAInstKind::Accumulator);
5091 break;
5092 case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
5093 RC = &AArch64::FPR128RegClass;
5094 Opc = AArch64::FMLSv8i16_indexed;
5095 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5096 FMAInstKind::Indexed);
5097 break;
5098
5099 case MachineCombinerPattern::FMLSv2f64_OP2:
5100 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
5101 RC = &AArch64::FPR128RegClass;
5102 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
5103 Opc = AArch64::FMLSv2i64_indexed;
5104 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5105 FMAInstKind::Indexed);
5106 } else {
5107 Opc = AArch64::FMLSv2f64;
5108 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5109 FMAInstKind::Accumulator);
5110 }
5111 break;
5112
5113 case MachineCombinerPattern::FMLSv4f32_OP2:
5114 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
5115 RC = &AArch64::FPR128RegClass;
5116 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
5117 Opc = AArch64::FMLSv4i32_indexed;
5118 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5119 FMAInstKind::Indexed);
5120 } else {
5121 Opc = AArch64::FMLSv4f32;
5122 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5123 FMAInstKind::Accumulator);
5124 }
5125 break;
5126 case MachineCombinerPattern::FMLSv2f32_OP1:
5127 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
5128 RC = &AArch64::FPR64RegClass;
5129 Register NewVR = MRI.createVirtualRegister(RC);
5130 MachineInstrBuilder MIB1 =
5131 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
5132 .add(Root.getOperand(2));
5133 InsInstrs.push_back(MIB1);
5134 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5135 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
5136 Opc = AArch64::FMLAv2i32_indexed;
5137 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5138 FMAInstKind::Indexed, &NewVR);
5139 } else {
5140 Opc = AArch64::FMLAv2f32;
5141 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5142 FMAInstKind::Accumulator, &NewVR);
5143 }
5144 break;
5145 }
5146 case MachineCombinerPattern::FMLSv4f32_OP1:
5147 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
5148 RC = &AArch64::FPR128RegClass;
5149 Register NewVR = MRI.createVirtualRegister(RC);
5150 MachineInstrBuilder MIB1 =
5151 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
5152 .add(Root.getOperand(2));
5153 InsInstrs.push_back(MIB1);
5154 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5155 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
5156 Opc = AArch64::FMLAv4i32_indexed;
5157 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5158 FMAInstKind::Indexed, &NewVR);
5159 } else {
5160 Opc = AArch64::FMLAv4f32;
5161 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5162 FMAInstKind::Accumulator, &NewVR);
5163 }
5164 break;
5165 }
5166 case MachineCombinerPattern::FMLSv2f64_OP1:
5167 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
5168 RC = &AArch64::FPR128RegClass;
5169 Register NewVR = MRI.createVirtualRegister(RC);
5170 MachineInstrBuilder MIB1 =
5171 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
5172 .add(Root.getOperand(2));
5173 InsInstrs.push_back(MIB1);
5174 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5175 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
5176 Opc = AArch64::FMLAv2i64_indexed;
5177 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5178 FMAInstKind::Indexed, &NewVR);
5179 } else {
5180 Opc = AArch64::FMLAv2f64;
5181 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5182 FMAInstKind::Accumulator, &NewVR);
5183 }
5184 break;
5185 }
5186 } // end switch (Pattern)
5187 // Record MUL and ADD/SUB for deletion
5188 DelInstrs.push_back(MUL);
5189 DelInstrs.push_back(&Root);
5190 }
5191
5192 /// Replace a csinc-branch sequence by a simple conditional branch
5193 ///
5194 /// Examples:
5195 /// 1. \code
5196 /// csinc w9, wzr, wzr, <condition code>
5197 /// tbnz w9, #0, 0x44
5198 /// \endcode
5199 /// to
5200 /// \code
5201 /// b.<inverted condition code>
5202 /// \endcode
5203 ///
5204 /// 2. \code
5205 /// csinc w9, wzr, wzr, <condition code>
5206 /// tbz w9, #0, 0x44
5207 /// \endcode
5208 /// to
5209 /// \code
5210 /// b.<condition code>
5211 /// \endcode
5212 ///
5213 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
5214 /// compare's constant operand is a power of 2.
5215 ///
5216 /// Examples:
5217 /// \code
5218 /// and w8, w8, #0x400
5219 /// cbnz w8, L1
5220 /// \endcode
5221 /// to
5222 /// \code
5223 /// tbnz w8, #10, L1
5224 /// \endcode
5225 ///
5226 /// \param MI Conditional Branch
5227 /// \return True when the simple conditional branch is generated
5228 ///
5229 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
5230 bool IsNegativeBranch = false;
5231 bool IsTestAndBranch = false;
5232 unsigned TargetBBInMI = 0;
5233 switch (MI.getOpcode()) {
5234 default:
5235 llvm_unreachable("Unknown branch instruction?");
5236 case AArch64::Bcc:
5237 return false;
5238 case AArch64::CBZW:
5239 case AArch64::CBZX:
5240 TargetBBInMI = 1;
5241 break;
5242 case AArch64::CBNZW:
5243 case AArch64::CBNZX:
5244 TargetBBInMI = 1;
5245 IsNegativeBranch = true;
5246 break;
5247 case AArch64::TBZW:
5248 case AArch64::TBZX:
5249 TargetBBInMI = 2;
5250 IsTestAndBranch = true;
5251 break;
5252 case AArch64::TBNZW:
5253 case AArch64::TBNZX:
5254 TargetBBInMI = 2;
5255 IsNegativeBranch = true;
5256 IsTestAndBranch = true;
5257 break;
5258 }
5259 // So we increment a zero register and test for bits other
5260 // than bit 0? Conservatively bail out in case the verifier
5261 // missed this case.
5262 if (IsTestAndBranch && MI.getOperand(1).getImm())
5263 return false;
5264
5265 // Find Definition.
5266   assert(MI.getParent() && "Incomplete machine instruction\n");
5267 MachineBasicBlock *MBB = MI.getParent();
5268 MachineFunction *MF = MBB->getParent();
5269 MachineRegisterInfo *MRI = &MF->getRegInfo();
5270 Register VReg = MI.getOperand(0).getReg();
5271 if (!Register::isVirtualRegister(VReg))
5272 return false;
5273
5274 MachineInstr *DefMI = MRI->getVRegDef(VReg);
5275
5276 // Look through COPY instructions to find definition.
5277 while (DefMI->isCopy()) {
5278 Register CopyVReg = DefMI->getOperand(1).getReg();
5279 if (!MRI->hasOneNonDBGUse(CopyVReg))
5280 return false;
5281 if (!MRI->hasOneDef(CopyVReg))
5282 return false;
5283 DefMI = MRI->getVRegDef(CopyVReg);
5284 }
5285
5286 switch (DefMI->getOpcode()) {
5287 default:
5288 return false;
5289 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
5290 case AArch64::ANDWri:
5291 case AArch64::ANDXri: {
5292 if (IsTestAndBranch)
5293 return false;
5294 if (DefMI->getParent() != MBB)
5295 return false;
5296 if (!MRI->hasOneNonDBGUse(VReg))
5297 return false;
5298
5299 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
5300 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
5301 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
5302 if (!isPowerOf2_64(Mask))
5303 return false;
5304
5305 MachineOperand &MO = DefMI->getOperand(1);
5306 Register NewReg = MO.getReg();
5307 if (!Register::isVirtualRegister(NewReg))
5308 return false;
5309
5310 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
5311
5312 MachineBasicBlock &RefToMBB = *MBB;
5313 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
5314 DebugLoc DL = MI.getDebugLoc();
5315 unsigned Imm = Log2_64(Mask);
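    // E.g. for "and w8, w8, #0x400; cbnz w8, L1" we get Mask = 0x400 and
    // Imm = 10, so we emit "tbnz w8, #10, L1", matching the example in the
    // function comment above.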
5316 unsigned Opc = (Imm < 32)
5317 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
5318 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
5319 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
5320 .addReg(NewReg)
5321 .addImm(Imm)
5322 .addMBB(TBB);
5323     // Register lives on into the TB(N)Z now.
5324 MO.setIsKill(false);
5325
5326     // For bit positions smaller than 32, we need to use the 32-bit
5327     // variant (W) in all cases, since the 64-bit variant cannot
5328     // encode them.
5329     // Therefore, if the input register is 64-bit, we need to take its
5330     // 32-bit sub-register.
5331 if (!Is32Bit && Imm < 32)
5332 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
5333 MI.eraseFromParent();
5334 return true;
5335 }
5336 // Look for CSINC
5337 case AArch64::CSINCWr:
5338 case AArch64::CSINCXr: {
5339 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
5340 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
5341 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
5342 DefMI->getOperand(2).getReg() == AArch64::XZR))
5343 return false;
5344
5345 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
5346 return false;
5347
5348 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
5349 // Convert only when the condition code is not modified between
5350 // the CSINC and the branch. The CC may be used by other
5351 // instructions in between.
5352 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
5353 return false;
5354 MachineBasicBlock &RefToMBB = *MBB;
5355 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
5356 DebugLoc DL = MI.getDebugLoc();
5357 if (IsNegativeBranch)
5358 CC = AArch64CC::getInvertedCondCode(CC);
5359 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
5360 MI.eraseFromParent();
5361 return true;
5362 }
5363 }
5364 }
5365
5366 std::pair<unsigned, unsigned>
5367 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5368 const unsigned Mask = AArch64II::MO_FRAGMENT;
5369 return std::make_pair(TF & Mask, TF & ~Mask);
5370 }
5371
5372 ArrayRef<std::pair<unsigned, const char *>>
5373 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5374 using namespace AArch64II;
5375
5376 static const std::pair<unsigned, const char *> TargetFlags[] = {
5377 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
5378 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
5379 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
5380 {MO_HI12, "aarch64-hi12"}};
5381 return makeArrayRef(TargetFlags);
5382 }
5383
5384 ArrayRef<std::pair<unsigned, const char *>>
5385 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5386 using namespace AArch64II;
5387
5388 static const std::pair<unsigned, const char *> TargetFlags[] = {
5389 {MO_COFFSTUB, "aarch64-coffstub"},
5390 {MO_GOT, "aarch64-got"},
5391 {MO_NC, "aarch64-nc"},
5392 {MO_S, "aarch64-s"},
5393 {MO_TLS, "aarch64-tls"},
5394 {MO_DLLIMPORT, "aarch64-dllimport"},
5395 {MO_PREL, "aarch64-prel"},
5396 {MO_TAGGED, "aarch64-tagged"}};
5397 return makeArrayRef(TargetFlags);
5398 }
5399
5400 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
5401 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
5402 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
5403 {{MOSuppressPair, "aarch64-suppress-pair"},
5404 {MOStridedAccess, "aarch64-strided-access"}};
5405 return makeArrayRef(TargetFlags);
5406 }
5407
5408 /// Constants defining how certain sequences should be outlined.
5409 /// This encompasses how an outlined function should be called, and what kind of
5410 /// frame should be emitted for that outlined function.
5411 ///
5412 /// \p MachineOutlinerDefault implies that the function should be called with
5413 /// a save and restore of LR to the stack.
5414 ///
5415 /// That is,
5416 ///
5417 /// I1 Save LR OUTLINED_FUNCTION:
5418 /// I2 --> BL OUTLINED_FUNCTION I1
5419 /// I3 Restore LR I2
5420 /// I3
5421 /// RET
5422 ///
5423 /// * Call construction overhead: 3 (save + BL + restore)
5424 /// * Frame construction overhead: 1 (ret)
5425 /// * Requires stack fixups? Yes
5426 ///
5427 /// \p MachineOutlinerTailCall implies that the function is being created from
5428 /// a sequence of instructions ending in a return.
5429 ///
5430 /// That is,
5431 ///
5432 /// I1 OUTLINED_FUNCTION:
5433 /// I2 --> B OUTLINED_FUNCTION I1
5434 /// RET I2
5435 /// RET
5436 ///
5437 /// * Call construction overhead: 1 (B)
5438 /// * Frame construction overhead: 0 (Return included in sequence)
5439 /// * Requires stack fixups? No
5440 ///
5441 /// \p MachineOutlinerNoLRSave implies that the function should be called using
5442 /// a BL instruction, but doesn't require LR to be saved and restored. This
5443 /// happens when LR is known to be dead.
5444 ///
5445 /// That is,
5446 ///
5447 /// I1 OUTLINED_FUNCTION:
5448 /// I2 --> BL OUTLINED_FUNCTION I1
5449 /// I3 I2
5450 /// I3
5451 /// RET
5452 ///
5453 /// * Call construction overhead: 1 (BL)
5454 /// * Frame construction overhead: 1 (RET)
5455 /// * Requires stack fixups? No
5456 ///
5457 /// \p MachineOutlinerThunk implies that the function is being created from
5458 /// a sequence of instructions ending in a call. The outlined function is
5459 /// called with a BL instruction, and the outlined function tail-calls the
5460 /// original call destination.
5461 ///
5462 /// That is,
5463 ///
5464 /// I1 OUTLINED_FUNCTION:
5465 /// I2 --> BL OUTLINED_FUNCTION I1
5466 /// BL f I2
5467 /// B f
5468 /// * Call construction overhead: 1 (BL)
5469 /// * Frame construction overhead: 0
5470 /// * Requires stack fixups? No
5471 ///
5472 /// \p MachineOutlinerRegSave implies that the function should be called with a
5473 /// save and restore of LR to an available register. This allows us to avoid
5474 /// stack fixups. Note that this outlining variant is compatible with the
5475 /// NoLRSave case.
5476 ///
5477 /// That is,
5478 ///
5479 /// I1 Save LR OUTLINED_FUNCTION:
5480 /// I2 --> BL OUTLINED_FUNCTION I1
5481 /// I3 Restore LR I2
5482 /// I3
5483 /// RET
5484 ///
5485 /// * Call construction overhead: 3 (save + BL + restore)
5486 /// * Frame construction overhead: 1 (ret)
5487 /// * Requires stack fixups? No
5488 enum MachineOutlinerClass {
5489 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
5490 MachineOutlinerTailCall, /// Only emit a branch.
5491 MachineOutlinerNoLRSave, /// Emit a call and return.
5492 MachineOutlinerThunk, /// Emit a call and tail-call.
5493 MachineOutlinerRegSave /// Same as default, but save to a register.
5494 };
5495
5496 enum MachineOutlinerMBBFlags {
5497 LRUnavailableSomewhere = 0x2,
5498 HasCalls = 0x4,
5499 UnsafeRegsDead = 0x8
5500 };
5501
5502 unsigned
5503 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
5504 assert(C.LRUWasSet && "LRU wasn't set?");
5505 MachineFunction *MF = C.getMF();
5506 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5507 MF->getSubtarget().getRegisterInfo());
5508
5509 // Check if there is an available register across the sequence that we can
5510 // use.
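  // If one is found, the call site can keep LR alive without touching the
  // stack, roughly (the register choice here is purely illustrative):
  //   mov x20, lr
  //   bl  OUTLINED_FUNCTION
  //   mov lr, x20
  // i.e. the 3-instruction / 12-byte MachineOutlinerRegSave overhead described
  // above.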
5511 for (unsigned Reg : AArch64::GPR64RegClass) {
5512 if (!ARI->isReservedReg(*MF, Reg) &&
5513 Reg != AArch64::LR && // LR is not reserved, but don't use it.
5514 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
5515 Reg != AArch64::X17 && // Ditto for X17.
5516 C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
5517 return Reg;
5518 }
5519
5520 // No suitable register. Return 0.
5521 return 0u;
5522 }
5523
5524 static bool
5525 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
5526 const outliner::Candidate &b) {
5527 const Function &Fa = a.getMF()->getFunction();
5528 const Function &Fb = b.getMF()->getFunction();
5529
5530   // If neither function has the "sign-return-address" attribute, their
5531   // signing behaviour is equal.
5532 if (!Fa.hasFnAttribute("sign-return-address") &&
5533 !Fb.hasFnAttribute("sign-return-address")) {
5534 return true;
5535 }
5536
5537   // If both functions have the "sign-return-address" attribute, their signing
5538   // behaviour is equal if the values of the attributes are equal.
5539 if (Fa.hasFnAttribute("sign-return-address") &&
5540 Fb.hasFnAttribute("sign-return-address")) {
5541 StringRef ScopeA =
5542 Fa.getFnAttribute("sign-return-address").getValueAsString();
5543 StringRef ScopeB =
5544 Fb.getFnAttribute("sign-return-address").getValueAsString();
5545 return ScopeA.equals(ScopeB);
5546 }
5547
5548 // If function B doesn't have the "sign-return-address" attribute but A does,
5549 // the functions' signing behaviour is equal if A's value for
5550 // "sign-return-address" is "none" and vice versa.
5551 if (Fa.hasFnAttribute("sign-return-address")) {
5552 StringRef ScopeA =
5553 Fa.getFnAttribute("sign-return-address").getValueAsString();
5554 return ScopeA.equals("none");
5555 }
5556
5557 if (Fb.hasFnAttribute("sign-return-address")) {
5558 StringRef ScopeB =
5559 Fb.getFnAttribute("sign-return-address").getValueAsString();
5560 return ScopeB.equals("none");
5561 }
5562
5563   llvm_unreachable("Unknown combination of sign-return-address attributes");
5564 }
5565
5566 static bool
5567 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
5568 const outliner::Candidate &b) {
5569 const Function &Fa = a.getMF()->getFunction();
5570 const Function &Fb = b.getMF()->getFunction();
5571
5572   // If neither function has the "sign-return-address-key" attribute,
5573   // their keys are equal.
5574 if (!Fa.hasFnAttribute("sign-return-address-key") &&
5575 !Fb.hasFnAttribute("sign-return-address-key")) {
5576 return true;
5577 }
5578
5579   // If both functions have the "sign-return-address-key" attribute, their
5580   // keys are equal if the values of "sign-return-address-key" are equal.
5581 if (Fa.hasFnAttribute("sign-return-address-key") &&
5582 Fb.hasFnAttribute("sign-return-address-key")) {
5583 StringRef KeyA =
5584 Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5585 StringRef KeyB =
5586 Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5587 return KeyA.equals(KeyB);
5588 }
5589
5590   // If B doesn't have the "sign-return-address-key" attribute, both keys are
5591   // equal if function A uses the default key (a_key).
5592 if (Fa.hasFnAttribute("sign-return-address-key")) {
5593 StringRef KeyA =
5594 Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5595 return KeyA.equals_lower("a_key");
5596 }
5597
5598 if (Fb.hasFnAttribute("sign-return-address-key")) {
5599 StringRef KeyB =
5600 Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5601 return KeyB.equals_lower("a_key");
5602 }
5603
5604   llvm_unreachable("Unknown combination of sign-return-address-key attributes");
5605 }
5606
5607 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
5608 const outliner::Candidate &b) {
5609 const AArch64Subtarget &SubtargetA =
5610 a.getMF()->getSubtarget<AArch64Subtarget>();
5611 const AArch64Subtarget &SubtargetB =
5612 b.getMF()->getSubtarget<AArch64Subtarget>();
5613 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
5614 }
5615
5616 outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
5617 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5618 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5619 unsigned SequenceSize =
5620 std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
5621 [this](unsigned Sum, const MachineInstr &MI) {
5622 return Sum + getInstSizeInBytes(MI);
5623 });
5624 unsigned NumBytesToCreateFrame = 0;
5625
5626 // We only allow outlining for functions having exactly matching return
5627 // address signing attributes, i.e., all share the same value for the
5628 // attribute "sign-return-address" and all share the same type of key they
5629 // are signed with.
5630   // Additionally we require all functions to simultaneously either support
5631 // v8.3a features or not. Otherwise an outlined function could get signed
5632 // using dedicated v8.3 instructions and a call from a function that doesn't
5633 // support v8.3 instructions would therefore be invalid.
5634 if (std::adjacent_find(
5635 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5636 [](const outliner::Candidate &a, const outliner::Candidate &b) {
5637 // Return true if a and b are non-equal w.r.t. return address
5638 // signing or support of v8.3a features
5639 if (outliningCandidatesSigningScopeConsensus(a, b) &&
5640 outliningCandidatesSigningKeyConsensus(a, b) &&
5641 outliningCandidatesV8_3OpsConsensus(a, b)) {
5642 return false;
5643 }
5644 return true;
5645 }) != RepeatedSequenceLocs.end()) {
5646 return outliner::OutlinedFunction();
5647 }
5648
5649 // Since at this point all candidates agree on their return address signing
5650 // picking just one is fine. If the candidate functions potentially sign their
5651 // return addresses, the outlined function should do the same. Note that in
5652 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
5653 // not certainly true that the outlined function will have to sign its return
5654 // address but this decision is made later, when the decision to outline
5655 // has already been made.
5656 // The same holds for the number of additional instructions we need: On
5657 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
5658 // necessary. However, at this point we don't know if the outlined function
5659 // will have a RET instruction so we assume the worst.
5660 const Function &FCF = FirstCand.getMF()->getFunction();
5661 const TargetRegisterInfo &TRI = getRegisterInfo();
5662 if (FCF.hasFnAttribute("sign-return-address")) {
5663     // One PAC and one AUT instruction.
5664 NumBytesToCreateFrame += 8;
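    // For instance, one PAC-style instruction (e.g. PACIASP) in the outlined
    // prologue and one matching AUT-style instruction (e.g. AUTIASP) before
    // the return, at 4 bytes each; the exact instructions and key are chosen
    // later, when the frame is actually emitted.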
5665
5666 // We have to check if sp modifying instructions would get outlined.
5667 // If so we only allow outlining if sp is unchanged overall, so matching
5668 // sub and add instructions are okay to outline, all other sp modifications
5669 // are not
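    // For example, a candidate containing the balanced pair
    //   sub sp, sp, #16
    //   ...
    //   add sp, sp, #16
    // nets SPValue back to zero and stays eligible, while an unmatched
    // sub/add or any other write to sp disqualifies it.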
5670 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
5671 int SPValue = 0;
5672 MachineBasicBlock::iterator MBBI = C.front();
5673 for (;;) {
5674 if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
5675 switch (MBBI->getOpcode()) {
5676 case AArch64::ADDXri:
5677 case AArch64::ADDWri:
5678 assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5679 assert(MBBI->getOperand(2).isImm() &&
5680 "Expected operand to be immediate");
5681 assert(MBBI->getOperand(1).isReg() &&
5682 "Expected operand to be a register");
5683 // Check if the add just increments sp. If so, we search for
5684 // matching sub instructions that decrement sp. If not, the
5685 // modification is illegal
5686 if (MBBI->getOperand(1).getReg() == AArch64::SP)
5687 SPValue += MBBI->getOperand(2).getImm();
5688 else
5689 return true;
5690 break;
5691 case AArch64::SUBXri:
5692 case AArch64::SUBWri:
5693 assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5694 assert(MBBI->getOperand(2).isImm() &&
5695 "Expected operand to be immediate");
5696 assert(MBBI->getOperand(1).isReg() &&
5697 "Expected operand to be a register");
5698 // Check if the sub just decrements sp. If so, we search for
5699 // matching add instructions that increment sp. If not, the
5700 // modification is illegal
5701 if (MBBI->getOperand(1).getReg() == AArch64::SP)
5702 SPValue -= MBBI->getOperand(2).getImm();
5703 else
5704 return true;
5705 break;
5706 default:
5707 return true;
5708 }
5709 }
5710 if (MBBI == C.back())
5711 break;
5712 ++MBBI;
5713 }
5714 if (SPValue)
5715 return true;
5716 return false;
5717 };
5718 // Remove candidates with illegal stack modifying instructions
5719 RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5720 RepeatedSequenceLocs.end(),
5721 hasIllegalSPModification),
5722 RepeatedSequenceLocs.end());
5723
5724 // If the sequence doesn't have enough candidates left, then we're done.
5725 if (RepeatedSequenceLocs.size() < 2)
5726 return outliner::OutlinedFunction();
5727 }
5728
5729 // Properties about candidate MBBs that hold for all of them.
5730 unsigned FlagsSetInAll = 0xF;
5731
5732 // Compute liveness information for each candidate, and set FlagsSetInAll.
5733 std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5734 [&FlagsSetInAll](outliner::Candidate &C) {
5735 FlagsSetInAll &= C.Flags;
5736 });
5737
5738 // According to the AArch64 Procedure Call Standard, the following are
5739 // undefined on entry/exit from a function call:
5740 //
5741 // * Registers x16, x17, (and thus w16, w17)
5742 // * Condition codes (and thus the NZCV register)
5743 //
5744   // Because of this, we can't outline any sequence of instructions where
5745   // one of these registers is live into/across it. Thus, we need to delete
5746   // those candidates.
5749 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5750 // If the unsafe registers in this block are all dead, then we don't need
5751 // to compute liveness here.
5752 if (C.Flags & UnsafeRegsDead)
5753 return false;
5754 C.initLRU(TRI);
5755 LiveRegUnits LRU = C.LRU;
5756 return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
5757 !LRU.available(AArch64::NZCV));
5758 };
5759
5760 // Are there any candidates where those registers are live?
5761 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5762 // Erase every candidate that violates the restrictions above. (It could be
5763 // true that we have viable candidates, so it's not worth bailing out in
5764     // the case that, say, 1 out of 20 candidates violates the restrictions.)
5765 RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5766 RepeatedSequenceLocs.end(),
5767 CantGuaranteeValueAcrossCall),
5768 RepeatedSequenceLocs.end());
5769
5770 // If the sequence doesn't have enough candidates left, then we're done.
5771 if (RepeatedSequenceLocs.size() < 2)
5772 return outliner::OutlinedFunction();
5773 }
5774
5775 // At this point, we have only "safe" candidates to outline. Figure out
5776 // frame + call instruction information.
5777
5778 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
5779
5780 // Helper lambda which sets call information for every candidate.
5781 auto SetCandidateCallInfo =
5782 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5783 for (outliner::Candidate &C : RepeatedSequenceLocs)
5784 C.setCallInfo(CallID, NumBytesForCall);
5785 };
5786
5787 unsigned FrameID = MachineOutlinerDefault;
5788 NumBytesToCreateFrame += 4;
5789
5790 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
5791 return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
5792 });
5793
5794   // Returns true if an instruction is safe to fix up, false otherwise.
5795 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
5796 if (MI.isCall())
5797 return true;
5798
5799 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
5800 !MI.readsRegister(AArch64::SP, &TRI))
5801 return true;
5802
5803 // Any modification of SP will break our code to save/restore LR.
5804 // FIXME: We could handle some instructions which add a constant
5805 // offset to SP, with a bit more work.
5806 if (MI.modifiesRegister(AArch64::SP, &TRI))
5807 return false;
5808
5809 // At this point, we have a stack instruction that we might need to
5810 // fix up. We'll handle it if it's a load or store.
5811 if (MI.mayLoadOrStore()) {
5812 const MachineOperand *Base; // Filled with the base operand of MI.
5813 int64_t Offset; // Filled with the offset of MI.
5814
5815 // Does it allow us to offset the base operand and is the base the
5816 // register SP?
5817 if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
5818 Base->getReg() != AArch64::SP)
5819 return false;
5820
5821 // Find the minimum/maximum offset for this instruction and check
5822 // if fixing it up would be in range.
5823 int64_t MinOffset,
5824 MaxOffset; // Unscaled offsets for the instruction.
5825 unsigned Scale; // The scale to multiply the offsets by.
5826 unsigned DummyWidth;
5827 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
5828
5829 Offset += 16; // Update the offset to what it would be if we outlined.
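      // For instance (assuming the 16-byte LR save area implied by the +16
      // above), "ldr x0, [sp, #8]" in the candidate would effectively become
      // "ldr x0, [sp, #24]" once outlined, so the adjusted offset must still
      // fit the instruction's scaled immediate range checked just below.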
5830 if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
5831 return false;
5832
5833 // It's in range, so we can outline it.
5834 return true;
5835 }
5836
5837 // FIXME: Add handling for instructions like "add x0, sp, #8".
5838
5839 // We can't fix it up, so don't outline it.
5840 return false;
5841 };
5842
5843 // True if it's possible to fix up each stack instruction in this sequence.
5844 // Important for frames/call variants that modify the stack.
5845 bool AllStackInstrsSafe = std::all_of(
5846 FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
5847
5848 // If the last instruction in any candidate is a terminator, then we should
5849 // tail call all of the candidates.
5850 if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5851 FrameID = MachineOutlinerTailCall;
5852 NumBytesToCreateFrame = 0;
5853 SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5854 }
5855
5856 else if (LastInstrOpcode == AArch64::BL ||
5857 (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5858 // FIXME: Do we need to check if the code after this uses the value of LR?
5859 FrameID = MachineOutlinerThunk;
5860 NumBytesToCreateFrame = 0;
5861 SetCandidateCallInfo(MachineOutlinerThunk, 4);
5862 }
5863
5864 else {
5865 // We need to decide how to emit calls + frames. We can always emit the same
5866 // frame if we don't need to save to the stack. If we have to save to the
5867 // stack, then we need a different frame.
5868 unsigned NumBytesNoStackCalls = 0;
5869 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5870
5871 // Check if we have to save LR.
5872 for (outliner::Candidate &C : RepeatedSequenceLocs) {
5873 C.initLRU(TRI);
5874
5875 // If we have a noreturn caller, then we're going to be conservative and
5876 // say that we have to save LR. If we don't have a ret at the end of the
5877 // block, then we can't reason about liveness accurately.
5878 //
5879 // FIXME: We can probably do better than always disabling this in
5880 // noreturn functions by fixing up the liveness info.
5881 bool IsNoReturn =
5882 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
5883
5884 // Is LR available? If so, we don't need a save.
5885 if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
5886 NumBytesNoStackCalls += 4;
5887 C.setCallInfo(MachineOutlinerNoLRSave, 4);
5888 CandidatesWithoutStackFixups.push_back(C);
5889 }
5890
5891 // Is an unused register available? If so, we won't modify the stack, so
5892 // we can outline with the same frame type as those that don't save LR.
5893 else if (findRegisterToSaveLRTo(C)) {
5894 NumBytesNoStackCalls += 12;
5895 C.setCallInfo(MachineOutlinerRegSave, 12);
5896 CandidatesWithoutStackFixups.push_back(C);
5897 }
5898
5899 // Is SP used in the sequence at all? If not, we don't have to modify
5900 // the stack, so we are guaranteed to get the same frame.
5901 else if (C.UsedInSequence.available(AArch64::SP)) {
5902 NumBytesNoStackCalls += 12;
5903 C.setCallInfo(MachineOutlinerDefault, 12);
5904 CandidatesWithoutStackFixups.push_back(C);
5905 }
5906
5907 // If we outline this, we need to modify the stack. Pretend we don't
5908 // outline this by saving all of its bytes.
5909 else {
5910 NumBytesNoStackCalls += SequenceSize;
5911 }
5912 }
5913
5914 // If there are no places where we have to save LR, then note that we
5915 // don't have to update the stack. Otherwise, give every candidate the
5916 // default call type, as long as it's safe to do so.
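    // Worked example: with 3 candidates, giving every call the stack-saving
    // MachineOutlinerDefault treatment would cost 3 * 12 bytes of call
    // overhead, so if the per-candidate no-stack-fixup choices made above sum
    // to 36 bytes or less we keep those instead.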
5917 if (!AllStackInstrsSafe ||
5918 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
5919 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5920 FrameID = MachineOutlinerNoLRSave;
5921 } else {
5922 SetCandidateCallInfo(MachineOutlinerDefault, 12);
5923 }
5924
5925 // If we dropped all of the candidates, bail out here.
5926 if (RepeatedSequenceLocs.size() < 2) {
5927 RepeatedSequenceLocs.clear();
5928 return outliner::OutlinedFunction();
5929 }
5930 }
5931
5932 // Does every candidate's MBB contain a call? If so, then we might have a call
5933 // in the range.
5934 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5935 // Check if the range contains a call. These require a save + restore of the
5936 // link register.
5937 bool ModStackToSaveLR = false;
5938 if (std::any_of(FirstCand.front(), FirstCand.back(),
5939 [](const MachineInstr &MI) { return MI.isCall(); }))
5940 ModStackToSaveLR = true;
5941
5942 // Handle the last instruction separately. If this is a tail call, then the
5943 // last instruction is a call. We don't want to save + restore in this case.
5944 // However, it could be possible that the last instruction is a call without
5945 // it being valid to tail call this sequence. We should consider this as
5946 // well.
5947 else if (FrameID != MachineOutlinerThunk &&
5948 FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
5949 ModStackToSaveLR = true;
5950
5951 if (ModStackToSaveLR) {
5952 // We can't fix up the stack. Bail out.
5953 if (!AllStackInstrsSafe) {
5954 RepeatedSequenceLocs.clear();
5955 return outliner::OutlinedFunction();
5956 }
5957
5958 // Save + restore LR.
5959 NumBytesToCreateFrame += 8;
5960 }
5961 }
5962
5963 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5964 NumBytesToCreateFrame, FrameID);
5965 }
5966
5967 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5968 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5969 const Function &F = MF.getFunction();
5970
5971 // Can F be deduplicated by the linker? If it can, don't outline from it.
5972 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5973 return false;
5974
5975 // Don't outline from functions with section markings; the program could
5976 // expect that all the code is in the named section.
5977 // FIXME: Allow outlining from multiple functions with the same section
5978 // marking.
5979 if (F.hasSection())
5980 return false;
5981
5982 // Outlining from functions with redzones is unsafe since the outliner may
5983 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5984 // outline from it.
5985 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5986 if (!AFI || AFI->hasRedZone().getValueOr(true))
5987 return false;
5988
5989 // It's safe to outline from MF.
5990 return true;
5991 }
5992
5993 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
5994 unsigned &Flags) const {
5995 // Check if LR is available through all of the MBB. If it's not, then set
5996 // a flag.
5997 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5998 "Suitable Machine Function for outlining must track liveness");
5999 LiveRegUnits LRU(getRegisterInfo());
6000
6001 std::for_each(MBB.rbegin(), MBB.rend(),
6002 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
6003
6004 // Check if each of the unsafe registers are available...
6005 bool W16AvailableInBlock = LRU.available(AArch64::W16);
6006 bool W17AvailableInBlock = LRU.available(AArch64::W17);
6007 bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
6008
6009 // If all of these are dead (and not live out), we know we don't have to check
6010 // them later.
6011 if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
6012 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6013
6014 // Now, add the live outs to the set.
6015 LRU.addLiveOuts(MBB);
6016
6017 // If any of these registers is available in the MBB, but also a live out of
6018 // the block, then we know outlining is unsafe.
6019 if (W16AvailableInBlock && !LRU.available(AArch64::W16))
6020 return false;
6021 if (W17AvailableInBlock && !LRU.available(AArch64::W17))
6022 return false;
6023 if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
6024 return false;
6025
6026 // Check if there's a call inside this MachineBasicBlock. If there is, then
6027 // set a flag.
6028 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6029 Flags |= MachineOutlinerMBBFlags::HasCalls;
6030
6031 MachineFunction *MF = MBB.getParent();
6032
6033 // In the event that we outline, we may have to save LR. If there is an
6034 // available register in the MBB, then we'll always save LR there. Check if
6035 // this is true.
6036 bool CanSaveLR = false;
6037 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
6038 MF->getSubtarget().getRegisterInfo());
6039
6040 // Check if there is an available register across the sequence that we can
6041 // use.
6042 for (unsigned Reg : AArch64::GPR64RegClass) {
6043 if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
6044 Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
6045 CanSaveLR = true;
6046 break;
6047 }
6048 }
6049
6050 // Check if we have a register we can save LR to, and if LR was used
6051 // somewhere. If both of those things are true, then we need to evaluate the
6052 // safety of outlining stack instructions later.
6053 if (!CanSaveLR && !LRU.available(AArch64::LR))
6054 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6055
6056 return true;
6057 }
6058
6059 outliner::InstrType
6060 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
6061 unsigned Flags) const {
6062 MachineInstr &MI = *MIT;
6063 MachineBasicBlock *MBB = MI.getParent();
6064 MachineFunction *MF = MBB->getParent();
6065 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
6066
6067 // Don't outline anything used for return address signing. The outlined
6068 // function will get signed later if needed.
6069 switch (MI.getOpcode()) {
6070 case AArch64::PACIASP:
6071 case AArch64::PACIBSP:
6072 case AArch64::AUTIASP:
6073 case AArch64::AUTIBSP:
6074 case AArch64::RETAA:
6075 case AArch64::RETAB:
6076 case AArch64::EMITBKEY:
6077 return outliner::InstrType::Illegal;
6078 }
6079
6080 // Don't outline LOHs.
6081 if (FuncInfo->getLOHRelated().count(&MI))
6082 return outliner::InstrType::Illegal;
6083
6084 // Don't allow debug values to impact outlining type.
6085 if (MI.isDebugInstr() || MI.isIndirectDebugValue())
6086 return outliner::InstrType::Invisible;
6087
6088 // At this point, KILL instructions don't really tell us much so we can go
6089 // ahead and skip over them.
6090 if (MI.isKill())
6091 return outliner::InstrType::Invisible;
6092
6093 // Is this a terminator for a basic block?
6094 if (MI.isTerminator()) {
6095
6096 // Is this the end of a function?
6097 if (MI.getParent()->succ_empty())
6098 return outliner::InstrType::Legal;
6099
6100 // It's not, so don't outline it.
6101 return outliner::InstrType::Illegal;
6102 }
6103
6104 // Make sure none of the operands are un-outlinable.
6105 for (const MachineOperand &MOP : MI.operands()) {
6106 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
6107 MOP.isTargetIndex())
6108 return outliner::InstrType::Illegal;
6109
6110 // If it uses LR or W30 explicitly, then don't touch it.
6111 if (MOP.isReg() && !MOP.isImplicit() &&
6112 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
6113 return outliner::InstrType::Illegal;
6114 }
6115
6116 // Special cases for instructions that can always be outlined, but will fail
6117 // the later tests. E.g., ADRPs, which are PC-relative, use LR, but can always
6118 // be outlined because they don't require a *specific* value to be in LR.
6119 if (MI.getOpcode() == AArch64::ADRP)
6120 return outliner::InstrType::Legal;
6121
6122 // If MI is a call we might be able to outline it. We don't want to outline
6123 // any calls that rely on the position of items on the stack. When we outline
6124 // something containing a call, we have to emit a save and restore of LR in
6125 // the outlined function. Currently, this always happens by saving LR to the
6126 // stack. Thus, if we outline, say, half the parameters for a function call
6127 // plus the call, then we'll break the callee's expectations for the layout
6128 // of the stack.
6129 //
6130 // FIXME: Allow calls to functions which construct a stack frame, as long
6131 // as they don't access arguments on the stack.
6132 // FIXME: Figure out some way to analyze functions defined in other modules.
6133 // We should be able to compute the memory usage based on the IR calling
6134 // convention, even if we can't see the definition.
6135 if (MI.isCall()) {
6136 // Get the function associated with the call. Look at each operand and find
6137 // the one that represents the callee and get its name.
6138 const Function *Callee = nullptr;
6139 for (const MachineOperand &MOP : MI.operands()) {
6140 if (MOP.isGlobal()) {
6141 Callee = dyn_cast<Function>(MOP.getGlobal());
6142 break;
6143 }
6144 }
6145
6146 // Never outline calls to mcount. There isn't any rule that would require
6147 // this, but the Linux kernel's "ftrace" feature depends on it.
6148 if (Callee && Callee->getName() == "\01_mcount")
6149 return outliner::InstrType::Illegal;
6150
6151 // If we don't know anything about the callee, assume it depends on the
6152 // stack layout of the caller. In that case, it's only legal to outline
6153 // as a tail-call. Whitelist the call instructions we know about so we
6154 // don't get unexpected results with call pseudo-instructions.
6155 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6156 if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
6157 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6158
6159 if (!Callee)
6160 return UnknownCallOutlineType;
6161
6162 // We have a function we have information about. Check if it's something we
6163 // can safely outline.
6164 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
6165
6166 // We don't know what's going on with the callee at all. Don't touch it.
6167 if (!CalleeMF)
6168 return UnknownCallOutlineType;
6169
6170 // Check if we know anything about the callee saves on the function. If we
6171 // don't, then don't touch it, since that implies that we haven't
6172 // computed anything about its stack frame yet.
6173 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6174 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6175 MFI.getNumObjects() > 0)
6176 return UnknownCallOutlineType;
6177
6178 // At this point, we can say that CalleeMF ought not to pass anything on the
6179 // stack. Therefore, we can outline it.
6180 return outliner::InstrType::Legal;
6181 }
6182
6183 // Don't outline positions.
6184 if (MI.isPosition())
6185 return outliner::InstrType::Illegal;
6186
6187 // Don't touch the link register or W30.
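// (W30 is the 32-bit view of LR/X30; passing the register info makes these
// checks alias-aware, so explicit accesses through either name are caught.)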
6188 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
6189 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
6190 return outliner::InstrType::Illegal;
6191
6192 // Don't outline BTI instructions, because that will prevent the outlining
6193 // site from being indirectly callable.
6194 if (MI.getOpcode() == AArch64::HINT) {
6195 int64_t Imm = MI.getOperand(0).getImm();
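// HINT #32, #34, #36 and #38 are BTI, BTI c, BTI j and BTI jc respectively.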
6196 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
6197 return outliner::InstrType::Illegal;
6198 }
6199
6200 return outliner::InstrType::Legal;
6201 }
6202
6203 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6204 for (MachineInstr &MI : MBB) {
6205 const MachineOperand *Base;
6206 unsigned Width;
6207 int64_t Offset;
6208
6209 // Is this a load or store with an immediate offset with SP as the base?
6210 if (!MI.mayLoadOrStore() ||
6211 !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
6212 (Base->isReg() && Base->getReg() != AArch64::SP))
6213 continue;
6214
6215 // It is, so we have to fix it up.
6216 unsigned Scale;
6217 int64_t Dummy1, Dummy2;
6218
6219 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
6220 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
6221 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
6222 assert(Scale != 0 && "Unexpected opcode!");
6223
6224 // We've pushed the return address to the stack, so add 16 to the offset.
6225 // This is safe, since we already checked if it would overflow when we
6226 // checked if this instruction was legal to outline.
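// For example, an outlined 'ldr x0, [sp, #8]' (Offset = 8, Scale = 8) becomes
// 'ldr x0, [sp, #24]', i.e. an encoded immediate of (8 + 16) / 8 = 3.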
6227 int64_t NewImm = (Offset + 16) / Scale;
6228 StackOffsetOperand.setImm(NewImm);
6229 }
6230 }
6231
6232 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
6233 bool ShouldSignReturnAddr,
6234 bool ShouldSignReturnAddrWithAKey) {
6235 if (ShouldSignReturnAddr) {
6236 MachineBasicBlock::iterator MBBPAC = MBB.begin();
6237 MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
6238 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
6239 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6240 DebugLoc DL;
6241
6242 if (MBBAUT != MBB.end())
6243 DL = MBBAUT->getDebugLoc();
6244
6245 // At the very beginning of the basic block we insert the following,
6246 // depending on the key type:
6247 //
6248 // a_key: b_key:
6249 // PACIASP EMITBKEY
6250 // CFI_INSTRUCTION PACIBSP
6251 // CFI_INSTRUCTION
6252 if (ShouldSignReturnAddrWithAKey) {
6253 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP))
6254 .setMIFlag(MachineInstr::FrameSetup);
6255 } else {
6256 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
6257 .setMIFlag(MachineInstr::FrameSetup);
6258 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP))
6259 .setMIFlag(MachineInstr::FrameSetup);
6260 }
6261 unsigned CFIIndex =
6262 MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
6263 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
6264 .addCFIIndex(CFIIndex)
6265 .setMIFlags(MachineInstr::FrameSetup);
6266
6267 // If v8.3a features are available, we can replace a RET instruction with
6268 // RETAA or RETAB and omit the AUT instruction.
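// i.e. the epilogue becomes RETAA/RETAB with v8.3a, or AUTIASP/AUTIBSP
// followed by the original return otherwise.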
6269 if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() &&
6270 MBBAUT->getOpcode() == AArch64::RET) {
6271 BuildMI(MBB, MBBAUT, DL,
6272 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
6273 : AArch64::RETAB))
6274 .copyImplicitOps(*MBBAUT);
6275 MBB.erase(MBBAUT);
6276 } else {
6277 BuildMI(MBB, MBBAUT, DL,
6278 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
6279 : AArch64::AUTIBSP))
6280 .setMIFlag(MachineInstr::FrameDestroy);
6281 }
6282 }
6283 }
6284
6285 void AArch64InstrInfo::buildOutlinedFrame(
6286 MachineBasicBlock &MBB, MachineFunction &MF,
6287 const outliner::OutlinedFunction &OF) const {
6288 // For thunk outlining, rewrite the last instruction from a call to a
6289 // tail-call.
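// For thunk outlining the sequence ends in the call itself: a 'bl callee' is
// rewritten to a direct tail call (TCRETURNdi) and a 'blr xN' to a register
// tail call (TCRETURNriALL).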
6290 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6291 MachineInstr *Call = &*--MBB.instr_end();
6292 unsigned TailOpcode;
6293 if (Call->getOpcode() == AArch64::BL) {
6294 TailOpcode = AArch64::TCRETURNdi;
6295 } else {
6296 assert(Call->getOpcode() == AArch64::BLR);
6297 TailOpcode = AArch64::TCRETURNriALL;
6298 }
6299 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
6300 .add(Call->getOperand(0))
6301 .addImm(0);
6302 MBB.insert(MBB.end(), TC);
6303 Call->eraseFromParent();
6304 }
6305
6306 bool IsLeafFunction = true;
6307
6308 // Is there a call in the outlined range?
6309 auto IsNonTailCall = [](const MachineInstr &MI) {
6310 return MI.isCall() && !MI.isReturn();
6311 };
6312
6313 if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
6314 // Fix up the instructions in the range, since we're going to modify the
6315 // stack.
6316 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6317 "Can only fix up stack references once");
6318 fixupPostOutline(MBB);
6319
6320 IsLeafFunction = false;
6321
6322 // LR has to be a live in so that we can save it.
6323 MBB.addLiveIn(AArch64::LR);
6324
6325 MachineBasicBlock::iterator It = MBB.begin();
6326 MachineBasicBlock::iterator Et = MBB.end();
6327
6328 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6329 OF.FrameConstructionID == MachineOutlinerThunk)
6330 Et = std::prev(MBB.end());
6331
6332 // Insert a save before the outlined region
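// STRXpre SP, LR, SP, -16 is the pre-indexed store 'str x30, [sp, #-16]!'.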
6333 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
6334 .addReg(AArch64::SP, RegState::Define)
6335 .addReg(AArch64::LR)
6336 .addReg(AArch64::SP)
6337 .addImm(-16);
6338 It = MBB.insert(It, STRXpre);
6339
6340 const TargetSubtargetInfo &STI = MF.getSubtarget();
6341 const MCRegisterInfo *MRI = STI.getRegisterInfo();
6342 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
6343
6344 // Add a CFI saying the stack was moved 16 B down.
6345 int64_t StackPosEntry =
6346 MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
6347 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6348 .addCFIIndex(StackPosEntry)
6349 .setMIFlags(MachineInstr::FrameSetup);
6350
6351 // Add a CFI saying that the LR that we want to find is now 16 B higher than
6352 // before.
6353 int64_t LRPosEntry =
6354 MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
6355 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6356 .addCFIIndex(LRPosEntry)
6357 .setMIFlags(MachineInstr::FrameSetup);
6358
6359 // Insert a restore before the terminator for the function.
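// LDRXpost here is the post-indexed load 'ldr x30, [sp], #16'.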
6360 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
6361 .addReg(AArch64::SP, RegState::Define)
6362 .addReg(AArch64::LR, RegState::Define)
6363 .addReg(AArch64::SP)
6364 .addImm(16);
6365 Et = MBB.insert(Et, LDRXpost);
6366 }
6367
6368 // If multiple candidates reach this point, they must agree on their return
6369 // address signing. It is therefore enough to consider the signing behaviour
6370 // of just one of them.
6371 const Function &CF = OF.Candidates.front().getMF()->getFunction();
6372 bool ShouldSignReturnAddr = false;
6373 if (CF.hasFnAttribute("sign-return-address")) {
6374 StringRef Scope =
6375 CF.getFnAttribute("sign-return-address").getValueAsString();
6376 if (Scope.equals("all"))
6377 ShouldSignReturnAddr = true;
6378 else if (Scope.equals("non-leaf") && !IsLeafFunction)
6379 ShouldSignReturnAddr = true;
6380 }
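// (For reference: clang's -mbranch-protection=standard is expected to produce
// "sign-return-address"="non-leaf" together with
// "sign-return-address-key"="a_key"; treat this mapping as an assumption.)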
6381
6382 // a_key is the default
6383 bool ShouldSignReturnAddrWithAKey = true;
6384 if (CF.hasFnAttribute("sign-return-address-key")) {
6385 const StringRef Key =
6386 CF.getFnAttribute("sign-return-address-key").getValueAsString();
6387 // Key can either be a_key or b_key
6388 assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) &&
6389 "Return address signing key must be either a_key or b_key");
6390 ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key");
6391 }
6392
6393 // If this is a tail call outlined function, then there's already a return.
6394 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6395 OF.FrameConstructionID == MachineOutlinerThunk) {
6396 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6397 ShouldSignReturnAddrWithAKey);
6398 return;
6399 }
6400
6401 // It's not a tail call, so we have to insert the return ourselves.
6402 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
6403 .addReg(AArch64::LR, RegState::Undef);
6404 MBB.insert(MBB.end(), ret);
6405
6406 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6407 ShouldSignReturnAddrWithAKey);
6408
6409 // Did we have to modify the stack by saving the link register?
6410 if (OF.FrameConstructionID != MachineOutlinerDefault)
6411 return;
6412
6413 // We modified the stack.
6414 // Walk over the basic block and fix up all the stack accesses.
6415 fixupPostOutline(MBB);
6416 }
6417
6418 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
6419 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6420 MachineFunction &MF, const outliner::Candidate &C) const {
6421
6422 // Are we tail calling?
6423 if (C.CallConstructionID == MachineOutlinerTailCall) {
6424 // If yes, then we can just branch to the label.
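// TCRETURNdi is later lowered to a plain 'b OUTLINED_FUNCTION_N', so no LR
// save is needed in this case.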
6425 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
6426 .addGlobalAddress(M.getNamedValue(MF.getName()))
6427 .addImm(0));
6428 return It;
6429 }
6430
6431 // Are we saving the link register?
6432 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6433 C.CallConstructionID == MachineOutlinerThunk) {
6434 // No, so just insert the call.
6435 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
6436 .addGlobalAddress(M.getNamedValue(MF.getName())));
6437 return It;
6438 }
6439
6440 // We want to return the spot where we inserted the call.
6441 MachineBasicBlock::iterator CallPt;
6442
6443 // Instructions for saving and restoring LR around the call instruction we're
6444 // going to insert.
6445 MachineInstr *Save;
6446 MachineInstr *Restore;
6447 // Can we save to a register?
6448 if (C.CallConstructionID == MachineOutlinerRegSave) {
6449 // FIXME: This logic should be sunk into a target-specific interface so that
6450 // we don't have to recompute the register.
6451 unsigned Reg = findRegisterToSaveLRTo(C);
6452 assert(Reg != 0 && "No callee-saved register available?");
6453
6454 // Save and restore LR from that register.
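// ORRXrs Reg, XZR, LR, 0 is the canonical 'mov xN, x30'; the restore below is
// the reverse move.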
6455 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
6456 .addReg(AArch64::XZR)
6457 .addReg(AArch64::LR)
6458 .addImm(0);
6459 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
6460 .addReg(AArch64::XZR)
6461 .addReg(Reg)
6462 .addImm(0);
6463 } else {
6464 // We have the default case. Save and restore from SP.
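// i.e. 'str x30, [sp, #-16]!' before the call and 'ldr x30, [sp], #16' after
// it.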
6465 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
6466 .addReg(AArch64::SP, RegState::Define)
6467 .addReg(AArch64::LR)
6468 .addReg(AArch64::SP)
6469 .addImm(-16);
6470 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
6471 .addReg(AArch64::SP, RegState::Define)
6472 .addReg(AArch64::LR, RegState::Define)
6473 .addReg(AArch64::SP)
6474 .addImm(16);
6475 }
6476
6477 It = MBB.insert(It, Save);
6478 It++;
6479
6480 // Insert the call.
6481 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
6482 .addGlobalAddress(M.getNamedValue(MF.getName())));
6483 CallPt = It;
6484 It++;
6485
6486 It = MBB.insert(It, Restore);
6487 return CallPt;
6488 }
6489
6490 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
6491 MachineFunction &MF) const {
6492 return MF.getFunction().hasMinSize();
6493 }
6494
6495 Optional<DestSourcePair>
6496 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
6497
6498 // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register and a
6499 // zero shift immediate are used as aliases for the mov instruction.
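// For example, ORRWrs w0, wzr, w1, 0 is 'mov w0, w1': the copy destination is
// operand 0 and the copy source is operand 2.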
6500 if (MI.getOpcode() == AArch64::ORRWrs &&
6501 MI.getOperand(1).getReg() == AArch64::WZR &&
6502 MI.getOperand(3).getImm() == 0x0) {
6503 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
6504 }
6505
6506 if (MI.getOpcode() == AArch64::ORRXrs &&
6507 MI.getOperand(1).getReg() == AArch64::XZR &&
6508 MI.getOperand(3).getImm() == 0x0) {
6509 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
6510 }
6511
6512 return None;
6513 }
6514
6515 Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
6516 Register Reg) const {
6517 int Sign = 1;
6518 int64_t Offset = 0;
6519
6520 // TODO: Handle cases where Reg is a super- or sub-register of the
6521 // destination register.
6522 if (Reg != MI.getOperand(0).getReg())
6523 return None;
6524
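// The result describes MI as 'Reg = <returned reg> + <returned offset>';
// e.g. 'add x8, x9, #16' yields {x9, +16} and 'sub sp, sp, #1, lsl #12'
// yields {sp, -4096}.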
6525 switch (MI.getOpcode()) {
6526 default:
6527 return None;
6528 case AArch64::SUBWri:
6529 case AArch64::SUBXri:
6530 case AArch64::SUBSWri:
6531 case AArch64::SUBSXri:
6532 Sign *= -1;
6533 LLVM_FALLTHROUGH;
6534 case AArch64::ADDSWri:
6535 case AArch64::ADDSXri:
6536 case AArch64::ADDWri:
6537 case AArch64::ADDXri: {
6538 // TODO: Third operand can be global address (usually some string).
6539 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
6540 !MI.getOperand(2).isImm())
6541 return None;
6542 Offset = MI.getOperand(2).getImm() * Sign;
6543 int Shift = MI.getOperand(3).getImm();
6544 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
6545 Offset = Offset << Shift;
6546 }
6547 }
6548 return RegImmPair{MI.getOperand(1).getReg(), Offset};
6549 }
6550
6551 /// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
6552 /// the destination register then, if possible, describe the value in terms of
6553 /// the source register.
6554 static Optional<ParamLoadedValue>
6555 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
6556 const TargetInstrInfo *TII,
6557 const TargetRegisterInfo *TRI) {
6558 auto DestSrc = TII->isCopyInstr(MI);
6559 if (!DestSrc)
6560 return None;
6561
6562 Register DestReg = DestSrc->Destination->getReg();
6563 Register SrcReg = DestSrc->Source->getReg();
6564
6565 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
6566
6567 // If the described register is the destination, just return the source.
6568 if (DestReg == DescribedReg)
6569 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
6570
6571 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
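// For example, 'mov w0, w1' (ORRWrs) also zeroes the top half of x0, so a
// request to describe x0 can be answered with w1.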
6572 if (MI.getOpcode() == AArch64::ORRWrs &&
6573 TRI->isSuperRegister(DestReg, DescribedReg))
6574 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
6575
6576 // We may need to describe the lower part of an ORRXrs move.
6577 if (MI.getOpcode() == AArch64::ORRXrs &&
6578 TRI->isSubRegister(DestReg, DescribedReg)) {
6579 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
6580 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
6581 }
6582
6583 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
6584 "Unhandled ORR[XW]rs copy case");
6585
6586 return None;
6587 }
6588
6589 Optional<ParamLoadedValue>
6590 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
6591 Register Reg) const {
6592 const MachineFunction *MF = MI.getMF();
6593 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
6594 switch (MI.getOpcode()) {
6595 case AArch64::MOVZWi:
6596 case AArch64::MOVZXi: {
6597 // MOVZWi may be used for producing zero-extended 32-bit immediates in
6598 // 64-bit parameters, so we need to consider super-registers.
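// For example, 'movz w0, #42' can be the producer of the value of the x0
// parameter.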
6599 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
6600 return None;
6601
6602 if (!MI.getOperand(1).isImm())
6603 return None;
6604 int64_t Immediate = MI.getOperand(1).getImm();
6605 int Shift = MI.getOperand(2).getImm();
6606 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
6607 nullptr);
6608 }
6609 case AArch64::ORRWrs:
6610 case AArch64::ORRXrs:
6611 return describeORRLoadedValue(MI, Reg, this, TRI);
6612 }
6613
6614 return TargetInstrInfo::describeLoadedValue(MI, Reg);
6615 }
6616
6617 #define GET_INSTRINFO_HELPERS
6618 #include "AArch64GenInstrInfo.inc"
6619