//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

#include <algorithm>
#include <climits>
#include <cstdlib>
#include <cstring>

#define GET_INSTRINFO_CTOR
#include "AArch64GenInstrInfo.inc"

using namespace llvm;

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
    RI(*this, STI), Subtarget(STI) {}
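
/// Copy a value between two physical registers, choosing the copy idiom by
/// register class: ADD #0 for copies involving the stack pointer, MSR/MRS for
/// NZCV, ORR with the zero register for GPRs, and FMOV (or a stack
/// store/reload for q-registers) for FP registers.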
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  unsigned Opc = 0;
  unsigned ZeroReg = 0;
  if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
    // E.g. ADD xDst, xsp, #0 (, lsl #0)
    BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
      .addReg(SrcReg)
      .addImm(0);
    return;
  } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
    // E.g. ADD wDST, wsp, #0 (, lsl #0)
    BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
      .addReg(SrcReg)
      .addImm(0);
    return;
  } else if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg));
    // E.g. MSR NZCV, xDST
    BuildMI(MBB, I, DL, get(AArch64::MSRix))
      .addImm(A64SysReg::NZCV)
      .addReg(SrcReg);
    return;
  } else if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg));
    // E.g. MRS xDST, NZCV
    BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
      .addImm(A64SysReg::NZCV);
    return;
  } else if (AArch64::GPR64RegClass.contains(DestReg)) {
    assert(AArch64::GPR64RegClass.contains(SrcReg));
    Opc = AArch64::ORRxxx_lsl;
    ZeroReg = AArch64::XZR;
  } else if (AArch64::GPR32RegClass.contains(DestReg)) {
    assert(AArch64::GPR32RegClass.contains(SrcReg));
    Opc = AArch64::ORRwww_lsl;
    ZeroReg = AArch64::WZR;
  } else if (AArch64::FPR32RegClass.contains(DestReg)) {
    assert(AArch64::FPR32RegClass.contains(SrcReg));
    BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
      .addReg(SrcReg);
    return;
  } else if (AArch64::FPR64RegClass.contains(DestReg)) {
    assert(AArch64::FPR64RegClass.contains(SrcReg));
    BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
      .addReg(SrcReg);
    return;
  } else if (AArch64::FPR128RegClass.contains(DestReg)) {
    assert(AArch64::FPR128RegClass.contains(SrcReg));

    // FIXME: there's no good way to do this, at least without NEON:
    //   + There's no single move instruction for q-registers
    //   + We can't create a spill slot and use normal STR/LDR because stack
    //     allocation has already happened
    //   + We can't go via X-registers with FMOV because register allocation
    //     has already happened.
    // This may not be efficient, but at least it works.
    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
      .addReg(SrcReg)
      .addReg(AArch64::XSP)
      .addImm(0x1ff & -16);

    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
      .addReg(AArch64::XSP, RegState::Define)
      .addReg(AArch64::XSP)
      .addImm(16);
    return;
  } else {
    llvm_unreachable("Unknown register class in copyPhysReg");
  }

  // E.g. ORR xDst, xzr, xSrc, lsl #0
  BuildMI(MBB, I, DL, get(Opc), DestReg)
    .addReg(ZeroReg)
    .addReg(SrcReg)
    .addImm(0);
}
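
/// Build a DBG_VALUE machine instruction referring to a stack slot, recording
/// the frame index, offset and variable metadata.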
MachineInstr *
AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
                                           uint64_t Offset, const MDNode *MDPtr,
                                           DebugLoc DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
    .addFrameIndex(FrameIx).addImm(0)
    .addImm(Offset)
    .addMetadata(MDPtr);
  return &*MIB;
}

/// Does the Opcode represent a conditional branch that we can remove and
/// re-add at the end of a basic block?
static bool isCondBranch(unsigned Opc) {
  return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
         Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
         Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
         Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
}

/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
/// setting TBB to the destination basic block and populating the Cond vector
/// with data necessary to recreate the conditional branch at a later
/// date. First element will be the opcode, and subsequent ones define the
/// conditions being branched on in an instruction-specific manner.
static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
                               SmallVectorImpl<MachineOperand> &Cond) {
  switch (I->getOpcode()) {
  case AArch64::Bcc:
  case AArch64::CBZw:
  case AArch64::CBZx:
  case AArch64::CBNZw:
  case AArch64::CBNZx:
    // These instructions just have one predicate operand in position 0 (either
    // a condition code or a register being compared).
    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
    Cond.push_back(I->getOperand(0));
    TBB = I->getOperand(1).getMBB();
    return;
  case AArch64::TBZwii:
  case AArch64::TBZxii:
  case AArch64::TBNZwii:
  case AArch64::TBNZxii:
    // These have two predicate operands: a register and a bit position.
    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
    Cond.push_back(I->getOperand(0));
    Cond.push_back(I->getOperand(1));
    TBB = I->getOperand(2).getMBB();
    return;
  default:
    llvm_unreachable("Unknown conditional branch to classify");
  }
}
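
/// Analyze the terminators of MBB, setting TBB/FBB to the taken and
/// fall-through destinations and filling Cond with the operands needed to
/// recreate any conditional branch. Returns true if the terminator sequence
/// cannot be understood (e.g. an indirect branch).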
bool
AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastOpc == AArch64::Bimm) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranch(LastOpc)) {
      classifyCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && LastOpc == AArch64::Bimm) {
    while (SecondLastOpc == AArch64::Bimm) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now if the only remaining terminator is an unconditional
        // branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (LastOpc == AArch64::Bimm) {
    if (SecondLastOpc == AArch64::Bcc) {
      TBB = SecondLastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
      Cond.push_back(SecondLastInst->getOperand(0));
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(SecondLastOpc)) {
      classifyCondBranch(SecondLastInst, TBB, Cond);
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    }
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
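
/// Invert the condition captured by AnalyzeBranch: flip the condition code of
/// a Bcc, and swap each compare-and-branch or test-and-branch opcode with its
/// negated twin. Returns false on success.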
bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  switch (Cond[0].getImm()) {
  case AArch64::Bcc: {
    A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
    CC = A64InvertCondCode(CC);
    Cond[1].setImm(CC);
    return false;
  }
  case AArch64::CBZw:
    Cond[0].setImm(AArch64::CBNZw);
    return false;
  case AArch64::CBZx:
    Cond[0].setImm(AArch64::CBNZx);
    return false;
  case AArch64::CBNZw:
    Cond[0].setImm(AArch64::CBZw);
    return false;
  case AArch64::CBNZx:
    Cond[0].setImm(AArch64::CBZx);
    return false;
  case AArch64::TBZwii:
    Cond[0].setImm(AArch64::TBNZwii);
    return false;
  case AArch64::TBZxii:
    Cond[0].setImm(AArch64::TBNZxii);
    return false;
  case AArch64::TBNZwii:
    Cond[0].setImm(AArch64::TBZwii);
    return false;
  case AArch64::TBNZxii:
    Cond[0].setImm(AArch64::TBZxii);
    return false;
  default:
    llvm_unreachable("Unknown branch type");
  }
}
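
/// Insert the branch code described by TBB/FBB/Cond at the end of MBB. The
/// conditional branch is rebuilt from the opcode stashed in Cond[0] and the
/// operands that follow it. Returns the number of instructions inserted.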
unsigned
AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  if (FBB == 0 && Cond.empty()) {
    BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
    return 1;
  } else if (FBB == 0) {
    MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
    for (int i = 1, e = Cond.size(); i != e; ++i)
      MIB.addOperand(Cond[i]);
    MIB.addMBB(TBB);
    return 1;
  }

  MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
  for (int i = 1, e = Cond.size(); i != e; ++i)
    MIB.addOperand(Cond[i]);
  MIB.addMBB(TBB);

  BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
  return 2;
}
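
/// Remove up to two branches from the end of MBB: the trailing unconditional
/// branch and/or a preceding conditional branch. Returns the number of
/// instructions removed.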
unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranch(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
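
/// Expand target-specific pseudo-instructions after register allocation.
/// Currently only TLSDESC_BLRx needs expansion: it is rewritten to a real
/// BLRx and bundled with a TLSDESCCALL marker instruction.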
bool
AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();

  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case AArch64::TLSDESC_BLRx: {
    MachineInstr *NewMI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
        .addOperand(MI.getOperand(1));
    MI.setDesc(get(AArch64::BLRx));

    llvm::finalizeBundle(MBB, NewMI, *++MBBI);
    return true;
  }
  default:
    return false;
  }

  return false;
}
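
/// Store SrcReg to stack slot FrameIdx, selecting an integer or
/// floating-point store of the width implied by the register class.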
void
AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIdx,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FrameIdx);

  MachineMemOperand *MMO
    = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
                              MachineMemOperand::MOStore,
                              MFI.getObjectSize(FrameIdx),
                              Align);

  unsigned StoreOp = 0;
  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
    switch (RC->getSize()) {
    case 4: StoreOp = AArch64::LS32_STR; break;
    case 8: StoreOp = AArch64::LS64_STR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  } else {
    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
            RC->hasType(MVT::f128))
           && "Expected integer or floating type for store");
    switch (RC->getSize()) {
    case 4: StoreOp = AArch64::LSFP32_STR; break;
    case 8: StoreOp = AArch64::LSFP64_STR; break;
    case 16: StoreOp = AArch64::LSFP128_STR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  }

  MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
  NewMI.addReg(SrcReg, getKillRegState(isKill))
    .addFrameIndex(FrameIdx)
    .addImm(0)
    .addMemOperand(MMO);
}
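
/// Load DestReg from stack slot FrameIdx, selecting an integer or
/// floating-point load of the width implied by the register class.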
void
AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned DestReg, int FrameIdx,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FrameIdx);

  MachineMemOperand *MMO
    = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
                              MachineMemOperand::MOLoad,
                              MFI.getObjectSize(FrameIdx),
                              Align);

  unsigned LoadOp = 0;
  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
    switch (RC->getSize()) {
    case 4: LoadOp = AArch64::LS32_LDR; break;
    case 8: LoadOp = AArch64::LS64_LDR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  } else {
    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
            || RC->hasType(MVT::f128))
           && "Expected integer or floating type for load");
    switch (RC->getSize()) {
    case 4: LoadOp = AArch64::LSFP32_LDR; break;
    case 8: LoadOp = AArch64::LSFP64_LDR; break;
    case 16: LoadOp = AArch64::LSFP128_LDR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  }

  MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
  NewMI.addFrameIndex(FrameIdx)
    .addImm(0)
    .addMemOperand(MMO);
}
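
/// Estimate the largest stack offset the register scavenger can rely on being
/// directly addressable, by examining each instruction with a frame-index
/// operand and taking the most restrictive immediate range found.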
unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
  unsigned Limit = (1 << 16) - 1;
  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) {
    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (!I->getOperand(i).isFI()) continue;

        // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
        // is the largest offset guaranteed to fit in the immediate offset.
        if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
          Limit = std::min(Limit, 0xfffu);
          break;
        }

        int AccessScale, MinOffset, MaxOffset;
        getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
        Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));

        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}
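
/// Report the addressing constraints of a load or store: AccessScale is the
/// unit the unsigned immediate is scaled by, and MinOffset/MaxOffset bound
/// the byte offsets representable in the instruction's encoding.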
void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
                                             int &AccessScale, int &MinOffset,
                                             int &MaxOffset) const {
  switch (MI.getOpcode()) {
  default: llvm_unreachable("Unknown load/store kind");
  case TargetOpcode::DBG_VALUE:
    AccessScale = 1;
    MinOffset = INT_MIN;
    MaxOffset = INT_MAX;
    return;
  case AArch64::LS8_LDR: case AArch64::LS8_STR:
  case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
  case AArch64::LDRSBw:
  case AArch64::LDRSBx:
    AccessScale = 1;
    MinOffset = 0;
    MaxOffset = 0xfff;
    return;
  case AArch64::LS16_LDR: case AArch64::LS16_STR:
  case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
  case AArch64::LDRSHw:
  case AArch64::LDRSHx:
    AccessScale = 2;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS32_LDR: case AArch64::LS32_STR:
  case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
  case AArch64::LDRSWx:
  case AArch64::LDPSWx:
    AccessScale = 4;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS64_LDR: case AArch64::LS64_STR:
  case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
  case AArch64::PRFM:
    AccessScale = 8;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
    AccessScale = 16;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
  case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
    AccessScale = 4;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
  case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
    AccessScale = 8;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
    AccessScale = 16;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  }
}
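
/// Return the size of MI in bytes. Fixed-size instructions are read from the
/// MC descriptor; inline assembly, bundles, labels and zero-size marker
/// pseudo-instructions are handled specially.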
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MCInstrDesc &MCID = MI.getDesc();
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction &MF = *MBB.getParent();
  const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();

  if (MCID.getSize())
    return MCID.getSize();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);

  if (MI.isLabel())
    return 0;

  switch (MI.getOpcode()) {
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    return 0;
  case AArch64::TLSDESCCALL:
    return 0;
  default:
    llvm_unreachable("Unknown instruction class");
  }
}
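
/// Sum the sizes of the instructions inside a bundle, excluding the BUNDLE
/// header itself.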
unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}
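
/// Resolve the frame-index operand of MI against the real FrameReg/Offset.
/// Not yet implemented for A64; currently a stub that aborts.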
bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const AArch64InstrInfo &TII) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();

  MFI.getObjectOffset(FrameRegIdx);
  llvm_unreachable("Unimplemented rewriteFrameIndex");
}
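
/// Emit code computing DstReg = SrcReg +/- NumBytes. Offsets that do not fit
/// in the 24-bit range of a shifted add/sub immediate pair are first
/// materialized into ScratchReg with a MOVZ/MOVK sequence and applied with a
/// register add/sub.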
void llvm::emitRegUpdate(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         DebugLoc dl, const TargetInstrInfo &TII,
                         unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
                         int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
  if (NumBytes == 0 && DstReg == SrcReg)
    return;
  else if (abs(NumBytes) & ~0xffffff) {
    // Generically, we have to materialize the offset into a temporary register
    // and subtract it. There are a couple of ways this could be done, for now
    // we'll use a movz/movk or movn/movk sequence.
    uint64_t Bits = static_cast<uint64_t>(abs(NumBytes));
    BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
      .addImm(0xffff & Bits).addImm(0)
      .setMIFlags(MIFlags);

    Bits >>= 16;
    if (Bits & 0xffff) {
      BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
        .addReg(ScratchReg)
        .addImm(0xffff & Bits).addImm(1)
        .setMIFlags(MIFlags);
    }

    Bits >>= 16;
    if (Bits & 0xffff) {
      BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
        .addReg(ScratchReg)
        .addImm(0xffff & Bits).addImm(2)
        .setMIFlags(MIFlags);
    }

    Bits >>= 16;
    if (Bits & 0xffff) {
      BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
        .addReg(ScratchReg)
        .addImm(0xffff & Bits).addImm(3)
        .setMIFlags(MIFlags);
    }

    // ADD DST, SRC, xTMP (, lsl #0)
    unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
    BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
      .addReg(SrcReg, RegState::Kill)
      .addReg(ScratchReg, RegState::Kill)
      .addImm(0)
      .setMIFlag(MIFlags);
    return;
  }

  // Now we know that the adjustment can be done in at most two add/sub
  // (immediate) instructions, which is always more efficient than a
  // literal-pool load, or even a hypothetical movz/movk/add sequence.

  // Decide whether we're doing addition or subtraction.
  unsigned LowOp, HighOp;
  if (NumBytes >= 0) {
    LowOp = AArch64::ADDxxi_lsl0_s;
    HighOp = AArch64::ADDxxi_lsl12_s;
  } else {
    LowOp = AArch64::SUBxxi_lsl0_s;
    HighOp = AArch64::SUBxxi_lsl12_s;
    NumBytes = abs(NumBytes);
  }

  // If we're here, at the very least a move needs to be produced, which just
  // happens to be materializable by an ADD.
  if ((NumBytes & 0xfff) || NumBytes == 0) {
    BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
      .addReg(SrcReg, RegState::Kill)
      .addImm(NumBytes & 0xfff)
      .setMIFlag(MIFlags);

    // Next update should use the register we've just defined.
    SrcReg = DstReg;
  }

  if (NumBytes & 0xfff000) {
    BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
      .addReg(SrcReg, RegState::Kill)
      .addImm(NumBytes >> 12)
      .setMIFlag(MIFlags);
  }
}
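
/// Adjust the stack pointer by NumBytes, using X16 (IP0) as the scratch
/// register when the offset has to be materialized.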
void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                        DebugLoc dl, const TargetInstrInfo &TII,
                        unsigned ScratchReg, int64_t NumBytes,
                        MachineInstr::MIFlag MIFlags) {
  emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
                NumBytes, MIFlags);
}
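
/// Pass that cleans up redundant local-dynamic TLS accesses: once the base
/// address of _TLS_MODULE_BASE_ has been computed on a path through the
/// dominator tree, later TLSDESC calls to it are replaced by a copy from a
/// cached virtual register.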
namespace {
struct LDTLSCleanup : public MachineFunctionPass {
  static char ID;
  LDTLSCleanup() : MachineFunctionPass(ID) {}

  virtual bool runOnMachineFunction(MachineFunction &MF) {
    AArch64MachineFunctionInfo *MFI
      = MF.getInfo<AArch64MachineFunctionInfo>();
    if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
      // No point folding accesses if there aren't at least two.
      return false;
    }

    MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
    return VisitNode(DT->getRootNode(), 0);
  }

  // Visit the dominator subtree rooted at Node in pre-order.
  // If TLSBaseAddrReg is non-zero, then use that to replace any
  // TLS_base_addr instructions. Otherwise, create the register
  // when the first such instruction is seen, and then use it
  // as we encounter more instructions.
  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
    MachineBasicBlock *BB = Node->getBlock();
    bool Changed = false;

    // Traverse the current block.
    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
         ++I) {
      switch (I->getOpcode()) {
      case AArch64::TLSDESC_BLRx:
        // Make sure it's a local dynamic access.
        if (!I->getOperand(1).isSymbol() ||
            strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
          break;

        if (TLSBaseAddrReg)
          I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
        else
          I = SetRegister(I, &TLSBaseAddrReg);
        Changed = true;
        break;
      default:
        break;
      }
    }

    // Visit the children of this block in the dominator tree.
    for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
         I != E; ++I) {
      Changed |= VisitNode(*I, TLSBaseAddrReg);
    }

    return Changed;
  }

  // Replace the TLS_base_addr instruction I with a copy from
  // TLSBaseAddrReg, returning the new instruction.
  MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
                                       unsigned TLSBaseAddrReg) {
    MachineFunction *MF = I->getParent()->getParent();
    const AArch64TargetMachine *TM =
        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
    const AArch64InstrInfo *TII = TM->getInstrInfo();

    // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of
    // the code sequence assumes the address will be.
    MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
                                 TII->get(TargetOpcode::COPY),
                                 AArch64::X0)
      .addReg(TLSBaseAddrReg);

    // Erase the TLS_base_addr instruction.
    I->eraseFromParent();

    return Copy;
  }

  // Create a virtual register in *TLSBaseAddrReg, and populate it by
  // inserting a copy instruction after I. Returns the new instruction.
  MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
    MachineFunction *MF = I->getParent()->getParent();
    const AArch64TargetMachine *TM =
        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
    const AArch64InstrInfo *TII = TM->getInstrInfo();

    // Create a virtual register for the TLS base address.
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);

    // Insert a copy from X0 to TLSBaseAddrReg for later.
    MachineInstr *Next = I->getNextNode();
    MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
                                 TII->get(TargetOpcode::COPY),
                                 *TLSBaseAddrReg)
      .addReg(AArch64::X0);

    return Copy;
  }

  virtual const char *getPassName() const {
    return "Local Dynamic TLS Access Clean-up";
  }

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG();
    AU.addRequired<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
}

char LDTLSCleanup::ID = 0;
FunctionPass *
llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }