1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "SIRegisterInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "SIInstrInfo.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/LLVMContext.h"
26
27 using namespace llvm;
28
29 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
30 for (unsigned i = 0; PSets[i] != -1; ++i) {
31 if (PSets[i] == (int)PSetID)
32 return true;
33 }
34 return false;
35 }
36
37 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
38 BitVector &PressureSets) const {
39 for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
40 const int *PSets = getRegUnitPressureSets(*U);
41 if (hasPressureSet(PSets, PSetID)) {
42 PressureSets.set(PSetID);
43 break;
44 }
45 }
46 }
47
48 static cl::opt<bool> EnableSpillSGPRToSMEM(
49 "amdgpu-spill-sgpr-to-smem",
50 cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
51 cl::init(false));
52
53 static cl::opt<bool> EnableSpillSGPRToVGPR(
54 "amdgpu-spill-sgpr-to-vgpr",
55 cl::desc("Enable spilling VGPRs to SGPRs"),
56 cl::ReallyHidden,
57 cl::init(true));
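
// Note: these cl::opt flags are ordinary backend options, so the SGPR spill
// strategy can be toggled from the llc command line, e.g. (illustrative
// invocation):
//   llc -mtriple=amdgcn -amdgpu-spill-sgpr-to-smem ...
// on a subtarget with scalar stores; otherwise SGPRs spill to VGPR lanes by
// default.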
58
59 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
60 AMDGPURegisterInfo(),
61 SGPRPressureSets(getNumRegPressureSets()),
62 VGPRPressureSets(getNumRegPressureSets()),
63 SpillSGPRToVGPR(false),
64 SpillSGPRToSMEM(false) {
65 if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
66 SpillSGPRToSMEM = true;
67 else if (EnableSpillSGPRToVGPR)
68 SpillSGPRToVGPR = true;
69
70 unsigned NumRegPressureSets = getNumRegPressureSets();
71
72 SGPRSetID = NumRegPressureSets;
73 VGPRSetID = NumRegPressureSets;
74
75 for (unsigned i = 0; i < NumRegPressureSets; ++i) {
76 classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
77 classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
78 }
79
80 // Determine the number of reg units for each pressure set.
81 std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
82 for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
83 const int *PSets = getRegUnitPressureSets(i);
84 for (unsigned j = 0; PSets[j] != -1; ++j) {
85 ++PressureSetRegUnits[PSets[j]];
86 }
87 }
88
89 unsigned VGPRMax = 0, SGPRMax = 0;
90 for (unsigned i = 0; i < NumRegPressureSets; ++i) {
91 if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
92 VGPRSetID = i;
93 VGPRMax = PressureSetRegUnits[i];
94 continue;
95 }
96 if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
97 SGPRSetID = i;
98 SGPRMax = PressureSetRegUnits[i];
99 }
100 }
101
102 assert(SGPRSetID < NumRegPressureSets &&
103 VGPRSetID < NumRegPressureSets);
104 }
105
106 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
107 const MachineFunction &MF) const {
108
109 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
111 unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
112 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
113 }
114
115 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
116 unsigned Reg;
117
118 // Try to place it in a hole after PrivateSegmentBufferReg.
119 if (RegCount & 3) {
120 // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
121 // alignment constraints, so we have a hole where we can put the wave offset.
122 Reg = RegCount - 1;
123 } else {
124 // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
125 // wave offset before it.
126 Reg = RegCount - 5;
127 }
128
129 return Reg;
130 }
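
// Illustrative example for the placement above: with a 102-SGPR limit,
// RegCount & 3 != 0, so the 4-aligned segment buffer sits in s[96:99] and the
// wave byte offset takes the leftover s101 (RegCount - 1). With a 104-SGPR
// limit the buffer occupies s[100:103] and the wave offset moves to s99
// (RegCount - 5), just below it.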
131
132 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
133 const MachineFunction &MF) const {
134 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
135 unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
136 return AMDGPU::SGPR_32RegClass.getRegister(Reg);
137 }
138
139 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
140 const MachineFunction &MF) const {
141 return AMDGPU::SGPR32;
142 }
143
144 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
145 BitVector Reserved(getNumRegs());
146
147 // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
148 // this seems likely to result in bugs, so I'm marking them as reserved.
149 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
150 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
151
152 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
153 reserveRegisterTuples(Reserved, AMDGPU::M0);
154
155 // Reserve the memory aperture registers.
156 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
157 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
158 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
159 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
160
161 // Reserve xnack_mask registers - support is not implemented in Codegen.
162 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
163
164 // Reserve Trap Handler registers - support is not implemented in Codegen.
165 reserveRegisterTuples(Reserved, AMDGPU::TBA);
166 reserveRegisterTuples(Reserved, AMDGPU::TMA);
167 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
168 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
169 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
170 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
171 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
172 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
173 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
174 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
175
176 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
177
178 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
179 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
180 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
181 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
182 reserveRegisterTuples(Reserved, Reg);
183 }
184
185 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
186 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
187 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
188 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
189 reserveRegisterTuples(Reserved, Reg);
190 }
191
192 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
193
194 unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
195 if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
196 // Reserve 1 SGPR for scratch wave offset in case we need to spill.
197 reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
198 }
199
200 unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
201 if (ScratchRSrcReg != AMDGPU::NoRegister) {
202 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
203 // to spill.
204 // TODO: May need to reserve a VGPR if doing LDS spilling.
205 reserveRegisterTuples(Reserved, ScratchRSrcReg);
206 assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
207 }
208
209 // We have to assume the SP is needed in case there are calls in the function,
210 // which is detected after the function is lowered. If we aren't really going
211 // to need SP, don't bother reserving it.
212 unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
213
214 if (StackPtrReg != AMDGPU::NoRegister) {
215 reserveRegisterTuples(Reserved, StackPtrReg);
216 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
217 }
218
219 unsigned FrameReg = MFI->getFrameOffsetReg();
220 if (FrameReg != AMDGPU::NoRegister) {
221 reserveRegisterTuples(Reserved, FrameReg);
222 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
223 }
224
225 return Reserved;
226 }
227
228 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
229 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
230 if (Info->isEntryFunction()) {
231 const MachineFrameInfo &MFI = Fn.getFrameInfo();
232 return MFI.hasStackObjects() || MFI.hasCalls();
233 }
234
235 // May need scavenger for dealing with callee saved registers.
236 return true;
237 }
238
239 bool SIRegisterInfo::requiresFrameIndexScavenging(
240 const MachineFunction &MF) const {
241 const MachineFrameInfo &MFI = MF.getFrameInfo();
242 if (MFI.hasStackObjects())
243 return true;
244
245 // May need to deal with callee saved registers.
246 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
247 return !Info->isEntryFunction();
248 }
249
250 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
251 const MachineFunction &MF) const {
252 // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
253 // create a virtual register for it during frame index elimination, so the
254 // scavenger is directly needed.
255 return MF.getFrameInfo().hasStackObjects() &&
256 MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
257 MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
258 }
259
260 bool SIRegisterInfo::requiresVirtualBaseRegisters(
261 const MachineFunction &) const {
262 // There are no special dedicated stack or frame pointers.
263 return true;
264 }
265
266 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
267 // This helps catch bugs as verifier errors.
268 return true;
269 }
270
271 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
272 assert(SIInstrInfo::isMUBUF(*MI));
273
274 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
275 AMDGPU::OpName::offset);
276 return MI->getOperand(OffIdx).getImm();
277 }
278
279 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
280 int Idx) const {
281 if (!SIInstrInfo::isMUBUF(*MI))
282 return 0;
283
284 assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
285 AMDGPU::OpName::vaddr) &&
286 "Should never see frame index on non-address operand");
287
288 return getMUBUFInstrOffset(MI);
289 }
290
291 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
292 if (!MI->mayLoadOrStore())
293 return false;
294
295 int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
296
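// Note: MUBUF encodes an unsigned 12-bit immediate offset, so a combined
// offset of 4096 bytes or more can no longer be folded into the instruction
// and needs a materialized frame base register.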
297 return !isUInt<12>(FullOffset);
298 }
299
300 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
301 unsigned BaseReg,
302 int FrameIdx,
303 int64_t Offset) const {
304 MachineBasicBlock::iterator Ins = MBB->begin();
305 DebugLoc DL; // Defaults to "unknown"
306
307 if (Ins != MBB->end())
308 DL = Ins->getDebugLoc();
309
310 MachineFunction *MF = MBB->getParent();
311 const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
312 const SIInstrInfo *TII = Subtarget.getInstrInfo();
313
314 if (Offset == 0) {
315 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
316 .addFrameIndex(FrameIdx);
317 return;
318 }
319
320 MachineRegisterInfo &MRI = MF->getRegInfo();
321 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
322
323 unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
324
325 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
326 .addImm(Offset);
327 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
328 .addFrameIndex(FrameIdx);
329
330 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
331 .addReg(OffsetReg, RegState::Kill)
332 .addReg(FIReg);
333 }
334
335 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
336 int64_t Offset) const {
337
338 MachineBasicBlock *MBB = MI.getParent();
339 MachineFunction *MF = MBB->getParent();
340 const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
341 const SIInstrInfo *TII = Subtarget.getInstrInfo();
342
343 #ifndef NDEBUG
344 // FIXME: Is it possible to be storing a frame index to itself?
345 bool SeenFI = false;
346 for (const MachineOperand &MO: MI.operands()) {
347 if (MO.isFI()) {
348 if (SeenFI)
349 llvm_unreachable("should not see multiple frame indices");
350
351 SeenFI = true;
352 }
353 }
354 #endif
355
356 MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
357 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
358 assert(TII->isMUBUF(MI));
359 assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
360 MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
361 "should only be seeing frame offset relative FrameIndex");
362
363
364 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
365 int64_t NewOffset = OffsetOp->getImm() + Offset;
366 assert(isUInt<12>(NewOffset) && "offset should be legal");
367
368 FIOp->ChangeToRegister(BaseReg, false);
369 OffsetOp->setImm(NewOffset);
370 }
371
372 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
373 unsigned BaseReg,
374 int64_t Offset) const {
375 if (!SIInstrInfo::isMUBUF(*MI))
376 return false;
377
378 int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
379
380 return isUInt<12>(NewOffset);
381 }
382
383 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
384 const MachineFunction &MF, unsigned Kind) const {
385 // This is inaccurate. It depends on the instruction and address space. The
386 // only place where we should hit this is for dealing with frame indexes /
387 // private accesses, so this is correct in that case.
388 return &AMDGPU::VGPR_32RegClass;
389 }
390
391 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
392
393 switch (Op) {
394 case AMDGPU::SI_SPILL_S512_SAVE:
395 case AMDGPU::SI_SPILL_S512_RESTORE:
396 case AMDGPU::SI_SPILL_V512_SAVE:
397 case AMDGPU::SI_SPILL_V512_RESTORE:
398 return 16;
399 case AMDGPU::SI_SPILL_S256_SAVE:
400 case AMDGPU::SI_SPILL_S256_RESTORE:
401 case AMDGPU::SI_SPILL_V256_SAVE:
402 case AMDGPU::SI_SPILL_V256_RESTORE:
403 return 8;
404 case AMDGPU::SI_SPILL_S128_SAVE:
405 case AMDGPU::SI_SPILL_S128_RESTORE:
406 case AMDGPU::SI_SPILL_V128_SAVE:
407 case AMDGPU::SI_SPILL_V128_RESTORE:
408 return 4;
409 case AMDGPU::SI_SPILL_V96_SAVE:
410 case AMDGPU::SI_SPILL_V96_RESTORE:
411 return 3;
412 case AMDGPU::SI_SPILL_S64_SAVE:
413 case AMDGPU::SI_SPILL_S64_RESTORE:
414 case AMDGPU::SI_SPILL_V64_SAVE:
415 case AMDGPU::SI_SPILL_V64_RESTORE:
416 return 2;
417 case AMDGPU::SI_SPILL_S32_SAVE:
418 case AMDGPU::SI_SPILL_S32_RESTORE:
419 case AMDGPU::SI_SPILL_V32_SAVE:
420 case AMDGPU::SI_SPILL_V32_RESTORE:
421 return 1;
422 default: llvm_unreachable("Invalid spill opcode");
423 }
424 }
425
426 static int getOffsetMUBUFStore(unsigned Opc) {
427 switch (Opc) {
428 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
429 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
430 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
431 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
432 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
433 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
434 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
435 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
436 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
437 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
438 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
439 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
440 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
441 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
442 default:
443 return -1;
444 }
445 }
446
447 static int getOffsetMUBUFLoad(unsigned Opc) {
448 switch (Opc) {
449 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
450 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
451 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
452 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
453 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
454 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
455 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
456 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
457 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
458 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
459 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
460 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
461 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
462 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
463 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
464 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
465 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
466 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
467 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
468 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
469 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
470 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
471 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
472 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
473 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
474 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
475 default:
476 return -1;
477 }
478 }
479
480 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
481 // need to handle the case where an SGPR may need to be spilled while spilling.
482 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
483 MachineFrameInfo &MFI,
484 MachineBasicBlock::iterator MI,
485 int Index,
486 int64_t Offset) {
487 MachineBasicBlock *MBB = MI->getParent();
488 const DebugLoc &DL = MI->getDebugLoc();
489 bool IsStore = MI->mayStore();
490
491 unsigned Opc = MI->getOpcode();
492 int LoadStoreOp = IsStore ?
493 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
494 if (LoadStoreOp == -1)
495 return false;
496
497 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
498 MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
499 .add(*Reg)
500 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
501 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
502 .addImm(Offset)
503 .addImm(0) // glc
504 .addImm(0) // slc
505 .addImm(0) // tfe
506 .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
507
508 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
509 AMDGPU::OpName::vdata_in);
510 if (VDataIn)
511 NewMI.add(*VDataIn);
512 return true;
513 }
514
515 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
516 unsigned LoadStoreOp,
517 int Index,
518 unsigned ValueReg,
519 bool IsKill,
520 unsigned ScratchRsrcReg,
521 unsigned ScratchOffsetReg,
522 int64_t InstOffset,
523 MachineMemOperand *MMO,
524 RegScavenger *RS) const {
525 MachineBasicBlock *MBB = MI->getParent();
526 MachineFunction *MF = MI->getParent()->getParent();
527 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
528 const SIInstrInfo *TII = ST.getInstrInfo();
529 const MachineFrameInfo &MFI = MF->getFrameInfo();
530
531 const MCInstrDesc &Desc = TII->get(LoadStoreOp);
532 const DebugLoc &DL = MI->getDebugLoc();
533 bool IsStore = Desc.mayStore();
534
535 bool Scavenged = false;
536 unsigned SOffset = ScratchOffsetReg;
537
538 const unsigned EltSize = 4;
539 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
540 unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
541 unsigned Size = NumSubRegs * EltSize;
542 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
543 int64_t ScratchOffsetRegDelta = 0;
544
545 unsigned Align = MFI.getObjectAlignment(Index);
546 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
547
548 assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
549
550 if (!isUInt<12>(Offset + Size - EltSize)) {
551 SOffset = AMDGPU::NoRegister;
552
553 // We currently only support spilling VGPRs to EltSize boundaries, meaning
554 // we can simplify the adjustment of Offset here to just scale with
555 // WavefrontSize.
556 Offset *= ST.getWavefrontSize();
557
558 // We don't have access to the register scavenger if this function is called
559 // during PEI::scavengeFrameVirtualRegs().
560 if (RS)
561 SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
562
563 if (SOffset == AMDGPU::NoRegister) {
564 // There are no free SGPRs, and we are in the process of spilling
565 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
566 // on SI/CI and on VI it is true until we implement spilling using scalar
567 // stores), we have no way to free up an SGPR. Our solution here is to
568 // add the offset directly to the ScratchOffset register, and then
569 // subtract the offset after the spill to return ScratchOffset to its
570 // original value.
571 SOffset = ScratchOffsetReg;
572 ScratchOffsetRegDelta = Offset;
573 } else {
574 Scavenged = true;
575 }
576
577 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
578 .addReg(ScratchOffsetReg)
579 .addImm(Offset);
580
581 Offset = 0;
582 }
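
// Illustrative example (assuming a 64-lane wavefront): a per-lane offset of
// 8192 bytes does not fit the 12-bit immediate, so 8192 * 64 = 524288 is
// added to the scratch offset register above and the immediate offset
// restarts at 0.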
583
584 for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
585 unsigned SubReg = NumSubRegs == 1 ?
586 ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
587
588 unsigned SOffsetRegState = 0;
589 unsigned SrcDstRegState = getDefRegState(!IsStore);
590 if (i + 1 == e) {
591 SOffsetRegState |= getKillRegState(Scavenged);
592 // The last implicit use carries the "Kill" flag.
593 SrcDstRegState |= getKillRegState(IsKill);
594 }
595
596 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
597 MachineMemOperand *NewMMO
598 = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
599 EltSize, MinAlign(Align, EltSize * i));
600
601 auto MIB = BuildMI(*MBB, MI, DL, Desc)
602 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
603 .addReg(ScratchRsrcReg)
604 .addReg(SOffset, SOffsetRegState)
605 .addImm(Offset)
606 .addImm(0) // glc
607 .addImm(0) // slc
608 .addImm(0) // tfe
609 .addMemOperand(NewMMO);
610
611 if (NumSubRegs > 1)
612 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
613 }
614
615 if (ScratchOffsetRegDelta != 0) {
616 // Subtract the offset we added to the ScratchOffset register.
617 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
618 .addReg(ScratchOffsetReg)
619 .addImm(ScratchOffsetRegDelta);
620 }
621 }
622
623 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
624 bool Store) {
625 if (SuperRegSize % 16 == 0) {
626 return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
627 AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
628 }
629
630 if (SuperRegSize % 8 == 0) {
631 return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
632 AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
633 }
634
635 return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
636 AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
637 }
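
// Illustrative example for getSpillEltSize: a 256-bit SGPR tuple is 32 bytes,
// so the SMEM spill path uses a 16-byte element size (two
// S_BUFFER_STORE_DWORDX4_SGPR stores), while a 64-bit pair uses a single
// DWORDX2 store.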
638
639 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
640 int Index,
641 RegScavenger *RS,
642 bool OnlyToVGPR) const {
643 MachineBasicBlock *MBB = MI->getParent();
644 MachineFunction *MF = MBB->getParent();
645 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
646 DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
647
648 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
649 = MFI->getSGPRToVGPRSpills(Index);
650 bool SpillToVGPR = !VGPRSpills.empty();
651 if (OnlyToVGPR && !SpillToVGPR)
652 return false;
653
654 MachineRegisterInfo &MRI = MF->getRegInfo();
655 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
656 const SIInstrInfo *TII = ST.getInstrInfo();
657
658 unsigned SuperReg = MI->getOperand(0).getReg();
659 bool IsKill = MI->getOperand(0).isKill();
660 const DebugLoc &DL = MI->getDebugLoc();
661
662 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
663
664 bool SpillToSMEM = spillSGPRToSMEM();
665 if (SpillToSMEM && OnlyToVGPR)
666 return false;
667
668 assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
669 SuperReg != MFI->getFrameOffsetReg() &&
670 SuperReg != MFI->getScratchWaveOffsetReg()));
671
672 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
673
674 unsigned OffsetReg = AMDGPU::M0;
675 unsigned M0CopyReg = AMDGPU::NoRegister;
676
677 if (SpillToSMEM) {
678 if (RS->isRegUsed(AMDGPU::M0)) {
679 M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
680 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
681 .addReg(AMDGPU::M0);
682 }
683 }
684
685 unsigned ScalarStoreOp;
686 unsigned EltSize = 4;
687 const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
688 if (SpillToSMEM && isSGPRClass(RC)) {
689 // XXX - if private_element_size is larger than 4 it might be useful to be
690 // able to use wider vmem spills.
691 std::tie(EltSize, ScalarStoreOp) =
692 getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
693 }
694
695 ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
696 unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
697
698 // SubReg carries the "Kill" flag when SubReg == SuperReg.
699 unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
700 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
701 unsigned SubReg = NumSubRegs == 1 ?
702 SuperReg : getSubReg(SuperReg, SplitParts[i]);
703
704 if (SpillToSMEM) {
705 int64_t FrOffset = FrameInfo.getObjectOffset(Index);
706
707 // The allocated memory size is really the wavefront size * the frame
708 // index size. The widest register class is 64 bytes, so a 4-byte scratch
709 // allocation is enough to spill this in a single stack object.
710 //
711 // FIXME: Frame size/offsets are computed earlier than this, so the extra
712 // space is still unnecessarily allocated.
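//
// Illustrative note (assuming a 64-lane wave): a single 4-byte slot therefore
// really spans 4 * 64 = 256 bytes of the wave's scratch, which is why FrOffset
// is scaled by the wavefront size when forming the SMEM offset below.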
713
714 unsigned Align = FrameInfo.getObjectAlignment(Index);
715 MachinePointerInfo PtrInfo
716 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
717 MachineMemOperand *MMO
718 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
719 EltSize, MinAlign(Align, EltSize * i));
720
721 // SMEM instructions only support a single offset, so increment the wave
722 // offset.
723
724 int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
725 if (Offset != 0) {
726 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
727 .addReg(MFI->getFrameOffsetReg())
728 .addImm(Offset);
729 } else {
730 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
731 .addReg(MFI->getFrameOffsetReg());
732 }
733
734 BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
735 .addReg(SubReg, getKillRegState(IsKill)) // sdata
736 .addReg(MFI->getScratchRSrcReg()) // sbase
737 .addReg(OffsetReg, RegState::Kill) // soff
738 .addImm(0) // glc
739 .addMemOperand(MMO);
740
741 continue;
742 }
743
744 if (SpillToVGPR) {
745 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
746
747 // During SGPR spilling to VGPR, determine if the VGPR is defined. The
748 // only circumstance in which we say it is undefined is when it is the
749 // first spill to this VGPR in the first basic block.
750 bool VGPRDefined = true;
751 if (MBB == &MF->front())
752 VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
753
754 // Mark the "old value of vgpr" input undef only if this is the first sgpr
755 // spill to this specific vgpr in the first basic block.
756 BuildMI(*MBB, MI, DL,
757 TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
758 Spill.VGPR)
759 .addReg(SubReg, getKillRegState(IsKill))
760 .addImm(Spill.Lane)
761 .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
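
// V_WRITELANE_B32 copies the 32-bit SGPR value into a single lane of
// Spill.VGPR, so one VGPR can hold up to wavefront-size spilled SGPRs,
// one per lane.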
762
763 // FIXME: Since this spills to another register instead of an actual
764 // frame index, we should delete the frame index when all references to
765 // it are fixed.
766 } else {
767 // XXX - Can the spill to VGPR fail for some subregisters but not others?
768 if (OnlyToVGPR)
769 return false;
770
771 // Spill SGPR to a frame index.
772 // TODO: Should VI try to spill to VGPR and then spill to SMEM?
773 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
775
776 MachineInstrBuilder Mov
777 = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
778 .addReg(SubReg, SubKillState);
779
780
781 // There could be undef components of a spilled super register.
782 // TODO: Can we detect this and skip the spill?
783 if (NumSubRegs > 1) {
784 // The last implicit use of the SuperReg carries the "Kill" flag.
785 unsigned SuperKillState = 0;
786 if (i + 1 == e)
787 SuperKillState |= getKillRegState(IsKill);
788 Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
789 }
790
791 unsigned Align = FrameInfo.getObjectAlignment(Index);
792 MachinePointerInfo PtrInfo
793 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
794 MachineMemOperand *MMO
795 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
796 EltSize, MinAlign(Align, EltSize * i));
797 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
798 .addReg(TmpReg, RegState::Kill) // src
799 .addFrameIndex(Index) // vaddr
800 .addReg(MFI->getScratchRSrcReg()) // srsrc
801 .addReg(MFI->getFrameOffsetReg()) // soffset
802 .addImm(i * 4) // offset
803 .addMemOperand(MMO);
804 }
805 }
806
807 if (M0CopyReg != AMDGPU::NoRegister) {
808 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
809 .addReg(M0CopyReg, RegState::Kill);
810 }
811
812 MI->eraseFromParent();
813 MFI->addToSpilledSGPRs(NumSubRegs);
814 return true;
815 }
816
817 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
818 int Index,
819 RegScavenger *RS,
820 bool OnlyToVGPR) const {
821 MachineFunction *MF = MI->getParent()->getParent();
822 MachineRegisterInfo &MRI = MF->getRegInfo();
823 MachineBasicBlock *MBB = MI->getParent();
824 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
825
826 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
827 = MFI->getSGPRToVGPRSpills(Index);
828 bool SpillToVGPR = !VGPRSpills.empty();
829 if (OnlyToVGPR && !SpillToVGPR)
830 return false;
831
832 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
833 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
834 const SIInstrInfo *TII = ST.getInstrInfo();
835 const DebugLoc &DL = MI->getDebugLoc();
836
837 unsigned SuperReg = MI->getOperand(0).getReg();
838 bool SpillToSMEM = spillSGPRToSMEM();
839 if (SpillToSMEM && OnlyToVGPR)
840 return false;
841
842 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
843
844 unsigned OffsetReg = AMDGPU::M0;
845 unsigned M0CopyReg = AMDGPU::NoRegister;
846
847 if (SpillToSMEM) {
848 if (RS->isRegUsed(AMDGPU::M0)) {
849 M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
850 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
851 .addReg(AMDGPU::M0);
852 }
853 }
854
855 unsigned EltSize = 4;
856 unsigned ScalarLoadOp;
857
858 const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
859 if (SpillToSMEM && isSGPRClass(RC)) {
860 // XXX - if private_element_size is larger than 4 it might be useful to be
861 // able to use wider vmem spills.
862 std::tie(EltSize, ScalarLoadOp) =
863 getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
864 }
865
866 ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
867 unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
868
869 // SubReg carries the "Kill" flag when SubReg == SuperReg.
870 int64_t FrOffset = FrameInfo.getObjectOffset(Index);
871
872 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
873 unsigned SubReg = NumSubRegs == 1 ?
874 SuperReg : getSubReg(SuperReg, SplitParts[i]);
875
876 if (SpillToSMEM) {
877 // FIXME: Size may be > 4, but the extra bytes are wasted.
878 unsigned Align = FrameInfo.getObjectAlignment(Index);
879 MachinePointerInfo PtrInfo
880 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
881 MachineMemOperand *MMO
882 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
883 EltSize, MinAlign(Align, EltSize * i));
884
885 // Add i * 4 offset
886 int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
887 if (Offset != 0) {
888 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
889 .addReg(MFI->getFrameOffsetReg())
890 .addImm(Offset);
891 } else {
892 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
893 .addReg(MFI->getFrameOffsetReg());
894 }
895
896 auto MIB =
897 BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
898 .addReg(MFI->getScratchRSrcReg()) // sbase
899 .addReg(OffsetReg, RegState::Kill) // soff
900 .addImm(0) // glc
901 .addMemOperand(MMO);
902
903 if (NumSubRegs > 1)
904 MIB.addReg(SuperReg, RegState::ImplicitDefine);
905
906 continue;
907 }
908
909 if (SpillToVGPR) {
910 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
911 auto MIB =
912 BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
913 SubReg)
914 .addReg(Spill.VGPR)
915 .addImm(Spill.Lane);
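
// V_READLANE_B32 is the inverse of the writelane in spillSGPR: it moves the
// value held in lane Spill.Lane of Spill.VGPR back into the destination SGPR.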
916
917 if (NumSubRegs > 1)
918 MIB.addReg(SuperReg, RegState::ImplicitDefine);
919 } else {
920 if (OnlyToVGPR)
921 return false;
922
923 // Restore SGPR from a stack slot.
924 // FIXME: We should use S_LOAD_DWORD here for VI.
925 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
926 unsigned Align = FrameInfo.getObjectAlignment(Index);
927
928 MachinePointerInfo PtrInfo
929 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
930
931 MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
932 MachineMemOperand::MOLoad, EltSize,
933 MinAlign(Align, EltSize * i));
934
935 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
936 .addFrameIndex(Index) // vaddr
937 .addReg(MFI->getScratchRSrcReg()) // srsrc
938 .addReg(MFI->getFrameOffsetReg()) // soffset
939 .addImm(i * 4) // offset
940 .addMemOperand(MMO);
941
942 auto MIB =
943 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
944 .addReg(TmpReg, RegState::Kill);
945
946 if (NumSubRegs > 1)
947 MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
948 }
949 }
950
951 if (M0CopyReg != AMDGPU::NoRegister) {
952 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
953 .addReg(M0CopyReg, RegState::Kill);
954 }
955
956 MI->eraseFromParent();
957 return true;
958 }
959
960 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
961 /// a VGPR and the stack slot can be safely eliminated when all other users are
962 /// handled.
963 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
964 MachineBasicBlock::iterator MI,
965 int FI,
966 RegScavenger *RS) const {
967 switch (MI->getOpcode()) {
968 case AMDGPU::SI_SPILL_S512_SAVE:
969 case AMDGPU::SI_SPILL_S256_SAVE:
970 case AMDGPU::SI_SPILL_S128_SAVE:
971 case AMDGPU::SI_SPILL_S64_SAVE:
972 case AMDGPU::SI_SPILL_S32_SAVE:
973 return spillSGPR(MI, FI, RS, true);
974 case AMDGPU::SI_SPILL_S512_RESTORE:
975 case AMDGPU::SI_SPILL_S256_RESTORE:
976 case AMDGPU::SI_SPILL_S128_RESTORE:
977 case AMDGPU::SI_SPILL_S64_RESTORE:
978 case AMDGPU::SI_SPILL_S32_RESTORE:
979 return restoreSGPR(MI, FI, RS, true);
980 default:
981 llvm_unreachable("not an SGPR spill instruction");
982 }
983 }
984
985 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
986 int SPAdj, unsigned FIOperandNum,
987 RegScavenger *RS) const {
988 MachineFunction *MF = MI->getParent()->getParent();
989 MachineRegisterInfo &MRI = MF->getRegInfo();
990 MachineBasicBlock *MBB = MI->getParent();
991 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
992 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
993 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
994 const SIInstrInfo *TII = ST.getInstrInfo();
995 DebugLoc DL = MI->getDebugLoc();
996
997 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
998 int Index = MI->getOperand(FIOperandNum).getIndex();
999
1000 switch (MI->getOpcode()) {
1001 // SGPR register spill
1002 case AMDGPU::SI_SPILL_S512_SAVE:
1003 case AMDGPU::SI_SPILL_S256_SAVE:
1004 case AMDGPU::SI_SPILL_S128_SAVE:
1005 case AMDGPU::SI_SPILL_S64_SAVE:
1006 case AMDGPU::SI_SPILL_S32_SAVE: {
1007 spillSGPR(MI, Index, RS);
1008 break;
1009 }
1010
1011 // SGPR register restore
1012 case AMDGPU::SI_SPILL_S512_RESTORE:
1013 case AMDGPU::SI_SPILL_S256_RESTORE:
1014 case AMDGPU::SI_SPILL_S128_RESTORE:
1015 case AMDGPU::SI_SPILL_S64_RESTORE:
1016 case AMDGPU::SI_SPILL_S32_RESTORE: {
1017 restoreSGPR(MI, Index, RS);
1018 break;
1019 }
1020
1021 // VGPR register spill
1022 case AMDGPU::SI_SPILL_V512_SAVE:
1023 case AMDGPU::SI_SPILL_V256_SAVE:
1024 case AMDGPU::SI_SPILL_V128_SAVE:
1025 case AMDGPU::SI_SPILL_V96_SAVE:
1026 case AMDGPU::SI_SPILL_V64_SAVE:
1027 case AMDGPU::SI_SPILL_V32_SAVE: {
1028 const MachineOperand *VData = TII->getNamedOperand(*MI,
1029 AMDGPU::OpName::vdata);
1030 buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1031 Index,
1032 VData->getReg(), VData->isKill(),
1033 TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1034 TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1035 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1036 *MI->memoperands_begin(),
1037 RS);
1038 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1039 MI->eraseFromParent();
1040 break;
1041 }
1042 case AMDGPU::SI_SPILL_V32_RESTORE:
1043 case AMDGPU::SI_SPILL_V64_RESTORE:
1044 case AMDGPU::SI_SPILL_V96_RESTORE:
1045 case AMDGPU::SI_SPILL_V128_RESTORE:
1046 case AMDGPU::SI_SPILL_V256_RESTORE:
1047 case AMDGPU::SI_SPILL_V512_RESTORE: {
1048 const MachineOperand *VData = TII->getNamedOperand(*MI,
1049 AMDGPU::OpName::vdata);
1050
1051 buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1052 Index,
1053 VData->getReg(), VData->isKill(),
1054 TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1055 TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1056 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1057 *MI->memoperands_begin(),
1058 RS);
1059 MI->eraseFromParent();
1060 break;
1061 }
1062
1063 default: {
1064 const DebugLoc &DL = MI->getDebugLoc();
1065 bool IsMUBUF = TII->isMUBUF(*MI);
1066
1067 if (!IsMUBUF &&
1068 MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1069 // Convert to an absolute stack address by finding the offset from the
1070 // scratch wave base and scaling by the wave size.
1071 //
1072 // In an entry function/kernel the stack address is already the
1073 // absolute address relative to the scratch wave offset.
1074
1075 unsigned DiffReg
1076 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1077
1078 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1079 unsigned ResultReg = IsCopy ?
1080 MI->getOperand(0).getReg() :
1081 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1082
1083 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1084 .addReg(MFI->getFrameOffsetReg())
1085 .addReg(MFI->getScratchWaveOffsetReg());
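
// The register difference is a wave-scaled byte offset, so the right shift by
// log2(wavefront size) below yields the per-lane offset; e.g. (illustrative,
// 64-lane wave) a 1024-byte difference becomes 16 bytes per lane, to which
// the object's frame offset is then added.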
1086
1087 int64_t Offset = FrameInfo.getObjectOffset(Index);
1088 if (Offset == 0) {
1089 // XXX - This never happens because of emergency scavenging slot at 0?
1090 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1091 .addImm(Log2_32(ST.getWavefrontSize()))
1092 .addReg(DiffReg);
1093 } else {
1094 unsigned ScaledReg
1095 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1096
1097 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1098 .addImm(Log2_32(ST.getWavefrontSize()))
1099 .addReg(DiffReg, RegState::Kill);
1100
1101 // TODO: Fold if use instruction is another add of a constant.
1102 if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1103 TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1104 .addImm(Offset)
1105 .addReg(ScaledReg, RegState::Kill);
1106 } else {
1107 unsigned ConstOffsetReg
1108 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1109
1110 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1111 .addImm(Offset);
1112 TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1113 .addReg(ConstOffsetReg, RegState::Kill)
1114 .addReg(ScaledReg, RegState::Kill);
1115 }
1116 }
1117
1118 // Don't introduce an extra copy if we're just materializing in a mov.
1119 if (IsCopy)
1120 MI->eraseFromParent();
1121 else
1122 FIOp.ChangeToRegister(ResultReg, false, false, true);
1123 return;
1124 }
1125
1126 if (IsMUBUF) {
1127 // Disable offen so we don't need a 0 vgpr base.
1128 assert(static_cast<int>(FIOperandNum) ==
1129 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1130 AMDGPU::OpName::vaddr));
1131
1132 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1133 == MFI->getFrameOffsetReg());
1134
1135 int64_t Offset = FrameInfo.getObjectOffset(Index);
1136 int64_t OldImm
1137 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1138 int64_t NewOffset = OldImm + Offset;
1139
1140 if (isUInt<12>(NewOffset) &&
1141 buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1142 MI->eraseFromParent();
1143 return;
1144 }
1145 }
1146
1147 // If the offset is simply too big, don't convert to a scratch wave offset
1148 // relative index.
1149
1150 int64_t Offset = FrameInfo.getObjectOffset(Index);
1151 FIOp.ChangeToImmediate(Offset);
1152 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1153 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1154 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1155 .addImm(Offset);
1156 FIOp.ChangeToRegister(TmpReg, false, false, true);
1157 }
1158 }
1159 }
1160 }
1161
1162 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1163 #define AMDGPU_REG_ASM_NAMES
1164 #include "AMDGPURegAsmNames.inc.cpp"
1165
1166 #define REG_RANGE(BeginReg, EndReg, RegTable) \
1167 if (Reg >= BeginReg && Reg <= EndReg) { \
1168 unsigned Index = Reg - BeginReg; \
1169 assert(Index < array_lengthof(RegTable)); \
1170 return RegTable[Index]; \
1171 }
1172
1173 REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1174 REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1175 REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1176 REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1177 REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1178 VGPR96RegNames);
1179
1180 REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1181 AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1182 VGPR128RegNames);
1183 REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1184 AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1185 SGPR128RegNames);
1186
1187 REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1188 AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1189 VGPR256RegNames);
1190
1191 REG_RANGE(
1192 AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1193 AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1194 VGPR512RegNames);
1195
1196 REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1197 AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1198 SGPR256RegNames);
1199
1200 REG_RANGE(
1201 AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1202 AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1203 SGPR512RegNames
1204 );
1205
1206 #undef REG_RANGE
1207
1208 // FIXME: Rename flat_scr so we don't need to special case this.
1209 switch (Reg) {
1210 case AMDGPU::FLAT_SCR:
1211 return "flat_scratch";
1212 case AMDGPU::FLAT_SCR_LO:
1213 return "flat_scratch_lo";
1214 case AMDGPU::FLAT_SCR_HI:
1215 return "flat_scratch_hi";
1216 default:
1217 // For the special named registers the default is fine.
1218 return TargetRegisterInfo::getRegAsmName(Reg);
1219 }
1220 }
1221
1222 // FIXME: This is very slow. It might be worth creating a map from physreg to
1223 // register class.
1224 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1225 assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1226
1227 static const TargetRegisterClass *const BaseClasses[] = {
1228 &AMDGPU::VGPR_32RegClass,
1229 &AMDGPU::SReg_32RegClass,
1230 &AMDGPU::VReg_64RegClass,
1231 &AMDGPU::SReg_64RegClass,
1232 &AMDGPU::VReg_96RegClass,
1233 &AMDGPU::VReg_128RegClass,
1234 &AMDGPU::SReg_128RegClass,
1235 &AMDGPU::VReg_256RegClass,
1236 &AMDGPU::SReg_256RegClass,
1237 &AMDGPU::VReg_512RegClass,
1238 &AMDGPU::SReg_512RegClass,
1239 &AMDGPU::SCC_CLASSRegClass,
1240 &AMDGPU::Pseudo_SReg_32RegClass,
1241 &AMDGPU::Pseudo_SReg_128RegClass,
1242 };
1243
1244 for (const TargetRegisterClass *BaseClass : BaseClasses) {
1245 if (BaseClass->contains(Reg)) {
1246 return BaseClass;
1247 }
1248 }
1249 return nullptr;
1250 }
1251
1252 // TODO: It might be helpful to have some target specific flags in
1253 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1254 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1255 unsigned Size = getRegSizeInBits(*RC);
1256 if (Size < 32)
1257 return false;
1258 switch (Size) {
1259 case 32:
1260 return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1261 case 64:
1262 return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1263 case 96:
1264 return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1265 case 128:
1266 return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1267 case 256:
1268 return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1269 case 512:
1270 return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1271 default:
1272 llvm_unreachable("Invalid register class size");
1273 }
1274 }
1275
1276 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1277 const TargetRegisterClass *SRC) const {
1278 switch (getRegSizeInBits(*SRC)) {
1279 case 32:
1280 return &AMDGPU::VGPR_32RegClass;
1281 case 64:
1282 return &AMDGPU::VReg_64RegClass;
1283 case 96:
1284 return &AMDGPU::VReg_96RegClass;
1285 case 128:
1286 return &AMDGPU::VReg_128RegClass;
1287 case 256:
1288 return &AMDGPU::VReg_256RegClass;
1289 case 512:
1290 return &AMDGPU::VReg_512RegClass;
1291 default:
1292 llvm_unreachable("Invalid register class size");
1293 }
1294 }
1295
1296 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1297 const TargetRegisterClass *VRC) const {
1298 switch (getRegSizeInBits(*VRC)) {
1299 case 32:
1300 return &AMDGPU::SGPR_32RegClass;
1301 case 64:
1302 return &AMDGPU::SReg_64RegClass;
1303 case 128:
1304 return &AMDGPU::SReg_128RegClass;
1305 case 256:
1306 return &AMDGPU::SReg_256RegClass;
1307 case 512:
1308 return &AMDGPU::SReg_512RegClass;
1309 default:
1310 llvm_unreachable("Invalid register class size");
1311 }
1312 }
1313
1314 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1315 const TargetRegisterClass *RC, unsigned SubIdx) const {
1316 if (SubIdx == AMDGPU::NoSubRegister)
1317 return RC;
1318
1319 // We can assume that each lane corresponds to one 32-bit register.
1320 unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1321 if (isSGPRClass(RC)) {
1322 switch (Count) {
1323 case 1:
1324 return &AMDGPU::SGPR_32RegClass;
1325 case 2:
1326 return &AMDGPU::SReg_64RegClass;
1327 case 4:
1328 return &AMDGPU::SReg_128RegClass;
1329 case 8:
1330 return &AMDGPU::SReg_256RegClass;
1331 case 16: /* fall-through */
1332 default:
1333 llvm_unreachable("Invalid sub-register class size");
1334 }
1335 } else {
1336 switch (Count) {
1337 case 1:
1338 return &AMDGPU::VGPR_32RegClass;
1339 case 2:
1340 return &AMDGPU::VReg_64RegClass;
1341 case 3:
1342 return &AMDGPU::VReg_96RegClass;
1343 case 4:
1344 return &AMDGPU::VReg_128RegClass;
1345 case 8:
1346 return &AMDGPU::VReg_256RegClass;
1347 case 16: /* fall-through */
1348 default:
1349 llvm_unreachable("Invalid sub-register class size");
1350 }
1351 }
1352 }
1353
1354 bool SIRegisterInfo::shouldRewriteCopySrc(
1355 const TargetRegisterClass *DefRC,
1356 unsigned DefSubReg,
1357 const TargetRegisterClass *SrcRC,
1358 unsigned SrcSubReg) const {
1359 // We want to prefer the smallest register class possible, so we don't want to
1360 // stop and rewrite on anything that looks like a subregister
1361 // extract. Operations mostly don't care about the super register class, so we
1362 // only want to stop on the most basic of copies between the same register
1363 // class.
1364 //
1365 // e.g. if we have something like
1366 // %0 = ...
1367 // %1 = ...
1368 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1369 // %3 = COPY %2, sub0
1370 //
1371 // We want to look through the COPY to find:
1372 // => %3 = COPY %0
1373
1374 // Plain copy.
1375 return getCommonSubClass(DefRC, SrcRC) != nullptr;
1376 }
1377
1378 /// Returns a register that is not used at any point in the function.
1379 /// If all registers are used, then this function will return
1380 /// AMDGPU::NoRegister.
1381 unsigned
1382 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1383 const TargetRegisterClass *RC,
1384 const MachineFunction &MF) const {
1385
1386 for (unsigned Reg : *RC)
1387 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1388 return Reg;
1389 return AMDGPU::NoRegister;
1390 }
1391
1392 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1393 unsigned EltSize) const {
1394 if (EltSize == 4) {
1395 static const int16_t Sub0_15[] = {
1396 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1397 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1398 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1399 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1400 };
1401
1402 static const int16_t Sub0_7[] = {
1403 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1404 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1405 };
1406
1407 static const int16_t Sub0_3[] = {
1408 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1409 };
1410
1411 static const int16_t Sub0_2[] = {
1412 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1413 };
1414
1415 static const int16_t Sub0_1[] = {
1416 AMDGPU::sub0, AMDGPU::sub1,
1417 };
1418
1419 switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1420 case 32:
1421 return {};
1422 case 64:
1423 return makeArrayRef(Sub0_1);
1424 case 96:
1425 return makeArrayRef(Sub0_2);
1426 case 128:
1427 return makeArrayRef(Sub0_3);
1428 case 256:
1429 return makeArrayRef(Sub0_7);
1430 case 512:
1431 return makeArrayRef(Sub0_15);
1432 default:
1433 llvm_unreachable("unhandled register size");
1434 }
1435 }
1436
1437 if (EltSize == 8) {
1438 static const int16_t Sub0_15_64[] = {
1439 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1440 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1441 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1442 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1443 };
1444
1445 static const int16_t Sub0_7_64[] = {
1446 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1447 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1448 };
1449
1450
1451 static const int16_t Sub0_3_64[] = {
1452 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1453 };
1454
1455 switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1456 case 64:
1457 return {};
1458 case 128:
1459 return makeArrayRef(Sub0_3_64);
1460 case 256:
1461 return makeArrayRef(Sub0_7_64);
1462 case 512:
1463 return makeArrayRef(Sub0_15_64);
1464 default:
1465 llvm_unreachable("unhandled register size");
1466 }
1467 }
1468
1469 assert(EltSize == 16 && "unhandled register spill split size");
1470
1471 static const int16_t Sub0_15_128[] = {
1472 AMDGPU::sub0_sub1_sub2_sub3,
1473 AMDGPU::sub4_sub5_sub6_sub7,
1474 AMDGPU::sub8_sub9_sub10_sub11,
1475 AMDGPU::sub12_sub13_sub14_sub15
1476 };
1477
1478 static const int16_t Sub0_7_128[] = {
1479 AMDGPU::sub0_sub1_sub2_sub3,
1480 AMDGPU::sub4_sub5_sub6_sub7
1481 };
1482
1483 switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1484 case 128:
1485 return {};
1486 case 256:
1487 return makeArrayRef(Sub0_7_128);
1488 case 512:
1489 return makeArrayRef(Sub0_15_128);
1490 default:
1491 llvm_unreachable("unhandled register size");
1492 }
1493 }
1494
1495 const TargetRegisterClass*
1496 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1497 unsigned Reg) const {
1498 if (TargetRegisterInfo::isVirtualRegister(Reg))
1499 return MRI.getRegClass(Reg);
1500
1501 return getPhysRegClass(Reg);
1502 }
1503
1504 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1505 unsigned Reg) const {
1506 const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1507 assert(RC && "Register class for the reg not found");
1508 return hasVGPRs(RC);
1509 }
1510
1511 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1512 const TargetRegisterClass *SrcRC,
1513 unsigned SubReg,
1514 const TargetRegisterClass *DstRC,
1515 unsigned DstSubReg,
1516 const TargetRegisterClass *NewRC,
1517 LiveIntervals &LIS) const {
1518 unsigned SrcSize = getRegSizeInBits(*SrcRC);
1519 unsigned DstSize = getRegSizeInBits(*DstRC);
1520 unsigned NewSize = getRegSizeInBits(*NewRC);
1521
1522 // Do not increase the size of registers beyond a dword; we would need to
1523 // allocate adjacent registers and constrain regalloc more than needed.
1524
1525 // Always allow dword coalescing.
1526 if (SrcSize <= 32 || DstSize <= 32)
1527 return true;
1528
1529 return NewSize <= DstSize || NewSize <= SrcSize;
1530 }
1531
1532 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1533 MachineFunction &MF) const {
1534
1535 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1536 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1537
1538 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1539 MF.getFunction());
1540 switch (RC->getID()) {
1541 default:
1542 return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1543 case AMDGPU::VGPR_32RegClassID:
1544 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1545 case AMDGPU::SGPR_32RegClassID:
1546 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1547 }
1548 }
1549
1550 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1551 unsigned Idx) const {
1552 if (Idx == getVGPRPressureSet())
1553 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1554 const_cast<MachineFunction &>(MF));
1555
1556 if (Idx == getSGPRPressureSet())
1557 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1558 const_cast<MachineFunction &>(MF));
1559
1560 return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1561 }
1562
1563 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1564 static const int Empty[] = { -1 };
1565
1566 if (hasRegUnit(AMDGPU::M0, RegUnit))
1567 return Empty;
1568 return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1569 }
1570
1571 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1572 // Not a callee saved register.
1573 return AMDGPU::SGPR30_SGPR31;
1574 }
1575
1576 const TargetRegisterClass *
1577 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1578 const MachineRegisterInfo &MRI) const {
1579 unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1580 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1581 if (!RB)
1582 return nullptr;
1583
1584 switch (Size) {
1585 case 32:
1586 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1587 &AMDGPU::SReg_32_XM0RegClass;
1588 case 64:
1589 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1590 &AMDGPU::SReg_64_XEXECRegClass;
1591 case 96:
1592 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1593 nullptr;
1594 case 128:
1595 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1596 &AMDGPU::SReg_128RegClass;
1597 default:
1598 llvm_unreachable("not implemented");
1599 }
1600 }
1601