• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//

11 
12 #include "SIMachineFunctionInfo.h"
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
15 #include "llvm/CodeGen/MachineInstrBuilder.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/IR/LLVMContext.h"
20 
21 #define MAX_LANES 64
22 
23 using namespace llvm;
24 
25 
26 // Pin the vtable to this file.
anchor()27 void SIMachineFunctionInfo::anchor() {}
28 
SIMachineFunctionInfo(const MachineFunction & MF)29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
30   : AMDGPUMachineFunction(MF),
31     TIDReg(AMDGPU::NoRegister),
32     ScratchRSrcReg(AMDGPU::NoRegister),
33     ScratchWaveOffsetReg(AMDGPU::NoRegister),
34     PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
35     DispatchPtrUserSGPR(AMDGPU::NoRegister),
36     QueuePtrUserSGPR(AMDGPU::NoRegister),
37     KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
38     DispatchIDUserSGPR(AMDGPU::NoRegister),
39     FlatScratchInitUserSGPR(AMDGPU::NoRegister),
40     PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
41     GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
42     GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
43     GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
44     WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
45     WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
46     WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
47     WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
48     PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
49     LDSWaveSpillSize(0),
50     PSInputAddr(0),
51     NumUserSGPRs(0),
52     NumSystemSGPRs(0),
53     HasSpilledSGPRs(false),
54     HasSpilledVGPRs(false),
55     PrivateSegmentBuffer(false),
56     DispatchPtr(false),
57     QueuePtr(false),
58     DispatchID(false),
59     KernargSegmentPtr(false),
60     FlatScratchInit(false),
61     GridWorkgroupCountX(false),
62     GridWorkgroupCountY(false),
63     GridWorkgroupCountZ(false),
64     WorkGroupIDX(true),
65     WorkGroupIDY(false),
66     WorkGroupIDZ(false),
67     WorkGroupInfo(false),
68     PrivateSegmentWaveByteOffset(false),
69     WorkItemIDX(true),
70     WorkItemIDY(false),
71     WorkItemIDZ(false) {
72   const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
73   const Function *F = MF.getFunction();
74 
75   const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
76 
77   if (getShaderType() == ShaderType::COMPUTE)
78     KernargSegmentPtr = true;
79 
80   if (F->hasFnAttribute("amdgpu-work-group-id-y"))
81     WorkGroupIDY = true;
82 
83   if (F->hasFnAttribute("amdgpu-work-group-id-z"))
84     WorkGroupIDZ = true;
85 
86   if (F->hasFnAttribute("amdgpu-work-item-id-y"))
87     WorkItemIDY = true;
88 
89   if (F->hasFnAttribute("amdgpu-work-item-id-z"))
90     WorkItemIDZ = true;
91 
92   bool MaySpill = ST.isVGPRSpillingEnabled(this);
93   bool HasStackObjects = FrameInfo->hasStackObjects();
94 
95   if (HasStackObjects || MaySpill)
96     PrivateSegmentWaveByteOffset = true;
97 
98   if (ST.isAmdHsaOS()) {
99     if (HasStackObjects || MaySpill)
100       PrivateSegmentBuffer = true;
101 
102     if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
103       DispatchPtr = true;
104   }
105 
106   // X, XY, and XYZ are the only supported combinations, so make sure Y is
107   // enabled if Z is.
108   if (WorkItemIDZ)
109     WorkItemIDY = true;
110 }
111 
addPrivateSegmentBuffer(const SIRegisterInfo & TRI)112 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
113   const SIRegisterInfo &TRI) {
114   PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
115     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
116   NumUserSGPRs += 4;
117   return PrivateSegmentBufferUserSGPR;
118 }
119 
addDispatchPtr(const SIRegisterInfo & TRI)120 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
121   DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
122     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
123   NumUserSGPRs += 2;
124   return DispatchPtrUserSGPR;
125 }
126 
addQueuePtr(const SIRegisterInfo & TRI)127 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
128   QueuePtrUserSGPR = TRI.getMatchingSuperReg(
129     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
130   NumUserSGPRs += 2;
131   return QueuePtrUserSGPR;
132 }
133 
addKernargSegmentPtr(const SIRegisterInfo & TRI)134 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
135   KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
136     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
137   NumUserSGPRs += 2;
138   return KernargSegmentPtrUserSGPR;
139 }
140 
getSpilledReg(MachineFunction * MF,unsigned FrameIndex,unsigned SubIdx)141 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
142                                                        MachineFunction *MF,
143                                                        unsigned FrameIndex,
144                                                        unsigned SubIdx) {
145   const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
146   const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
147       MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
148   MachineRegisterInfo &MRI = MF->getRegInfo();
149   int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
150   Offset += SubIdx * 4;
151 
152   unsigned LaneVGPRIdx = Offset / (64 * 4);
153   unsigned Lane = (Offset / 4) % 64;
154 
155   struct SpilledReg Spill;
156 
157   if (!LaneVGPRs.count(LaneVGPRIdx)) {
158     unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
159     LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
160 
161     // Add this register as live-in to all blocks to avoid machine verifer
162     // complaining about use of an undefined physical register.
163     for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
164          BI != BE; ++BI) {
165       BI->addLiveIn(LaneVGPR);
166     }
167   }
168 
169   Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
170   Spill.Lane = Lane;
171   return Spill;
172 }
173 
getMaximumWorkGroupSize(const MachineFunction & MF) const174 unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
175                                               const MachineFunction &MF) const {
176   const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
177   // FIXME: We should get this information from kernel attributes if it
178   // is available.
179   return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
180 }
181