• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
16 
17 #include "AMDGPUArgumentUsageInfo.h"
18 #include "AMDGPUMachineFunction.h"
19 #include "SIInstrInfo.h"
20 #include "SIRegisterInfo.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/CodeGen/PseudoSourceValue.h"
27 #include "llvm/CodeGen/TargetInstrInfo.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include <array>
31 #include <cassert>
32 #include <utility>
33 #include <vector>
34 
35 namespace llvm {
36 
37 class MachineFrameInfo;
38 class MachineFunction;
39 class TargetRegisterClass;
40 
41 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
42 public:
43   // TODO: Is the img rsrc useful?
AMDGPUImagePseudoSourceValue(const TargetInstrInfo & TII)44   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
45     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
46 
isConstant(const MachineFrameInfo *)47   bool isConstant(const MachineFrameInfo *) const override {
48     // This should probably be true for most images, but we will start by being
49     // conservative.
50     return false;
51   }
52 
isAliased(const MachineFrameInfo *)53   bool isAliased(const MachineFrameInfo *) const override {
54     return true;
55   }
56 
mayAlias(const MachineFrameInfo *)57   bool mayAlias(const MachineFrameInfo *) const override {
58     return true;
59   }
60 };
61 
62 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
63 public:
AMDGPUBufferPseudoSourceValue(const TargetInstrInfo & TII)64   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
65     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
66 
isConstant(const MachineFrameInfo *)67   bool isConstant(const MachineFrameInfo *) const override {
68     // This should probably be true for most images, but we will start by being
69     // conservative.
70     return false;
71   }
72 
isAliased(const MachineFrameInfo *)73   bool isAliased(const MachineFrameInfo *) const override {
74     return true;
75   }
76 
mayAlias(const MachineFrameInfo *)77   bool mayAlias(const MachineFrameInfo *) const override {
78     return true;
79   }
80 };
81 
82 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
83 /// tells the hardware which interpolation parameters to load.
84 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
85   unsigned TIDReg = AMDGPU::NoRegister;
86 
87   // Registers that may be reserved for spilling purposes. These may be the same
88   // as the input registers.
89   unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
90   unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
91 
92   // This is the current function's incremented size from the kernel's scratch
93   // wave offset register. For an entry function, this is exactly the same as
94   // the ScratchWaveOffsetReg.
95   unsigned FrameOffsetReg = AMDGPU::FP_REG;
96 
97   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
98   unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
99 
100   AMDGPUFunctionArgInfo ArgInfo;
101 
102   // Graphics info.
103   unsigned PSInputAddr = 0;
104   unsigned PSInputEnable = 0;
105 
106   /// Number of bytes of arguments this function has on the stack. If the callee
107   /// is expected to restore the argument stack this should be a multiple of 16,
108   /// all usable during a tail call.
109   ///
110   /// The alternative would forbid tail call optimisation in some cases: if we
111   /// want to transfer control from a function with 8-bytes of stack-argument
112   /// space to a function with 16-bytes then misalignment of this value would
113   /// make a stack adjustment necessary, which could not be undone by the
114   /// callee.
115   unsigned BytesInStackArgArea = 0;
116 
117   bool ReturnsVoid = true;
118 
119   // A pair of default/requested minimum/maximum flat work group sizes.
120   // Minimum - first, maximum - second.
121   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
122 
123   // A pair of default/requested minimum/maximum number of waves per execution
124   // unit. Minimum - first, maximum - second.
125   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
126 
127   // Stack object indices for work group IDs.
128   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
129 
130   // Stack object indices for work item IDs.
131   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
132 
133   DenseMap<const Value *,
134            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
135   DenseMap<const Value *,
136            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
137 
138 private:
139   unsigned LDSWaveSpillSize = 0;
140   unsigned NumUserSGPRs = 0;
141   unsigned NumSystemSGPRs = 0;
142 
143   bool HasSpilledSGPRs = false;
144   bool HasSpilledVGPRs = false;
145   bool HasNonSpillStackObjects = false;
146   bool IsStackRealigned = false;
147 
148   unsigned NumSpilledSGPRs = 0;
149   unsigned NumSpilledVGPRs = 0;
150 
151   // Feature bits required for inputs passed in user SGPRs.
152   bool PrivateSegmentBuffer : 1;
153   bool DispatchPtr : 1;
154   bool QueuePtr : 1;
155   bool KernargSegmentPtr : 1;
156   bool DispatchID : 1;
157   bool FlatScratchInit : 1;
158 
159   // Feature bits required for inputs passed in system SGPRs.
160   bool WorkGroupIDX : 1; // Always initialized.
161   bool WorkGroupIDY : 1;
162   bool WorkGroupIDZ : 1;
163   bool WorkGroupInfo : 1;
164   bool PrivateSegmentWaveByteOffset : 1;
165 
166   bool WorkItemIDX : 1; // Always initialized.
167   bool WorkItemIDY : 1;
168   bool WorkItemIDZ : 1;
169 
170   // Private memory buffer
171   // Compute directly in sgpr[0:1]
172   // Other shaders indirect 64-bits at sgpr[0:1]
173   bool ImplicitBufferPtr : 1;
174 
175   // Pointer to where the ABI inserts special kernel arguments separate from the
176   // user arguments. This is an offset from the KernargSegmentPtr.
177   bool ImplicitArgPtr : 1;
178 
179   // The hard-wired high half of the address of the global information table
180   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
181   // current hardware only allows a 16 bit value.
182   unsigned GITPtrHigh;
183 
184   unsigned HighBitsOf32BitAddress;
185 
186   // Current recorded maximum possible occupancy.
187   unsigned Occupancy;
188 
189   MCPhysReg getNextUserSGPR() const;
190 
191   MCPhysReg getNextSystemSGPR() const;
192 
193 public:
194   struct SpilledReg {
195     unsigned VGPR = 0;
196     int Lane = -1;
197 
198     SpilledReg() = default;
SpilledRegSpilledReg199     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
200 
hasLaneSpilledReg201     bool hasLane() { return Lane != -1;}
hasRegSpilledReg202     bool hasReg() { return VGPR != 0;}
203   };
204 
205   struct SGPRSpillVGPRCSR {
206     // VGPR used for SGPR spills
207     unsigned VGPR;
208 
209     // If the VGPR is a CSR, the stack slot used to save/restore it in the
210     // prolog/epilog.
211     Optional<int> FI;
212 
SGPRSpillVGPRCSRSGPRSpillVGPRCSR213     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
214   };
215 
216 private:
217   // SGPR->VGPR spilling support.
218   using SpillRegMask = std::pair<unsigned, unsigned>;
219 
220   // Track VGPR + wave index for each subregister of the SGPR spilled to
221   // frameindex key.
222   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
223   unsigned NumVGPRSpillLanes = 0;
224   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
225 
226 public:
227   SIMachineFunctionInfo(const MachineFunction &MF);
228 
getSGPRToVGPRSpills(int FrameIndex)229   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
230     auto I = SGPRToVGPRSpills.find(FrameIndex);
231     return (I == SGPRToVGPRSpills.end()) ?
232       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
233   }
234 
getSGPRSpillVGPRs()235   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
236     return SpillVGPRs;
237   }
238 
239   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
240   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
241 
hasCalculatedTID()242   bool hasCalculatedTID() const { return TIDReg != 0; };
getTIDReg()243   unsigned getTIDReg() const { return TIDReg; };
setTIDReg(unsigned Reg)244   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
245 
getBytesInStackArgArea()246   unsigned getBytesInStackArgArea() const {
247     return BytesInStackArgArea;
248   }
249 
setBytesInStackArgArea(unsigned Bytes)250   void setBytesInStackArgArea(unsigned Bytes) {
251     BytesInStackArgArea = Bytes;
252   }
253 
254   // Add user SGPRs.
255   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
256   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
257   unsigned addQueuePtr(const SIRegisterInfo &TRI);
258   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
259   unsigned addDispatchID(const SIRegisterInfo &TRI);
260   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
261   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
262 
263   // Add system SGPRs.
addWorkGroupIDX()264   unsigned addWorkGroupIDX() {
265     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
266     NumSystemSGPRs += 1;
267     return ArgInfo.WorkGroupIDX.getRegister();
268   }
269 
addWorkGroupIDY()270   unsigned addWorkGroupIDY() {
271     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
272     NumSystemSGPRs += 1;
273     return ArgInfo.WorkGroupIDY.getRegister();
274   }
275 
addWorkGroupIDZ()276   unsigned addWorkGroupIDZ() {
277     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
278     NumSystemSGPRs += 1;
279     return ArgInfo.WorkGroupIDZ.getRegister();
280   }
281 
addWorkGroupInfo()282   unsigned addWorkGroupInfo() {
283     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
284     NumSystemSGPRs += 1;
285     return ArgInfo.WorkGroupInfo.getRegister();
286   }
287 
288   // Add special VGPR inputs
setWorkItemIDX(ArgDescriptor Arg)289   void setWorkItemIDX(ArgDescriptor Arg) {
290     ArgInfo.WorkItemIDX = Arg;
291   }
292 
setWorkItemIDY(ArgDescriptor Arg)293   void setWorkItemIDY(ArgDescriptor Arg) {
294     ArgInfo.WorkItemIDY = Arg;
295   }
296 
setWorkItemIDZ(ArgDescriptor Arg)297   void setWorkItemIDZ(ArgDescriptor Arg) {
298     ArgInfo.WorkItemIDZ = Arg;
299   }
300 
addPrivateSegmentWaveByteOffset()301   unsigned addPrivateSegmentWaveByteOffset() {
302     ArgInfo.PrivateSegmentWaveByteOffset
303       = ArgDescriptor::createRegister(getNextSystemSGPR());
304     NumSystemSGPRs += 1;
305     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
306   }
307 
setPrivateSegmentWaveByteOffset(unsigned Reg)308   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
309     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
310   }
311 
hasPrivateSegmentBuffer()312   bool hasPrivateSegmentBuffer() const {
313     return PrivateSegmentBuffer;
314   }
315 
hasDispatchPtr()316   bool hasDispatchPtr() const {
317     return DispatchPtr;
318   }
319 
hasQueuePtr()320   bool hasQueuePtr() const {
321     return QueuePtr;
322   }
323 
hasKernargSegmentPtr()324   bool hasKernargSegmentPtr() const {
325     return KernargSegmentPtr;
326   }
327 
hasDispatchID()328   bool hasDispatchID() const {
329     return DispatchID;
330   }
331 
hasFlatScratchInit()332   bool hasFlatScratchInit() const {
333     return FlatScratchInit;
334   }
335 
hasWorkGroupIDX()336   bool hasWorkGroupIDX() const {
337     return WorkGroupIDX;
338   }
339 
hasWorkGroupIDY()340   bool hasWorkGroupIDY() const {
341     return WorkGroupIDY;
342   }
343 
hasWorkGroupIDZ()344   bool hasWorkGroupIDZ() const {
345     return WorkGroupIDZ;
346   }
347 
hasWorkGroupInfo()348   bool hasWorkGroupInfo() const {
349     return WorkGroupInfo;
350   }
351 
hasPrivateSegmentWaveByteOffset()352   bool hasPrivateSegmentWaveByteOffset() const {
353     return PrivateSegmentWaveByteOffset;
354   }
355 
hasWorkItemIDX()356   bool hasWorkItemIDX() const {
357     return WorkItemIDX;
358   }
359 
hasWorkItemIDY()360   bool hasWorkItemIDY() const {
361     return WorkItemIDY;
362   }
363 
hasWorkItemIDZ()364   bool hasWorkItemIDZ() const {
365     return WorkItemIDZ;
366   }
367 
hasImplicitArgPtr()368   bool hasImplicitArgPtr() const {
369     return ImplicitArgPtr;
370   }
371 
hasImplicitBufferPtr()372   bool hasImplicitBufferPtr() const {
373     return ImplicitBufferPtr;
374   }
375 
getArgInfo()376   AMDGPUFunctionArgInfo &getArgInfo() {
377     return ArgInfo;
378   }
379 
getArgInfo()380   const AMDGPUFunctionArgInfo &getArgInfo() const {
381     return ArgInfo;
382   }
383 
384   std::pair<const ArgDescriptor *, const TargetRegisterClass *>
getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value)385   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
386     return ArgInfo.getPreloadedValue(Value);
387   }
388 
getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value)389   unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
390     return ArgInfo.getPreloadedValue(Value).first->getRegister();
391   }
392 
getGITPtrHigh()393   unsigned getGITPtrHigh() const {
394     return GITPtrHigh;
395   }
396 
get32BitAddressHighBits()397   unsigned get32BitAddressHighBits() const {
398     return HighBitsOf32BitAddress;
399   }
400 
getNumUserSGPRs()401   unsigned getNumUserSGPRs() const {
402     return NumUserSGPRs;
403   }
404 
getNumPreloadedSGPRs()405   unsigned getNumPreloadedSGPRs() const {
406     return NumUserSGPRs + NumSystemSGPRs;
407   }
408 
getPrivateSegmentWaveByteOffsetSystemSGPR()409   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
410     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
411   }
412 
413   /// Returns the physical register reserved for use as the resource
414   /// descriptor for scratch accesses.
getScratchRSrcReg()415   unsigned getScratchRSrcReg() const {
416     return ScratchRSrcReg;
417   }
418 
setScratchRSrcReg(unsigned Reg)419   void setScratchRSrcReg(unsigned Reg) {
420     assert(Reg != 0 && "Should never be unset");
421     ScratchRSrcReg = Reg;
422   }
423 
getScratchWaveOffsetReg()424   unsigned getScratchWaveOffsetReg() const {
425     return ScratchWaveOffsetReg;
426   }
427 
getFrameOffsetReg()428   unsigned getFrameOffsetReg() const {
429     return FrameOffsetReg;
430   }
431 
setStackPtrOffsetReg(unsigned Reg)432   void setStackPtrOffsetReg(unsigned Reg) {
433     assert(Reg != 0 && "Should never be unset");
434     StackPtrOffsetReg = Reg;
435   }
436 
437   // Note the unset value for this is AMDGPU::SP_REG rather than
438   // NoRegister. This is mostly a workaround for MIR tests where state that
439   // can't be directly computed from the function is not preserved in serialized
440   // MIR.
getStackPtrOffsetReg()441   unsigned getStackPtrOffsetReg() const {
442     return StackPtrOffsetReg;
443   }
444 
setScratchWaveOffsetReg(unsigned Reg)445   void setScratchWaveOffsetReg(unsigned Reg) {
446     assert(Reg != 0 && "Should never be unset");
447     ScratchWaveOffsetReg = Reg;
448     if (isEntryFunction())
449       FrameOffsetReg = ScratchWaveOffsetReg;
450   }
451 
getQueuePtrUserSGPR()452   unsigned getQueuePtrUserSGPR() const {
453     return ArgInfo.QueuePtr.getRegister();
454   }
455 
getImplicitBufferPtrUserSGPR()456   unsigned getImplicitBufferPtrUserSGPR() const {
457     return ArgInfo.ImplicitBufferPtr.getRegister();
458   }
459 
hasSpilledSGPRs()460   bool hasSpilledSGPRs() const {
461     return HasSpilledSGPRs;
462   }
463 
464   void setHasSpilledSGPRs(bool Spill = true) {
465     HasSpilledSGPRs = Spill;
466   }
467 
hasSpilledVGPRs()468   bool hasSpilledVGPRs() const {
469     return HasSpilledVGPRs;
470   }
471 
472   void setHasSpilledVGPRs(bool Spill = true) {
473     HasSpilledVGPRs = Spill;
474   }
475 
hasNonSpillStackObjects()476   bool hasNonSpillStackObjects() const {
477     return HasNonSpillStackObjects;
478   }
479 
480   void setHasNonSpillStackObjects(bool StackObject = true) {
481     HasNonSpillStackObjects = StackObject;
482   }
483 
isStackRealigned()484   bool isStackRealigned() const {
485     return IsStackRealigned;
486   }
487 
488   void setIsStackRealigned(bool Realigned = true) {
489     IsStackRealigned = Realigned;
490   }
491 
getNumSpilledSGPRs()492   unsigned getNumSpilledSGPRs() const {
493     return NumSpilledSGPRs;
494   }
495 
getNumSpilledVGPRs()496   unsigned getNumSpilledVGPRs() const {
497     return NumSpilledVGPRs;
498   }
499 
addToSpilledSGPRs(unsigned num)500   void addToSpilledSGPRs(unsigned num) {
501     NumSpilledSGPRs += num;
502   }
503 
addToSpilledVGPRs(unsigned num)504   void addToSpilledVGPRs(unsigned num) {
505     NumSpilledVGPRs += num;
506   }
507 
getPSInputAddr()508   unsigned getPSInputAddr() const {
509     return PSInputAddr;
510   }
511 
getPSInputEnable()512   unsigned getPSInputEnable() const {
513     return PSInputEnable;
514   }
515 
isPSInputAllocated(unsigned Index)516   bool isPSInputAllocated(unsigned Index) const {
517     return PSInputAddr & (1 << Index);
518   }
519 
markPSInputAllocated(unsigned Index)520   void markPSInputAllocated(unsigned Index) {
521     PSInputAddr |= 1 << Index;
522   }
523 
markPSInputEnabled(unsigned Index)524   void markPSInputEnabled(unsigned Index) {
525     PSInputEnable |= 1 << Index;
526   }
527 
returnsVoid()528   bool returnsVoid() const {
529     return ReturnsVoid;
530   }
531 
setIfReturnsVoid(bool Value)532   void setIfReturnsVoid(bool Value) {
533     ReturnsVoid = Value;
534   }
535 
536   /// \returns A pair of default/requested minimum/maximum flat work group sizes
537   /// for this function.
getFlatWorkGroupSizes()538   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
539     return FlatWorkGroupSizes;
540   }
541 
542   /// \returns Default/requested minimum flat work group size for this function.
getMinFlatWorkGroupSize()543   unsigned getMinFlatWorkGroupSize() const {
544     return FlatWorkGroupSizes.first;
545   }
546 
547   /// \returns Default/requested maximum flat work group size for this function.
getMaxFlatWorkGroupSize()548   unsigned getMaxFlatWorkGroupSize() const {
549     return FlatWorkGroupSizes.second;
550   }
551 
552   /// \returns A pair of default/requested minimum/maximum number of waves per
553   /// execution unit.
getWavesPerEU()554   std::pair<unsigned, unsigned> getWavesPerEU() const {
555     return WavesPerEU;
556   }
557 
558   /// \returns Default/requested minimum number of waves per execution unit.
getMinWavesPerEU()559   unsigned getMinWavesPerEU() const {
560     return WavesPerEU.first;
561   }
562 
563   /// \returns Default/requested maximum number of waves per execution unit.
getMaxWavesPerEU()564   unsigned getMaxWavesPerEU() const {
565     return WavesPerEU.second;
566   }
567 
568   /// \returns Stack object index for \p Dim's work group ID.
getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim)569   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
570     assert(Dim < 3);
571     return DebuggerWorkGroupIDStackObjectIndices[Dim];
572   }
573 
574   /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim,int ObjectIdx)575   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
576     assert(Dim < 3);
577     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
578   }
579 
580   /// \returns Stack object index for \p Dim's work item ID.
getDebuggerWorkItemIDStackObjectIndex(unsigned Dim)581   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
582     assert(Dim < 3);
583     return DebuggerWorkItemIDStackObjectIndices[Dim];
584   }
585 
586   /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
setDebuggerWorkItemIDStackObjectIndex(unsigned Dim,int ObjectIdx)587   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
588     assert(Dim < 3);
589     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
590   }
591 
592   /// \returns SGPR used for \p Dim's work group ID.
getWorkGroupIDSGPR(unsigned Dim)593   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
594     switch (Dim) {
595     case 0:
596       assert(hasWorkGroupIDX());
597       return ArgInfo.WorkGroupIDX.getRegister();
598     case 1:
599       assert(hasWorkGroupIDY());
600       return ArgInfo.WorkGroupIDY.getRegister();
601     case 2:
602       assert(hasWorkGroupIDZ());
603       return ArgInfo.WorkGroupIDZ.getRegister();
604     }
605     llvm_unreachable("unexpected dimension");
606   }
607 
608   /// \returns VGPR used for \p Dim' work item ID.
609   unsigned getWorkItemIDVGPR(unsigned Dim) const;
610 
getLDSWaveSpillSize()611   unsigned getLDSWaveSpillSize() const {
612     return LDSWaveSpillSize;
613   }
614 
getBufferPSV(const SIInstrInfo & TII,const Value * BufferRsrc)615   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
616                                                     const Value *BufferRsrc) {
617     assert(BufferRsrc);
618     auto PSV = BufferPSVs.try_emplace(
619       BufferRsrc,
620       llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
621     return PSV.first->second.get();
622   }
623 
getImagePSV(const SIInstrInfo & TII,const Value * ImgRsrc)624   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
625                                                   const Value *ImgRsrc) {
626     assert(ImgRsrc);
627     auto PSV = ImagePSVs.try_emplace(
628       ImgRsrc,
629       llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
630     return PSV.first->second.get();
631   }
632 
getOccupancy()633   unsigned getOccupancy() const {
634     return Occupancy;
635   }
636 
getMinAllowedOccupancy()637   unsigned getMinAllowedOccupancy() const {
638     if (!isMemoryBound() && !needsWaveLimiter())
639       return Occupancy;
640     return (Occupancy < 4) ? Occupancy : 4;
641   }
642 
643   void limitOccupancy(const MachineFunction &MF);
644 
limitOccupancy(unsigned Limit)645   void limitOccupancy(unsigned Limit) {
646     if (Occupancy > Limit)
647       Occupancy = Limit;
648   }
649 
increaseOccupancy(const MachineFunction & MF,unsigned Limit)650   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
651     if (Occupancy < Limit)
652       Occupancy = Limit;
653     limitOccupancy(MF);
654   }
655 };
656 
657 } // end namespace llvm
658 
659 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
660