1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 16 17 #include "AMDGPUArgumentUsageInfo.h" 18 #include "AMDGPUMachineFunction.h" 19 #include "SIInstrInfo.h" 20 #include "SIRegisterInfo.h" 21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/Optional.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/CodeGen/PseudoSourceValue.h" 27 #include "llvm/CodeGen/TargetInstrInfo.h" 28 #include "llvm/MC/MCRegisterInfo.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include <array> 31 #include <cassert> 32 #include <utility> 33 #include <vector> 34 35 namespace llvm { 36 37 class MachineFrameInfo; 38 class MachineFunction; 39 class TargetRegisterClass; 40 41 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 42 public: 43 // TODO: Is the img rsrc useful? AMDGPUImagePseudoSourceValue(const TargetInstrInfo & TII)44 explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : 45 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} 46 isConstant(const MachineFrameInfo *)47 bool isConstant(const MachineFrameInfo *) const override { 48 // This should probably be true for most images, but we will start by being 49 // conservative. 50 return false; 51 } 52 isAliased(const MachineFrameInfo *)53 bool isAliased(const MachineFrameInfo *) const override { 54 return true; 55 } 56 mayAlias(const MachineFrameInfo *)57 bool mayAlias(const MachineFrameInfo *) const override { 58 return true; 59 } 60 }; 61 62 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 63 public: AMDGPUBufferPseudoSourceValue(const TargetInstrInfo & TII)64 explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) : 65 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } 66 isConstant(const MachineFrameInfo *)67 bool isConstant(const MachineFrameInfo *) const override { 68 // This should probably be true for most images, but we will start by being 69 // conservative. 70 return false; 71 } 72 isAliased(const MachineFrameInfo *)73 bool isAliased(const MachineFrameInfo *) const override { 74 return true; 75 } 76 mayAlias(const MachineFrameInfo *)77 bool mayAlias(const MachineFrameInfo *) const override { 78 return true; 79 } 80 }; 81 82 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 83 /// tells the hardware which interpolation parameters to load. 84 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 85 unsigned TIDReg = AMDGPU::NoRegister; 86 87 // Registers that may be reserved for spilling purposes. These may be the same 88 // as the input registers. 89 unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; 90 unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; 91 92 // This is the current function's incremented size from the kernel's scratch 93 // wave offset register. For an entry function, this is exactly the same as 94 // the ScratchWaveOffsetReg. 95 unsigned FrameOffsetReg = AMDGPU::FP_REG; 96 97 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 98 unsigned StackPtrOffsetReg = AMDGPU::SP_REG; 99 100 AMDGPUFunctionArgInfo ArgInfo; 101 102 // Graphics info. 103 unsigned PSInputAddr = 0; 104 unsigned PSInputEnable = 0; 105 106 /// Number of bytes of arguments this function has on the stack. If the callee 107 /// is expected to restore the argument stack this should be a multiple of 16, 108 /// all usable during a tail call. 109 /// 110 /// The alternative would forbid tail call optimisation in some cases: if we 111 /// want to transfer control from a function with 8-bytes of stack-argument 112 /// space to a function with 16-bytes then misalignment of this value would 113 /// make a stack adjustment necessary, which could not be undone by the 114 /// callee. 115 unsigned BytesInStackArgArea = 0; 116 117 bool ReturnsVoid = true; 118 119 // A pair of default/requested minimum/maximum flat work group sizes. 120 // Minimum - first, maximum - second. 121 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0}; 122 123 // A pair of default/requested minimum/maximum number of waves per execution 124 // unit. Minimum - first, maximum - second. 125 std::pair<unsigned, unsigned> WavesPerEU = {0, 0}; 126 127 // Stack object indices for work group IDs. 128 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}}; 129 130 // Stack object indices for work item IDs. 131 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; 132 133 DenseMap<const Value *, 134 std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs; 135 DenseMap<const Value *, 136 std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; 137 138 private: 139 unsigned LDSWaveSpillSize = 0; 140 unsigned NumUserSGPRs = 0; 141 unsigned NumSystemSGPRs = 0; 142 143 bool HasSpilledSGPRs = false; 144 bool HasSpilledVGPRs = false; 145 bool HasNonSpillStackObjects = false; 146 bool IsStackRealigned = false; 147 148 unsigned NumSpilledSGPRs = 0; 149 unsigned NumSpilledVGPRs = 0; 150 151 // Feature bits required for inputs passed in user SGPRs. 152 bool PrivateSegmentBuffer : 1; 153 bool DispatchPtr : 1; 154 bool QueuePtr : 1; 155 bool KernargSegmentPtr : 1; 156 bool DispatchID : 1; 157 bool FlatScratchInit : 1; 158 159 // Feature bits required for inputs passed in system SGPRs. 160 bool WorkGroupIDX : 1; // Always initialized. 161 bool WorkGroupIDY : 1; 162 bool WorkGroupIDZ : 1; 163 bool WorkGroupInfo : 1; 164 bool PrivateSegmentWaveByteOffset : 1; 165 166 bool WorkItemIDX : 1; // Always initialized. 167 bool WorkItemIDY : 1; 168 bool WorkItemIDZ : 1; 169 170 // Private memory buffer 171 // Compute directly in sgpr[0:1] 172 // Other shaders indirect 64-bits at sgpr[0:1] 173 bool ImplicitBufferPtr : 1; 174 175 // Pointer to where the ABI inserts special kernel arguments separate from the 176 // user arguments. This is an offset from the KernargSegmentPtr. 177 bool ImplicitArgPtr : 1; 178 179 // The hard-wired high half of the address of the global information table 180 // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since 181 // current hardware only allows a 16 bit value. 182 unsigned GITPtrHigh; 183 184 unsigned HighBitsOf32BitAddress; 185 186 // Current recorded maximum possible occupancy. 187 unsigned Occupancy; 188 189 MCPhysReg getNextUserSGPR() const; 190 191 MCPhysReg getNextSystemSGPR() const; 192 193 public: 194 struct SpilledReg { 195 unsigned VGPR = 0; 196 int Lane = -1; 197 198 SpilledReg() = default; SpilledRegSpilledReg199 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} 200 hasLaneSpilledReg201 bool hasLane() { return Lane != -1;} hasRegSpilledReg202 bool hasReg() { return VGPR != 0;} 203 }; 204 205 struct SGPRSpillVGPRCSR { 206 // VGPR used for SGPR spills 207 unsigned VGPR; 208 209 // If the VGPR is a CSR, the stack slot used to save/restore it in the 210 // prolog/epilog. 211 Optional<int> FI; 212 SGPRSpillVGPRCSRSGPRSpillVGPRCSR213 SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {} 214 }; 215 216 private: 217 // SGPR->VGPR spilling support. 218 using SpillRegMask = std::pair<unsigned, unsigned>; 219 220 // Track VGPR + wave index for each subregister of the SGPR spilled to 221 // frameindex key. 222 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 223 unsigned NumVGPRSpillLanes = 0; 224 SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs; 225 226 public: 227 SIMachineFunctionInfo(const MachineFunction &MF); 228 getSGPRToVGPRSpills(int FrameIndex)229 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 230 auto I = SGPRToVGPRSpills.find(FrameIndex); 231 return (I == SGPRToVGPRSpills.end()) ? 232 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 233 } 234 getSGPRSpillVGPRs()235 ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const { 236 return SpillVGPRs; 237 } 238 239 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 240 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 241 hasCalculatedTID()242 bool hasCalculatedTID() const { return TIDReg != 0; }; getTIDReg()243 unsigned getTIDReg() const { return TIDReg; }; setTIDReg(unsigned Reg)244 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 245 getBytesInStackArgArea()246 unsigned getBytesInStackArgArea() const { 247 return BytesInStackArgArea; 248 } 249 setBytesInStackArgArea(unsigned Bytes)250 void setBytesInStackArgArea(unsigned Bytes) { 251 BytesInStackArgArea = Bytes; 252 } 253 254 // Add user SGPRs. 255 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 256 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 257 unsigned addQueuePtr(const SIRegisterInfo &TRI); 258 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 259 unsigned addDispatchID(const SIRegisterInfo &TRI); 260 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 261 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 262 263 // Add system SGPRs. addWorkGroupIDX()264 unsigned addWorkGroupIDX() { 265 ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); 266 NumSystemSGPRs += 1; 267 return ArgInfo.WorkGroupIDX.getRegister(); 268 } 269 addWorkGroupIDY()270 unsigned addWorkGroupIDY() { 271 ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); 272 NumSystemSGPRs += 1; 273 return ArgInfo.WorkGroupIDY.getRegister(); 274 } 275 addWorkGroupIDZ()276 unsigned addWorkGroupIDZ() { 277 ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); 278 NumSystemSGPRs += 1; 279 return ArgInfo.WorkGroupIDZ.getRegister(); 280 } 281 addWorkGroupInfo()282 unsigned addWorkGroupInfo() { 283 ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR()); 284 NumSystemSGPRs += 1; 285 return ArgInfo.WorkGroupInfo.getRegister(); 286 } 287 288 // Add special VGPR inputs setWorkItemIDX(ArgDescriptor Arg)289 void setWorkItemIDX(ArgDescriptor Arg) { 290 ArgInfo.WorkItemIDX = Arg; 291 } 292 setWorkItemIDY(ArgDescriptor Arg)293 void setWorkItemIDY(ArgDescriptor Arg) { 294 ArgInfo.WorkItemIDY = Arg; 295 } 296 setWorkItemIDZ(ArgDescriptor Arg)297 void setWorkItemIDZ(ArgDescriptor Arg) { 298 ArgInfo.WorkItemIDZ = Arg; 299 } 300 addPrivateSegmentWaveByteOffset()301 unsigned addPrivateSegmentWaveByteOffset() { 302 ArgInfo.PrivateSegmentWaveByteOffset 303 = ArgDescriptor::createRegister(getNextSystemSGPR()); 304 NumSystemSGPRs += 1; 305 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 306 } 307 setPrivateSegmentWaveByteOffset(unsigned Reg)308 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 309 ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); 310 } 311 hasPrivateSegmentBuffer()312 bool hasPrivateSegmentBuffer() const { 313 return PrivateSegmentBuffer; 314 } 315 hasDispatchPtr()316 bool hasDispatchPtr() const { 317 return DispatchPtr; 318 } 319 hasQueuePtr()320 bool hasQueuePtr() const { 321 return QueuePtr; 322 } 323 hasKernargSegmentPtr()324 bool hasKernargSegmentPtr() const { 325 return KernargSegmentPtr; 326 } 327 hasDispatchID()328 bool hasDispatchID() const { 329 return DispatchID; 330 } 331 hasFlatScratchInit()332 bool hasFlatScratchInit() const { 333 return FlatScratchInit; 334 } 335 hasWorkGroupIDX()336 bool hasWorkGroupIDX() const { 337 return WorkGroupIDX; 338 } 339 hasWorkGroupIDY()340 bool hasWorkGroupIDY() const { 341 return WorkGroupIDY; 342 } 343 hasWorkGroupIDZ()344 bool hasWorkGroupIDZ() const { 345 return WorkGroupIDZ; 346 } 347 hasWorkGroupInfo()348 bool hasWorkGroupInfo() const { 349 return WorkGroupInfo; 350 } 351 hasPrivateSegmentWaveByteOffset()352 bool hasPrivateSegmentWaveByteOffset() const { 353 return PrivateSegmentWaveByteOffset; 354 } 355 hasWorkItemIDX()356 bool hasWorkItemIDX() const { 357 return WorkItemIDX; 358 } 359 hasWorkItemIDY()360 bool hasWorkItemIDY() const { 361 return WorkItemIDY; 362 } 363 hasWorkItemIDZ()364 bool hasWorkItemIDZ() const { 365 return WorkItemIDZ; 366 } 367 hasImplicitArgPtr()368 bool hasImplicitArgPtr() const { 369 return ImplicitArgPtr; 370 } 371 hasImplicitBufferPtr()372 bool hasImplicitBufferPtr() const { 373 return ImplicitBufferPtr; 374 } 375 getArgInfo()376 AMDGPUFunctionArgInfo &getArgInfo() { 377 return ArgInfo; 378 } 379 getArgInfo()380 const AMDGPUFunctionArgInfo &getArgInfo() const { 381 return ArgInfo; 382 } 383 384 std::pair<const ArgDescriptor *, const TargetRegisterClass *> getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value)385 getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 386 return ArgInfo.getPreloadedValue(Value); 387 } 388 getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value)389 unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 390 return ArgInfo.getPreloadedValue(Value).first->getRegister(); 391 } 392 getGITPtrHigh()393 unsigned getGITPtrHigh() const { 394 return GITPtrHigh; 395 } 396 get32BitAddressHighBits()397 unsigned get32BitAddressHighBits() const { 398 return HighBitsOf32BitAddress; 399 } 400 getNumUserSGPRs()401 unsigned getNumUserSGPRs() const { 402 return NumUserSGPRs; 403 } 404 getNumPreloadedSGPRs()405 unsigned getNumPreloadedSGPRs() const { 406 return NumUserSGPRs + NumSystemSGPRs; 407 } 408 getPrivateSegmentWaveByteOffsetSystemSGPR()409 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 410 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 411 } 412 413 /// Returns the physical register reserved for use as the resource 414 /// descriptor for scratch accesses. getScratchRSrcReg()415 unsigned getScratchRSrcReg() const { 416 return ScratchRSrcReg; 417 } 418 setScratchRSrcReg(unsigned Reg)419 void setScratchRSrcReg(unsigned Reg) { 420 assert(Reg != 0 && "Should never be unset"); 421 ScratchRSrcReg = Reg; 422 } 423 getScratchWaveOffsetReg()424 unsigned getScratchWaveOffsetReg() const { 425 return ScratchWaveOffsetReg; 426 } 427 getFrameOffsetReg()428 unsigned getFrameOffsetReg() const { 429 return FrameOffsetReg; 430 } 431 setStackPtrOffsetReg(unsigned Reg)432 void setStackPtrOffsetReg(unsigned Reg) { 433 assert(Reg != 0 && "Should never be unset"); 434 StackPtrOffsetReg = Reg; 435 } 436 437 // Note the unset value for this is AMDGPU::SP_REG rather than 438 // NoRegister. This is mostly a workaround for MIR tests where state that 439 // can't be directly computed from the function is not preserved in serialized 440 // MIR. getStackPtrOffsetReg()441 unsigned getStackPtrOffsetReg() const { 442 return StackPtrOffsetReg; 443 } 444 setScratchWaveOffsetReg(unsigned Reg)445 void setScratchWaveOffsetReg(unsigned Reg) { 446 assert(Reg != 0 && "Should never be unset"); 447 ScratchWaveOffsetReg = Reg; 448 if (isEntryFunction()) 449 FrameOffsetReg = ScratchWaveOffsetReg; 450 } 451 getQueuePtrUserSGPR()452 unsigned getQueuePtrUserSGPR() const { 453 return ArgInfo.QueuePtr.getRegister(); 454 } 455 getImplicitBufferPtrUserSGPR()456 unsigned getImplicitBufferPtrUserSGPR() const { 457 return ArgInfo.ImplicitBufferPtr.getRegister(); 458 } 459 hasSpilledSGPRs()460 bool hasSpilledSGPRs() const { 461 return HasSpilledSGPRs; 462 } 463 464 void setHasSpilledSGPRs(bool Spill = true) { 465 HasSpilledSGPRs = Spill; 466 } 467 hasSpilledVGPRs()468 bool hasSpilledVGPRs() const { 469 return HasSpilledVGPRs; 470 } 471 472 void setHasSpilledVGPRs(bool Spill = true) { 473 HasSpilledVGPRs = Spill; 474 } 475 hasNonSpillStackObjects()476 bool hasNonSpillStackObjects() const { 477 return HasNonSpillStackObjects; 478 } 479 480 void setHasNonSpillStackObjects(bool StackObject = true) { 481 HasNonSpillStackObjects = StackObject; 482 } 483 isStackRealigned()484 bool isStackRealigned() const { 485 return IsStackRealigned; 486 } 487 488 void setIsStackRealigned(bool Realigned = true) { 489 IsStackRealigned = Realigned; 490 } 491 getNumSpilledSGPRs()492 unsigned getNumSpilledSGPRs() const { 493 return NumSpilledSGPRs; 494 } 495 getNumSpilledVGPRs()496 unsigned getNumSpilledVGPRs() const { 497 return NumSpilledVGPRs; 498 } 499 addToSpilledSGPRs(unsigned num)500 void addToSpilledSGPRs(unsigned num) { 501 NumSpilledSGPRs += num; 502 } 503 addToSpilledVGPRs(unsigned num)504 void addToSpilledVGPRs(unsigned num) { 505 NumSpilledVGPRs += num; 506 } 507 getPSInputAddr()508 unsigned getPSInputAddr() const { 509 return PSInputAddr; 510 } 511 getPSInputEnable()512 unsigned getPSInputEnable() const { 513 return PSInputEnable; 514 } 515 isPSInputAllocated(unsigned Index)516 bool isPSInputAllocated(unsigned Index) const { 517 return PSInputAddr & (1 << Index); 518 } 519 markPSInputAllocated(unsigned Index)520 void markPSInputAllocated(unsigned Index) { 521 PSInputAddr |= 1 << Index; 522 } 523 markPSInputEnabled(unsigned Index)524 void markPSInputEnabled(unsigned Index) { 525 PSInputEnable |= 1 << Index; 526 } 527 returnsVoid()528 bool returnsVoid() const { 529 return ReturnsVoid; 530 } 531 setIfReturnsVoid(bool Value)532 void setIfReturnsVoid(bool Value) { 533 ReturnsVoid = Value; 534 } 535 536 /// \returns A pair of default/requested minimum/maximum flat work group sizes 537 /// for this function. getFlatWorkGroupSizes()538 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 539 return FlatWorkGroupSizes; 540 } 541 542 /// \returns Default/requested minimum flat work group size for this function. getMinFlatWorkGroupSize()543 unsigned getMinFlatWorkGroupSize() const { 544 return FlatWorkGroupSizes.first; 545 } 546 547 /// \returns Default/requested maximum flat work group size for this function. getMaxFlatWorkGroupSize()548 unsigned getMaxFlatWorkGroupSize() const { 549 return FlatWorkGroupSizes.second; 550 } 551 552 /// \returns A pair of default/requested minimum/maximum number of waves per 553 /// execution unit. getWavesPerEU()554 std::pair<unsigned, unsigned> getWavesPerEU() const { 555 return WavesPerEU; 556 } 557 558 /// \returns Default/requested minimum number of waves per execution unit. getMinWavesPerEU()559 unsigned getMinWavesPerEU() const { 560 return WavesPerEU.first; 561 } 562 563 /// \returns Default/requested maximum number of waves per execution unit. getMaxWavesPerEU()564 unsigned getMaxWavesPerEU() const { 565 return WavesPerEU.second; 566 } 567 568 /// \returns Stack object index for \p Dim's work group ID. getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim)569 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const { 570 assert(Dim < 3); 571 return DebuggerWorkGroupIDStackObjectIndices[Dim]; 572 } 573 574 /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx. setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim,int ObjectIdx)575 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 576 assert(Dim < 3); 577 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx; 578 } 579 580 /// \returns Stack object index for \p Dim's work item ID. getDebuggerWorkItemIDStackObjectIndex(unsigned Dim)581 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const { 582 assert(Dim < 3); 583 return DebuggerWorkItemIDStackObjectIndices[Dim]; 584 } 585 586 /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx. setDebuggerWorkItemIDStackObjectIndex(unsigned Dim,int ObjectIdx)587 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 588 assert(Dim < 3); 589 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx; 590 } 591 592 /// \returns SGPR used for \p Dim's work group ID. getWorkGroupIDSGPR(unsigned Dim)593 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 594 switch (Dim) { 595 case 0: 596 assert(hasWorkGroupIDX()); 597 return ArgInfo.WorkGroupIDX.getRegister(); 598 case 1: 599 assert(hasWorkGroupIDY()); 600 return ArgInfo.WorkGroupIDY.getRegister(); 601 case 2: 602 assert(hasWorkGroupIDZ()); 603 return ArgInfo.WorkGroupIDZ.getRegister(); 604 } 605 llvm_unreachable("unexpected dimension"); 606 } 607 608 /// \returns VGPR used for \p Dim' work item ID. 609 unsigned getWorkItemIDVGPR(unsigned Dim) const; 610 getLDSWaveSpillSize()611 unsigned getLDSWaveSpillSize() const { 612 return LDSWaveSpillSize; 613 } 614 getBufferPSV(const SIInstrInfo & TII,const Value * BufferRsrc)615 const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII, 616 const Value *BufferRsrc) { 617 assert(BufferRsrc); 618 auto PSV = BufferPSVs.try_emplace( 619 BufferRsrc, 620 llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII)); 621 return PSV.first->second.get(); 622 } 623 getImagePSV(const SIInstrInfo & TII,const Value * ImgRsrc)624 const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, 625 const Value *ImgRsrc) { 626 assert(ImgRsrc); 627 auto PSV = ImagePSVs.try_emplace( 628 ImgRsrc, 629 llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); 630 return PSV.first->second.get(); 631 } 632 getOccupancy()633 unsigned getOccupancy() const { 634 return Occupancy; 635 } 636 getMinAllowedOccupancy()637 unsigned getMinAllowedOccupancy() const { 638 if (!isMemoryBound() && !needsWaveLimiter()) 639 return Occupancy; 640 return (Occupancy < 4) ? Occupancy : 4; 641 } 642 643 void limitOccupancy(const MachineFunction &MF); 644 limitOccupancy(unsigned Limit)645 void limitOccupancy(unsigned Limit) { 646 if (Occupancy > Limit) 647 Occupancy = Limit; 648 } 649 increaseOccupancy(const MachineFunction & MF,unsigned Limit)650 void increaseOccupancy(const MachineFunction &MF, unsigned Limit) { 651 if (Occupancy < Limit) 652 Occupancy = Limit; 653 limitOccupancy(MF); 654 } 655 }; 656 657 } // end namespace llvm 658 659 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 660