//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDGPUTargetTransformInfo.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
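
// For example, getBitMask(4, 3) yields 0x70; packBits(5, 0, 4, 3) stores 5 in
// that field, producing 0x50; and unpackBits(0x50, 4, 3) recovers 5.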

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return (VersionMajor >= 10) ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
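
// Taken together, these helpers describe the s_waitcnt immediate layout:
// bits [3:0] hold the low vmcnt, bits [6:4] hold expcnt, lgkmcnt starts at
// bit 8 (4 bits wide before gfx10, 6 bits from gfx10), and bits [15:14] hold
// the high vmcnt where supported (gfx9+).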

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto Version = getIsaVersion(STI->getCPU());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << Version.Major
         << Version.Minor
         << Version.Stepping;

  if (hasXNACK(*STI))
    Stream << "+xnack";
  if (hasSRAMECC(*STI))
    Stream << "+sram-ecc";

  Stream.flush();
}

bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
  return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
             STI->getFeatureBits().test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}
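
// For example, on a wave64 subtarget a flat work group size of 256 occupies
// 4 waves, so at most 40 / 4 = 10 work groups fit on a CU (capped at 16).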
285 
getMaxWavesPerCU(const MCSubtargetInfo * STI)286 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
287   return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
288 }
289 
getMaxWavesPerCU(const MCSubtargetInfo * STI,unsigned FlatWorkGroupSize)290 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
291                           unsigned FlatWorkGroupSize) {
292   return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
293 }
294 
getMinWavesPerEU(const MCSubtargetInfo * STI)295 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
296   return 1;
297 }
298 
getMaxWavesPerEU(const MCSubtargetInfo * STI)299 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
300   // FIXME: Need to take scratch memory into account.
301   if (!isGFX10(*STI))
302     return 10;
303   return 20;
304 }
305 
getMaxWavesPerEU(const MCSubtargetInfo * STI,unsigned FlatWorkGroupSize)306 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
307                           unsigned FlatWorkGroupSize) {
308   return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
309                  getEUsPerCU(STI)) / getEUsPerCU(STI);
310 }
311 
getMinFlatWorkGroupSize(const MCSubtargetInfo * STI)312 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
313   return 1;
314 }
315 
getMaxFlatWorkGroupSize(const MCSubtargetInfo * STI)316 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
317   // Some subtargets allow encoding 2048, but this isn't tested or supported.
318   return 1024;
319 }
320 
getWavesPerWorkGroup(const MCSubtargetInfo * STI,unsigned FlatWorkGroupSize)321 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
322                               unsigned FlatWorkGroupSize) {
323   return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
324                  getWavefrontSize(STI);
325 }
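
// For example, a flat work group size of 100 on a wave64 subtarget rounds up
// to 128 lanes and therefore occupies alignTo(100, 64) / 64 = 2 waves.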

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed)
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
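
// For example, with the 8-SGPR encoding granule, NumSGPRs = 10 rounds up to
// 16 and encodes as 16 / 8 - 1 = 1.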

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32) {
  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);
  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32) {
  return getVGPRAllocGranule(STI, EnableWavefrontSize32);
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (!isGFX10(*STI))
    return 256;
  return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  return 256;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
                getVGPRAllocGranule(STI)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}
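
// For example, with the wave64 encoding granule of 4, NumVGPRs = 9 rounds up
// to 12 and encodes as 12 / 4 - 1 = 2.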

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
      S_00B848_MEM_ORDERED(1);
  }
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
                                getLgkmcntBitWidth(Version.Major));
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
                                    getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
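
// For example, on a gfx9 target encodeWaitcnt(Version, /*Vmcnt=*/1,
// /*Expcnt=*/2, /*Lgkmcnt=*/3) yields 0x321 (vmcnt in bits [3:0], expcnt in
// bits [6:4], lgkmcnt in bits [11:8]); decodeWaitcnt inverts the operation.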

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name) {
  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
    if (IdSymbolic[Id] && Name == IdSymbolic[Id])
      return Id;
  }
  return ID_UNKNOWN_;
}

static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI) || isVI(STI))
    return ID_SYMBOLIC_FIRST_GFX9_;
  else if (isGFX9(STI))
    return ID_SYMBOLIC_FIRST_GFX10_;
  else
    return ID_SYMBOLIC_LAST_;
}

bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
  return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
         IdSymbolic[Id];
}

bool isValidHwreg(int64_t Id) {
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
}

bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
}

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) |
         (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}
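
// For example, encodeHwreg(Id, /*Offset=*/0, /*Width=*/32) packs the register
// id, a zero bit offset, and Width - 1 = 31 into the disjoint fields defined
// by the *_SHIFT_ constants from SIDefines.h, matching the
// s_getreg_b32/s_setreg_b32 immediate layout.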

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
}

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
}

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//

namespace SendMsg {

int64_t getMsgId(const StringRef Name) {
  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
    if (IdSymbolic[i] && Name == IdSymbolic[i])
      return i;
  }
  return ID_UNKNOWN_;
}

static bool isValidMsgId(int64_t MsgId) {
  return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
}

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
  if (Strict) {
    if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
      return isGFX9(STI) || isGFX10(STI);
    else
      return isValidMsgId(MsgId);
  } else {
    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
  }
}

StringRef getMsgName(int64_t MsgId) {
  return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
}

int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}

bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  switch(MsgId)
  {
  case ID_GS:
    return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
  case ID_GS_DONE:
    return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
  case ID_SYSMSG:
    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
  default:
    return OpId == OP_NONE_;
  }
}

StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
  assert(msgRequiresOp(MsgId));
  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  switch(MsgId)
  {
  case ID_GS:
    return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
  case ID_GS_DONE:
    return (OpId == OP_GS_NOP)?
           (StreamId == STREAM_ID_NONE_) :
           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
  default:
    return StreamId == STREAM_ID_NONE_;
  }
}

bool msgRequiresOp(int64_t MsgId) {
  return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
}

bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
  return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
}

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId) {
  MsgId = Val & ID_MASK_;
  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
}

uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return (MsgId << ID_SHIFT_) |
         (OpId << OP_SHIFT_) |
         (StreamId << STREAM_ID_SHIFT_);
}
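
// For example, encodeMsg(ID_GS, /*OpId=*/OP_GS_EMIT, /*StreamId=*/1) packs
// the message id into the low bits with the operation and stream id above it,
// per the *_SHIFT_ constants from SIDefines.h; decodeMsg above inverts it.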

} // namespace SendMsg

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9_GFX10(TTMP0) \
  CASE_VI_GFX9_GFX10(TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2) \
  CASE_VI_GFX9_GFX10(TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4) \
  CASE_VI_GFX9_GFX10(TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6) \
  CASE_VI_GFX9_GFX10(TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8) \
  CASE_VI_GFX9_GFX10(TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10) \
  CASE_VI_GFX9_GFX10(TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12) \
  CASE_VI_GFX9_GFX10(TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14) \
  CASE_VI_GFX9_GFX10(TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9_GFX10(node) \
  case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10

#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
    return 160;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
    return 512;
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values.  For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
    int16_t Trunc = static_cast<int16_t>(Literal);
    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
  }
  if (!(Literal & 0xffff))
    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
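
// For example, 0x40004000 (2.0 in both halves) is inlinable, while
// 0x40003C00 (2.0 and 1.0 in different halves) is not, because both 16-bit
// halves must match a single inline constant.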

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
    // Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10(ST);
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return (hasSMEMByteOffset(ST)) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
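
// For example, a byte offset of 1024 is encoded directly on subtargets with
// SMEM byte offsets and easily fits the 20-bit field, whereas on SI it is
// encoded as 1024 >> 2 = 256 dwords, which no longer fits the 8-bit field.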

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align) {
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
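
// For example, with Align = 4, Imm = 5000 exceeds MaxImm + 64, so the split
// produces SOffset = 4092 and ImmOffset = 908 (4092 + 908 == 5000), setting
// all low bits of SOffset except the alignment bits.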

SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
                                               const GCNSubtarget &ST) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  StringRef DX10ClampAttr
    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

  FP32Denormals = ST.hasFP32Denormals(F);
  FP64FP16Denormals = ST.hasFP64FP16Denormals(F);
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX10(STI)
             ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format)
                      : getGfx9BufferFormatInfo(Format);
}

} // namespace AMDGPU
} // namespace llvm