1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUAsmUtils.h"
12 #include "AMDGPUTargetTransformInfo.h"
13 #include "SIDefines.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/ADT/Triple.h"
16 #include "llvm/BinaryFormat/ELF.h"
17 #include "llvm/CodeGen/MachineMemOperand.h"
18 #include "llvm/IR/Attributes.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/IR/Instruction.h"
23 #include "llvm/IR/IntrinsicsAMDGPU.h"
24 #include "llvm/IR/IntrinsicsR600.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCInstrInfo.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/SubtargetFeature.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/MathExtras.h"
38 #include <algorithm>
39 #include <cassert>
40 #include <cstdint>
41 #include <cstring>
42 #include <utility>
43
44 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
45
46 #define GET_INSTRINFO_NAMED_OPS
47 #define GET_INSTRMAP_INFO
48 #include "AMDGPUGenInstrInfo.inc"
49 #undef GET_INSTRMAP_INFO
50 #undef GET_INSTRINFO_NAMED_OPS
51
52 static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
53 "amdhsa-code-object-version", llvm::cl::Hidden,
54 llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(3));
55
56 namespace {
57
58 /// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
60 return ((1 << Width) - 1) << Shift;
61 }
62
63 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
64 ///
65 /// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
67 Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
68 Dst |= (Src << Shift) & getBitMask(Shift, Width);
69 return Dst;
70 }
71
72 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
73 ///
74 /// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
76 return (Src & getBitMask(Shift, Width)) >> Shift;
77 }
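// For example, packBits(0b101, 0, /*Shift=*/4, /*Width=*/3) yields 0x50, and
// unpackBits(0x50, 4, 3) recovers 0b101.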
78
79 /// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }
81
82 /// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }
84
85 /// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }
87
88 /// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }
90
91 /// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }
93
94 /// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
96 return (VersionMajor >= 10) ? 6 : 4;
97 }
98
99 /// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }
101
102 /// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
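// Taken together, these getters describe the waitcnt encoding: vmcnt occupies
// bits [3:0] (plus [15:14] on gfx9+), expcnt bits [6:4], and lgkmcnt bits
// [11:8] (widened to [13:8] on gfx10+).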
104
105 } // end namespace anonymous
106
107 namespace llvm {
108
109 namespace AMDGPU {
110
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
112 if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
113 return None;
114
115 switch (AmdhsaCodeObjectVersion) {
116 case 2:
117 return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
118 case 3:
119 return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
120 default:
121 return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
122 }
123 }
124
bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
126 if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
127 return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
128 return false;
129 }
130
bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
132 if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
133 return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
134 return false;
135 }
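// Both helpers return false for non-AMDHSA targets, where getHsaAbiVersion()
// returns None.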
136
137 #define GET_MIMGBaseOpcodesTable_IMPL
138 #define GET_MIMGDimInfoTable_IMPL
139 #define GET_MIMGInfoTable_IMPL
140 #define GET_MIMGLZMappingTable_IMPL
141 #define GET_MIMGMIPMappingTable_IMPL
142 #define GET_MIMGG16MappingTable_IMPL
143 #include "AMDGPUGenSearchableTables.inc"
144
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
146 unsigned VDataDwords, unsigned VAddrDwords) {
147 const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
148 VDataDwords, VAddrDwords);
149 return Info ? Info->Opcode : -1;
150 }
151
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
153 const MIMGInfo *Info = getMIMGInfo(Opc);
154 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
155 }
156
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
158 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
159 const MIMGInfo *NewInfo =
160 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
161 NewChannels, OrigInfo->VAddrDwords);
162 return NewInfo ? NewInfo->Opcode : -1;
163 }
164
165 struct MUBUFInfo {
166 uint16_t Opcode;
167 uint16_t BaseOpcode;
168 uint8_t elements;
169 bool has_vaddr;
170 bool has_srsrc;
171 bool has_soffset;
172 };
173
174 struct MTBUFInfo {
175 uint16_t Opcode;
176 uint16_t BaseOpcode;
177 uint8_t elements;
178 bool has_vaddr;
179 bool has_srsrc;
180 bool has_soffset;
181 };
182
183 struct SMInfo {
184 uint16_t Opcode;
185 bool IsBuffer;
186 };
187
188 #define GET_MTBUFInfoTable_DECL
189 #define GET_MTBUFInfoTable_IMPL
190 #define GET_MUBUFInfoTable_DECL
191 #define GET_MUBUFInfoTable_IMPL
192 #define GET_SMInfoTable_DECL
193 #define GET_SMInfoTable_IMPL
194 #include "AMDGPUGenSearchableTables.inc"
195
int getMTBUFBaseOpcode(unsigned Opc) {
197 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
198 return Info ? Info->BaseOpcode : -1;
199 }
200
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
202 const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
203 return Info ? Info->Opcode : -1;
204 }
205
int getMTBUFElements(unsigned Opc) {
207 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
208 return Info ? Info->elements : 0;
209 }
210
bool getMTBUFHasVAddr(unsigned Opc) {
212 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
213 return Info ? Info->has_vaddr : false;
214 }
215
bool getMTBUFHasSrsrc(unsigned Opc) {
217 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
218 return Info ? Info->has_srsrc : false;
219 }
220
bool getMTBUFHasSoffset(unsigned Opc) {
222 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
223 return Info ? Info->has_soffset : false;
224 }
225
int getMUBUFBaseOpcode(unsigned Opc) {
227 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
228 return Info ? Info->BaseOpcode : -1;
229 }
230
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
232 const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
233 return Info ? Info->Opcode : -1;
234 }
235
int getMUBUFElements(unsigned Opc) {
237 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
238 return Info ? Info->elements : 0;
239 }
240
bool getMUBUFHasVAddr(unsigned Opc) {
242 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
243 return Info ? Info->has_vaddr : false;
244 }
245
bool getMUBUFHasSrsrc(unsigned Opc) {
247 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
248 return Info ? Info->has_srsrc : false;
249 }
250
bool getMUBUFHasSoffset(unsigned Opc) {
252 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
253 return Info ? Info->has_soffset : false;
254 }
255
bool getSMEMIsBuffer(unsigned Opc) {
257 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
258 return Info ? Info->IsBuffer : false;
259 }
260
261 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any
262 // header files, so we need to wrap it in a function that takes unsigned
263 // instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
265 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
266 }
267
268 namespace IsaInfo {
269
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
271 auto TargetTriple = STI->getTargetTriple();
272 auto Version = getIsaVersion(STI->getCPU());
273
274 Stream << TargetTriple.getArchName() << '-'
275 << TargetTriple.getVendorName() << '-'
276 << TargetTriple.getOSName() << '-'
277 << TargetTriple.getEnvironmentName() << '-'
278 << "gfx"
279 << Version.Major
280 << Version.Minor
281 << Version.Stepping;
282
283 if (hasXNACK(*STI))
284 Stream << "+xnack";
285 if (hasSRAMECC(*STI))
286 Stream << "+sram-ecc";
287
288 Stream.flush();
289 }
290
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
292 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
293 return 16;
294 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
295 return 32;
296
297 return 64;
298 }
299
unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
301 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
302 return 32768;
303 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
304 return 65536;
305
306 return 0;
307 }
308
unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
310 // "Per CU" really means "per whatever functional block the waves of a
311 // workgroup must share". For gfx10 in CU mode this is the CU, which contains
312 // two SIMDs.
313 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
314 return 2;
315 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
316 // two CUs, so a total of four SIMDs.
317 return 4;
318 }
319
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
321 unsigned FlatWorkGroupSize) {
322 assert(FlatWorkGroupSize != 0);
323 if (STI->getTargetTriple().getArch() != Triple::amdgcn)
324 return 8;
325 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
326 if (N == 1)
327 return 40;
328 N = 40 / N;
329 return std::min(N, 16u);
330 }
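// For example, with a 64-wide wavefront and FlatWorkGroupSize = 256 each
// workgroup needs 4 waves, so at most 40 / 4 = 10 workgroups fit on a CU.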
331
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
333 return 1;
334 }
335
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
337 // FIXME: Need to take scratch memory into account.
338 if (!isGFX10Plus(*STI))
339 return 10;
340 return hasGFX10_3Insts(*STI) ? 16 : 20;
341 }
342
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
344 unsigned FlatWorkGroupSize) {
345 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
346 getEUsPerCU(STI));
347 }
348
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
350 return 1;
351 }
352
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
354 // Some subtargets allow encoding 2048, but this isn't tested or supported.
355 return 1024;
356 }
357
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
359 unsigned FlatWorkGroupSize) {
360 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
361 }
362
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
364 IsaVersion Version = getIsaVersion(STI->getCPU());
365 if (Version.Major >= 10)
366 return getAddressableNumSGPRs(STI);
367 if (Version.Major >= 8)
368 return 16;
369 return 8;
370 }
371
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
373 return 8;
374 }
375
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
377 IsaVersion Version = getIsaVersion(STI->getCPU());
378 if (Version.Major >= 8)
379 return 800;
380 return 512;
381 }
382
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
384 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
385 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
386
387 IsaVersion Version = getIsaVersion(STI->getCPU());
388 if (Version.Major >= 10)
389 return 106;
390 if (Version.Major >= 8)
391 return 102;
392 return 104;
393 }
394
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
396 assert(WavesPerEU != 0);
397
398 IsaVersion Version = getIsaVersion(STI->getCPU());
399 if (Version.Major >= 10)
400 return 0;
401
402 if (WavesPerEU >= getMaxWavesPerEU(STI))
403 return 0;
404
405 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
406 if (STI->getFeatureBits().test(FeatureTrapHandler))
407 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
408 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
409 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
410 }
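// For example, on gfx8 without the trap handler (800 SGPRs, allocation
// granule 16), WavesPerEU = 8 gives alignDown(800 / 9, 16) + 1 = 81 SGPRs.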
411
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
413 bool Addressable) {
414 assert(WavesPerEU != 0);
415
416 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
417 IsaVersion Version = getIsaVersion(STI->getCPU());
418 if (Version.Major >= 10)
419 return Addressable ? AddressableNumSGPRs : 108;
420 if (Version.Major >= 8 && !Addressable)
421 AddressableNumSGPRs = 112;
422 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
423 if (STI->getFeatureBits().test(FeatureTrapHandler))
424 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
425 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
426 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
427 }
428
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
430 bool FlatScrUsed, bool XNACKUsed) {
431 unsigned ExtraSGPRs = 0;
432 if (VCCUsed)
433 ExtraSGPRs = 2;
434
435 IsaVersion Version = getIsaVersion(STI->getCPU());
436 if (Version.Major >= 10)
437 return ExtraSGPRs;
438
439 if (Version.Major < 8) {
440 if (FlatScrUsed)
441 ExtraSGPRs = 4;
442 } else {
443 if (XNACKUsed)
444 ExtraSGPRs = 4;
445
446 if (FlatScrUsed)
447 ExtraSGPRs = 6;
448 }
449
450 return ExtraSGPRs;
451 }
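// Note that these counts are maxima rather than sums: on gfx8/gfx9, a kernel
// that uses VCC, XNACK and flat scratch still reports 6 extra SGPRs.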
452
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
454 bool FlatScrUsed) {
455 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
456 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
457 }
458
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
460 NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
461 // SGPRBlocks is actual number of SGPR blocks minus 1.
462 return NumSGPRs / getSGPREncodingGranule(STI) - 1;
463 }
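// For example, NumSGPRs = 37 rounds up to 40 with an encoding granule of 8 and
// is encoded as 40 / 8 - 1 = 4 SGPR blocks.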
464
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
466 Optional<bool> EnableWavefrontSize32) {
467 bool IsWave32 = EnableWavefrontSize32 ?
468 *EnableWavefrontSize32 :
469 STI->getFeatureBits().test(FeatureWavefrontSize32);
470
471 if (hasGFX10_3Insts(*STI))
472 return IsWave32 ? 16 : 8;
473
474 return IsWave32 ? 8 : 4;
475 }
476
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
478 Optional<bool> EnableWavefrontSize32) {
479
480 bool IsWave32 = EnableWavefrontSize32 ?
481 *EnableWavefrontSize32 :
482 STI->getFeatureBits().test(FeatureWavefrontSize32);
483
484 return IsWave32 ? 8 : 4;
485 }
486
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
488 if (!isGFX10Plus(*STI))
489 return 256;
490 return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
491 }
492
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
494 return 256;
495 }
496
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
498 assert(WavesPerEU != 0);
499
500 if (WavesPerEU >= getMaxWavesPerEU(STI))
501 return 0;
502 unsigned MinNumVGPRs =
503 alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
504 getVGPRAllocGranule(STI)) + 1;
505 return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
506 }
507
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
509 assert(WavesPerEU != 0);
510
511 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
512 getVGPRAllocGranule(STI));
513 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
514 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
515 }
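// For example, on a pre-gfx10 target (256 VGPRs, allocation granule 4),
// WavesPerEU = 5 allows at most alignDown(256 / 5, 4) = 48 VGPRs per wave.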
516
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
518 Optional<bool> EnableWavefrontSize32) {
519 NumVGPRs = alignTo(std::max(1u, NumVGPRs),
520 getVGPREncodingGranule(STI, EnableWavefrontSize32));
521 // VGPRBlocks is actual number of VGPR blocks minus 1.
522 return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
523 }
524
525 } // end namespace IsaInfo
526
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
528 const MCSubtargetInfo *STI) {
529 IsaVersion Version = getIsaVersion(STI->getCPU());
530
531 memset(&Header, 0, sizeof(Header));
532
533 Header.amd_kernel_code_version_major = 1;
534 Header.amd_kernel_code_version_minor = 2;
535 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
536 Header.amd_machine_version_major = Version.Major;
537 Header.amd_machine_version_minor = Version.Minor;
538 Header.amd_machine_version_stepping = Version.Stepping;
539 Header.kernel_code_entry_byte_offset = sizeof(Header);
540 Header.wavefront_size = 6;
541
542 // If the code object does not support indirect functions, then the value must
543 // be 0xffffffff.
544 Header.call_convention = -1;
545
546 // These alignment values are specified in powers of two, so alignment =
547 // 2^n. The minimum alignment is 2^4 = 16.
548 Header.kernarg_segment_alignment = 4;
549 Header.group_segment_alignment = 4;
550 Header.private_segment_alignment = 4;
551
552 if (Version.Major >= 10) {
553 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
554 Header.wavefront_size = 5;
555 Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
556 }
557 Header.compute_pgm_resource_registers |=
558 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
559 S_00B848_MEM_ORDERED(1);
560 }
561 }
562
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
564 const MCSubtargetInfo *STI) {
565 IsaVersion Version = getIsaVersion(STI->getCPU());
566
567 amdhsa::kernel_descriptor_t KD;
568 memset(&KD, 0, sizeof(KD));
569
570 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
571 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
572 amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
573 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
574 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
575 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
576 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
577 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
578 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
579 if (Version.Major >= 10) {
580 AMDHSA_BITS_SET(KD.kernel_code_properties,
581 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
582 STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
583 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
584 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
585 STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
586 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
587 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
588 }
589 return KD;
590 }
591
bool isGroupSegment(const GlobalValue *GV) {
593 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
594 }
595
bool isGlobalSegment(const GlobalValue *GV) {
597 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
598 }
599
bool isReadOnlySegment(const GlobalValue *GV) {
601 unsigned AS = GV->getAddressSpace();
602 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
603 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
604 }
605
bool shouldEmitConstantsToTextSection(const Triple &TT) {
607 return TT.getArch() == Triple::r600;
608 }
609
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
611 Attribute A = F.getFnAttribute(Name);
612 int Result = Default;
613
614 if (A.isStringAttribute()) {
615 StringRef Str = A.getValueAsString();
616 if (Str.getAsInteger(0, Result)) {
617 LLVMContext &Ctx = F.getContext();
618 Ctx.emitError("can't parse integer attribute " + Name);
619 }
620 }
621
622 return Result;
623 }
624
std::pair<int, int> getIntegerPairAttribute(const Function &F,
626 StringRef Name,
627 std::pair<int, int> Default,
628 bool OnlyFirstRequired) {
629 Attribute A = F.getFnAttribute(Name);
630 if (!A.isStringAttribute())
631 return Default;
632
633 LLVMContext &Ctx = F.getContext();
634 std::pair<int, int> Ints = Default;
635 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
636 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
637 Ctx.emitError("can't parse first integer attribute " + Name);
638 return Default;
639 }
640 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
641 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
642 Ctx.emitError("can't parse second integer attribute " + Name);
643 return Default;
644 }
645 }
646
647 return Ints;
648 }
649
unsigned getVmcntBitMask(const IsaVersion &Version) {
651 unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
652 if (Version.Major < 9)
653 return VmcntLo;
654
655 unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
656 return VmcntLo | VmcntHi;
657 }
658
unsigned getExpcntBitMask(const IsaVersion &Version) {
660 return (1 << getExpcntBitWidth()) - 1;
661 }
662
unsigned getLgkmcntBitMask(const IsaVersion &Version) {
664 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
665 }
666
unsigned getWaitcntBitMask(const IsaVersion &Version) {
668 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
669 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
670 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
671 getLgkmcntBitWidth(Version.Major));
672 unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
673 if (Version.Major < 9)
674 return Waitcnt;
675
676 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
677 return Waitcnt | VmcntHi;
678 }
679
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
681 unsigned VmcntLo =
682 unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
683 if (Version.Major < 9)
684 return VmcntLo;
685
686 unsigned VmcntHi =
687 unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
688 VmcntHi <<= getVmcntBitWidthLo();
689 return VmcntLo | VmcntHi;
690 }
691
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
693 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
694 }
695
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
697 return unpackBits(Waitcnt, getLgkmcntBitShift(),
698 getLgkmcntBitWidth(Version.Major));
699 }
700
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
702 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
703 Vmcnt = decodeVmcnt(Version, Waitcnt);
704 Expcnt = decodeExpcnt(Version, Waitcnt);
705 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
706 }
707
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
709 Waitcnt Decoded;
710 Decoded.VmCnt = decodeVmcnt(Version, Encoded);
711 Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
712 Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
713 return Decoded;
714 }
715
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
717 unsigned Vmcnt) {
718 Waitcnt =
719 packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
720 if (Version.Major < 9)
721 return Waitcnt;
722
723 Vmcnt >>= getVmcntBitWidthLo();
724 return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
725 }
726
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
728 unsigned Expcnt) {
729 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
730 }
731
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
733 unsigned Lgkmcnt) {
734 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
735 getLgkmcntBitWidth(Version.Major));
736 }
737
unsigned encodeWaitcnt(const IsaVersion &Version,
739 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
740 unsigned Waitcnt = getWaitcntBitMask(Version);
741 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
742 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
743 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
744 return Waitcnt;
745 }
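// For example, encodeWaitcnt(Version, 0, 0, 0) yields 0 on gfx9, i.e. an
// s_waitcnt immediate that requires all three counters to drain to zero.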
746
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
748 return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
749 }
750
751 //===----------------------------------------------------------------------===//
752 // hwreg
753 //===----------------------------------------------------------------------===//
754
755 namespace Hwreg {
756
int64_t getHwregId(const StringRef Name) {
758 for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
759 if (IdSymbolic[Id] && Name == IdSymbolic[Id])
760 return Id;
761 }
762 return ID_UNKNOWN_;
763 }
764
static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
766 if (isSI(STI) || isCI(STI) || isVI(STI))
767 return ID_SYMBOLIC_FIRST_GFX9_;
768 else if (isGFX9(STI))
769 return ID_SYMBOLIC_FIRST_GFX10_;
770 else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
771 return ID_SYMBOLIC_FIRST_GFX1030_;
772 else
773 return ID_SYMBOLIC_LAST_;
774 }
775
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
777 return
778 ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
779 IdSymbolic[Id] && (Id != ID_XNACK_MASK || !AMDGPU::isGFX10_BEncoding(STI));
780 }
781
bool isValidHwreg(int64_t Id) {
783 return 0 <= Id && isUInt<ID_WIDTH_>(Id);
784 }
785
bool isValidHwregOffset(int64_t Offset) {
787 return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
788 }
789
bool isValidHwregWidth(int64_t Width) {
791 return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
792 }
793
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
795 return (Id << ID_SHIFT_) |
796 (Offset << OFFSET_SHIFT_) |
797 ((Width - 1) << WIDTH_M1_SHIFT_);
798 }
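// For example, encodeHwreg(Id, /*Offset=*/0, /*Width=*/32) selects the whole
// 32-bit register, since the width is stored biased by one.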
799
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
801 return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
802 }
803
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
805 Id = (Val & ID_MASK_) >> ID_SHIFT_;
806 Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
807 Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
808 }
809
810 } // namespace Hwreg
811
812 //===----------------------------------------------------------------------===//
813 // MTBUF Format
814 //===----------------------------------------------------------------------===//
815
816 namespace MTBUFFormat {
817
int64_t getDfmt(const StringRef Name) {
819 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
820 if (Name == DfmtSymbolic[Id])
821 return Id;
822 }
823 return DFMT_UNDEF;
824 }
825
StringRef getDfmtName(unsigned Id) {
827 assert(Id <= DFMT_MAX);
828 return DfmtSymbolic[Id];
829 }
830
static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
832 if (isSI(STI) || isCI(STI))
833 return NfmtSymbolicSICI;
834 if (isVI(STI) || isGFX9(STI))
835 return NfmtSymbolicVI;
836 return NfmtSymbolicGFX10;
837 }
838
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
840 auto lookupTable = getNfmtLookupTable(STI);
841 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
842 if (Name == lookupTable[Id])
843 return Id;
844 }
845 return NFMT_UNDEF;
846 }
847
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
849 assert(Id <= NFMT_MAX);
850 return getNfmtLookupTable(STI)[Id];
851 }
852
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
854 unsigned Dfmt;
855 unsigned Nfmt;
856 decodeDfmtNfmt(Id, Dfmt, Nfmt);
857 return isValidNfmt(Nfmt, STI);
858 }
859
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
861 return !getNfmtName(Id, STI).empty();
862 }
863
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
865 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
866 }
867
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
869 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
870 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
871 }
872
int64_t getUnifiedFormat(const StringRef Name) {
874 for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
875 if (Name == UfmtSymbolic[Id])
876 return Id;
877 }
878 return UFMT_UNDEF;
879 }
880
StringRef getUnifiedFormatName(unsigned Id) {
882 return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : "";
883 }
884
bool isValidUnifiedFormat(unsigned Id) {
886 return Id <= UFMT_LAST;
887 }
888
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) {
890 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
891 for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
892 if (Fmt == DfmtNfmt2UFmt[Id])
893 return Id;
894 }
895 return UFMT_UNDEF;
896 }
897
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
899 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
900 }
901
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
903 if (isGFX10Plus(STI))
904 return UFMT_DEFAULT;
905 return DFMT_NFMT_DEFAULT;
906 }
907
908 } // namespace MTBUFFormat
909
910 //===----------------------------------------------------------------------===//
911 // SendMsg
912 //===----------------------------------------------------------------------===//
913
914 namespace SendMsg {
915
int64_t getMsgId(const StringRef Name) {
917 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
918 if (IdSymbolic[i] && Name == IdSymbolic[i])
919 return i;
920 }
921 return ID_UNKNOWN_;
922 }
923
static bool isValidMsgId(int64_t MsgId) {
925 return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
926 }
927
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
929 if (Strict) {
930 if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
931 return isGFX9Plus(STI);
932 else
933 return isValidMsgId(MsgId);
934 } else {
935 return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
936 }
937 }
938
StringRef getMsgName(int64_t MsgId) {
940 return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
941 }
942
int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
944 const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
945 const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
946 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
947 for (int i = F; i < L; ++i) {
948 if (Name == S[i]) {
949 return i;
950 }
951 }
952 return OP_UNKNOWN_;
953 }
954
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {
956
957 if (!Strict)
958 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
959
960 switch(MsgId)
961 {
962 case ID_GS:
963 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
964 case ID_GS_DONE:
965 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
966 case ID_SYSMSG:
967 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
968 default:
969 return OpId == OP_NONE_;
970 }
971 }
972
StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
974 assert(msgRequiresOp(MsgId));
975 return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
976 }
977
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {
979
980 if (!Strict)
981 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
982
983 switch(MsgId)
984 {
985 case ID_GS:
986 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
987 case ID_GS_DONE:
988 return (OpId == OP_GS_NOP)?
989 (StreamId == STREAM_ID_NONE_) :
990 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
991 default:
992 return StreamId == STREAM_ID_NONE_;
993 }
994 }
995
bool msgRequiresOp(int64_t MsgId) {
997 return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
998 }
999
bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
1001 return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
1002 }
1003
void decodeMsg(unsigned Val,
1005 uint16_t &MsgId,
1006 uint16_t &OpId,
1007 uint16_t &StreamId) {
1008 MsgId = Val & ID_MASK_;
1009 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
1010 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
1011 }
1012
uint64_t encodeMsg(uint64_t MsgId,
1014 uint64_t OpId,
1015 uint64_t StreamId) {
1016 return (MsgId << ID_SHIFT_) |
1017 (OpId << OP_SHIFT_) |
1018 (StreamId << STREAM_ID_SHIFT_);
1019 }
1020
1021 } // namespace SendMsg
1022
1023 //===----------------------------------------------------------------------===//
1024 //
1025 //===----------------------------------------------------------------------===//
1026
unsigned getInitialPSInputAddr(const Function &F) {
1028 return getIntegerAttribute(F, "InitialPSInputAddr", 0);
1029 }
1030
bool isShader(CallingConv::ID cc) {
1032 switch(cc) {
1033 case CallingConv::AMDGPU_VS:
1034 case CallingConv::AMDGPU_LS:
1035 case CallingConv::AMDGPU_HS:
1036 case CallingConv::AMDGPU_ES:
1037 case CallingConv::AMDGPU_GS:
1038 case CallingConv::AMDGPU_PS:
1039 case CallingConv::AMDGPU_CS:
1040 return true;
1041 default:
1042 return false;
1043 }
1044 }
1045
bool isGraphics(CallingConv::ID cc) {
1047 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
1048 }
1049
bool isCompute(CallingConv::ID cc) {
1051 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
1052 }
1053
bool isEntryFunctionCC(CallingConv::ID CC) {
1055 switch (CC) {
1056 case CallingConv::AMDGPU_KERNEL:
1057 case CallingConv::SPIR_KERNEL:
1058 case CallingConv::AMDGPU_VS:
1059 case CallingConv::AMDGPU_GS:
1060 case CallingConv::AMDGPU_PS:
1061 case CallingConv::AMDGPU_CS:
1062 case CallingConv::AMDGPU_ES:
1063 case CallingConv::AMDGPU_HS:
1064 case CallingConv::AMDGPU_LS:
1065 return true;
1066 default:
1067 return false;
1068 }
1069 }
1070
bool hasXNACK(const MCSubtargetInfo &STI) {
1072 return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
1073 }
1074
bool hasSRAMECC(const MCSubtargetInfo &STI) {
1076 return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
1077 }
1078
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
1080 return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
1081 }
1082
bool hasGFX10A16(const MCSubtargetInfo &STI) {
1084 return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
1085 }
1086
bool hasG16(const MCSubtargetInfo &STI) {
1088 return STI.getFeatureBits()[AMDGPU::FeatureG16];
1089 }
1090
bool hasPackedD16(const MCSubtargetInfo &STI) {
1092 return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
1093 }
1094
bool isSI(const MCSubtargetInfo &STI) {
1096 return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
1097 }
1098
bool isCI(const MCSubtargetInfo &STI) {
1100 return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
1101 }
1102
bool isVI(const MCSubtargetInfo &STI) {
1104 return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
1105 }
1106
bool isGFX9(const MCSubtargetInfo &STI) {
1108 return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
1109 }
1110
bool isGFX9Plus(const MCSubtargetInfo &STI) {
1112 return isGFX9(STI) || isGFX10Plus(STI);
1113 }
1114
bool isGFX10(const MCSubtargetInfo &STI) {
1116 return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
1117 }
1118
bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); }
1120
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
1122 return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
1123 }
1124
bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
1126 return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
1127 }
1128
bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
1130 return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
1131 }
1132
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
1134 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
1135 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
1136 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
1137 Reg == AMDGPU::SCC;
1138 }
1139
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
1141 for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
1142 if (*R == Reg1) return true;
1143 }
1144 return false;
1145 }
1146
1147 #define MAP_REG2REG \
1148 using namespace AMDGPU; \
1149 switch(Reg) { \
1150 default: return Reg; \
1151 CASE_CI_VI(FLAT_SCR) \
1152 CASE_CI_VI(FLAT_SCR_LO) \
1153 CASE_CI_VI(FLAT_SCR_HI) \
1154 CASE_VI_GFX9_GFX10(TTMP0) \
1155 CASE_VI_GFX9_GFX10(TTMP1) \
1156 CASE_VI_GFX9_GFX10(TTMP2) \
1157 CASE_VI_GFX9_GFX10(TTMP3) \
1158 CASE_VI_GFX9_GFX10(TTMP4) \
1159 CASE_VI_GFX9_GFX10(TTMP5) \
1160 CASE_VI_GFX9_GFX10(TTMP6) \
1161 CASE_VI_GFX9_GFX10(TTMP7) \
1162 CASE_VI_GFX9_GFX10(TTMP8) \
1163 CASE_VI_GFX9_GFX10(TTMP9) \
1164 CASE_VI_GFX9_GFX10(TTMP10) \
1165 CASE_VI_GFX9_GFX10(TTMP11) \
1166 CASE_VI_GFX9_GFX10(TTMP12) \
1167 CASE_VI_GFX9_GFX10(TTMP13) \
1168 CASE_VI_GFX9_GFX10(TTMP14) \
1169 CASE_VI_GFX9_GFX10(TTMP15) \
1170 CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
1171 CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
1172 CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
1173 CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
1174 CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
1175 CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
1176 CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
1177 CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
1178 CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
1179 CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
1180 CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
1181 CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
1182 CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
1183 CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
1184 CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
1185 CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
1186 }
1187
1188 #define CASE_CI_VI(node) \
1189 assert(!isSI(STI)); \
1190 case node: return isCI(STI) ? node##_ci : node##_vi;
1191
1192 #define CASE_VI_GFX9_GFX10(node) \
1193 case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
1194
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
1196 if (STI.getTargetTriple().getArch() == Triple::r600)
1197 return Reg;
1198 MAP_REG2REG
1199 }
1200
1201 #undef CASE_CI_VI
1202 #undef CASE_VI_GFX9_GFX10
1203
1204 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
1205 #define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
1206
unsigned mc2PseudoReg(unsigned Reg) {
1208 MAP_REG2REG
1209 }
1210
1211 #undef CASE_CI_VI
1212 #undef CASE_VI_GFX9_GFX10
1213 #undef MAP_REG2REG
1214
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1216 assert(OpNo < Desc.NumOperands);
1217 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1218 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1219 OpType <= AMDGPU::OPERAND_SRC_LAST;
1220 }
1221
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1223 assert(OpNo < Desc.NumOperands);
1224 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1225 switch (OpType) {
1226 case AMDGPU::OPERAND_REG_IMM_FP32:
1227 case AMDGPU::OPERAND_REG_IMM_FP64:
1228 case AMDGPU::OPERAND_REG_IMM_FP16:
1229 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1230 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1231 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1232 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1233 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1234 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1235 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1236 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1237 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1238 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1239 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1240 return true;
1241 default:
1242 return false;
1243 }
1244 }
1245
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1247 assert(OpNo < Desc.NumOperands);
1248 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1249 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
1250 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
1251 }
1252
1253 // Avoid using MCRegisterClass::getSize, since that function will go away
1254 // (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
1256 switch (RCID) {
1257 case AMDGPU::VGPR_LO16RegClassID:
1258 case AMDGPU::VGPR_HI16RegClassID:
1259 case AMDGPU::SGPR_LO16RegClassID:
1260 case AMDGPU::AGPR_LO16RegClassID:
1261 return 16;
1262 case AMDGPU::SGPR_32RegClassID:
1263 case AMDGPU::VGPR_32RegClassID:
1264 case AMDGPU::VRegOrLds_32RegClassID:
1265 case AMDGPU::AGPR_32RegClassID:
1266 case AMDGPU::VS_32RegClassID:
1267 case AMDGPU::AV_32RegClassID:
1268 case AMDGPU::SReg_32RegClassID:
1269 case AMDGPU::SReg_32_XM0RegClassID:
1270 case AMDGPU::SRegOrLds_32RegClassID:
1271 return 32;
1272 case AMDGPU::SGPR_64RegClassID:
1273 case AMDGPU::VS_64RegClassID:
1274 case AMDGPU::AV_64RegClassID:
1275 case AMDGPU::SReg_64RegClassID:
1276 case AMDGPU::VReg_64RegClassID:
1277 case AMDGPU::AReg_64RegClassID:
1278 case AMDGPU::SReg_64_XEXECRegClassID:
1279 return 64;
1280 case AMDGPU::SGPR_96RegClassID:
1281 case AMDGPU::SReg_96RegClassID:
1282 case AMDGPU::VReg_96RegClassID:
1283 case AMDGPU::AReg_96RegClassID:
1284 return 96;
1285 case AMDGPU::SGPR_128RegClassID:
1286 case AMDGPU::SReg_128RegClassID:
1287 case AMDGPU::VReg_128RegClassID:
1288 case AMDGPU::AReg_128RegClassID:
1289 return 128;
1290 case AMDGPU::SGPR_160RegClassID:
1291 case AMDGPU::SReg_160RegClassID:
1292 case AMDGPU::VReg_160RegClassID:
1293 case AMDGPU::AReg_160RegClassID:
1294 return 160;
1295 case AMDGPU::SGPR_192RegClassID:
1296 case AMDGPU::SReg_192RegClassID:
1297 case AMDGPU::VReg_192RegClassID:
1298 case AMDGPU::AReg_192RegClassID:
1299 return 192;
1300 case AMDGPU::SGPR_256RegClassID:
1301 case AMDGPU::SReg_256RegClassID:
1302 case AMDGPU::VReg_256RegClassID:
1303 case AMDGPU::AReg_256RegClassID:
1304 return 256;
1305 case AMDGPU::SGPR_512RegClassID:
1306 case AMDGPU::SReg_512RegClassID:
1307 case AMDGPU::VReg_512RegClassID:
1308 case AMDGPU::AReg_512RegClassID:
1309 return 512;
1310 case AMDGPU::SGPR_1024RegClassID:
1311 case AMDGPU::SReg_1024RegClassID:
1312 case AMDGPU::VReg_1024RegClassID:
1313 case AMDGPU::AReg_1024RegClassID:
1314 return 1024;
1315 default:
1316 llvm_unreachable("Unexpected register class");
1317 }
1318 }
1319
unsigned getRegBitWidth(const MCRegisterClass &RC) {
1321 return getRegBitWidth(RC.getID());
1322 }
1323
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1325 unsigned OpNo) {
1326 assert(OpNo < Desc.NumOperands);
1327 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
1328 return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
1329 }
1330
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
1332 if (isInlinableIntLiteral(Literal))
1333 return true;
1334
1335 uint64_t Val = static_cast<uint64_t>(Literal);
1336 return (Val == DoubleToBits(0.0)) ||
1337 (Val == DoubleToBits(1.0)) ||
1338 (Val == DoubleToBits(-1.0)) ||
1339 (Val == DoubleToBits(0.5)) ||
1340 (Val == DoubleToBits(-0.5)) ||
1341 (Val == DoubleToBits(2.0)) ||
1342 (Val == DoubleToBits(-2.0)) ||
1343 (Val == DoubleToBits(4.0)) ||
1344 (Val == DoubleToBits(-4.0)) ||
1345 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
1346 }
1347
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
1349 if (isInlinableIntLiteral(Literal))
1350 return true;
1351
1352 // The actual type of the operand does not seem to matter as long
1353 // as the bits match one of the inline immediate values. For example:
1354 //
1355 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1356 // so it is a legal inline immediate.
1357 //
1358 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1359 // floating-point, so it is a legal inline immediate.
1360
1361 uint32_t Val = static_cast<uint32_t>(Literal);
1362 return (Val == FloatToBits(0.0f)) ||
1363 (Val == FloatToBits(1.0f)) ||
1364 (Val == FloatToBits(-1.0f)) ||
1365 (Val == FloatToBits(0.5f)) ||
1366 (Val == FloatToBits(-0.5f)) ||
1367 (Val == FloatToBits(2.0f)) ||
1368 (Val == FloatToBits(-2.0f)) ||
1369 (Val == FloatToBits(4.0f)) ||
1370 (Val == FloatToBits(-4.0f)) ||
1371 (Val == 0x3e22f983 && HasInv2Pi);
1372 }
1373
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
1375 if (!HasInv2Pi)
1376 return false;
1377
1378 if (isInlinableIntLiteral(Literal))
1379 return true;
1380
1381 uint16_t Val = static_cast<uint16_t>(Literal);
1382 return Val == 0x3C00 || // 1.0
1383 Val == 0xBC00 || // -1.0
1384 Val == 0x3800 || // 0.5
1385 Val == 0xB800 || // -0.5
1386 Val == 0x4000 || // 2.0
1387 Val == 0xC000 || // -2.0
1388 Val == 0x4400 || // 4.0
1389 Val == 0xC400 || // -4.0
1390 Val == 0x3118; // 1/2pi
1391 }
1392
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
1394 assert(HasInv2Pi);
1395
1396 if (isInt<16>(Literal) || isUInt<16>(Literal)) {
1397 int16_t Trunc = static_cast<int16_t>(Literal);
1398 return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
1399 }
1400 if (!(Literal & 0xffff))
1401 return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
1402
1403 int16_t Lo16 = static_cast<int16_t>(Literal);
1404 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
1405 return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
1406 }
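// For example, 0x40004000 (half-precision 2.0 replicated into both halves) is
// inlinable, while 0x40003C00 (<1.0, 2.0>) is not because the halves differ.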
1407
bool isInlinableIntLiteralV216(int32_t Literal) {
1409 int16_t Lo16 = static_cast<int16_t>(Literal);
1410 if (isInt<16>(Literal) || isUInt<16>(Literal))
1411 return isInlinableIntLiteral(Lo16);
1412
1413 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
1414 if (!(Literal & 0xffff))
1415 return isInlinableIntLiteral(Hi16);
1416 return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
1417 }
1418
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
1420 assert(HasInv2Pi);
1421
1422 int16_t Lo16 = static_cast<int16_t>(Literal);
1423 if (isInt<16>(Literal) || isUInt<16>(Literal))
1424 return true;
1425
1426 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
1427 if (!(Literal & 0xffff))
1428 return true;
1429 return Lo16 == Hi16;
1430 }
1431
bool isArgPassedInSGPR(const Argument *A) {
1433 const Function *F = A->getParent();
1434
1435 // Arguments to compute shaders are never a source of divergence.
1436 CallingConv::ID CC = F->getCallingConv();
1437 switch (CC) {
1438 case CallingConv::AMDGPU_KERNEL:
1439 case CallingConv::SPIR_KERNEL:
1440 return true;
1441 case CallingConv::AMDGPU_VS:
1442 case CallingConv::AMDGPU_LS:
1443 case CallingConv::AMDGPU_HS:
1444 case CallingConv::AMDGPU_ES:
1445 case CallingConv::AMDGPU_GS:
1446 case CallingConv::AMDGPU_PS:
1447 case CallingConv::AMDGPU_CS:
1448 case CallingConv::AMDGPU_Gfx:
1449 // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
1450 // Everything else is in VGPRs.
1451 return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
1452 F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
1453 default:
1454 // TODO: Should calls support inreg for SGPR inputs?
1455 return false;
1456 }
1457 }
1458
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
1460 return isGCN3Encoding(ST) || isGFX10Plus(ST);
1461 }
1462
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
1464 return isGFX9Plus(ST);
1465 }
1466
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1468 int64_t EncodedOffset) {
1469 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
1470 : isUInt<8>(EncodedOffset);
1471 }
1472
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1474 int64_t EncodedOffset,
1475 bool IsBuffer) {
1476 return !IsBuffer &&
1477 hasSMRDSignedImmOffset(ST) &&
1478 isInt<21>(EncodedOffset);
1479 }
1480
static bool isDwordAligned(uint64_t ByteOffset) {
1482 return (ByteOffset & 3) == 0;
1483 }
1484
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
1486 uint64_t ByteOffset) {
1487 if (hasSMEMByteOffset(ST))
1488 return ByteOffset;
1489
1490 assert(isDwordAligned(ByteOffset));
1491 return ByteOffset >> 2;
1492 }
1493
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1495 int64_t ByteOffset, bool IsBuffer) {
1496 // The signed version is always a byte offset.
1497 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
1498 assert(hasSMEMByteOffset(ST));
1499 return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
1500 }
1501
1502 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
1503 return None;
1504
1505 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
1506 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
1507 ? Optional<int64_t>(EncodedOffset)
1508 : None;
1509 }
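// For example, on SI (8-bit unsigned, dword-scaled immediate) a byte offset of
// 400 encodes as 100, while 1024 would need a dword offset of 256 and yields
// None.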
1510
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1512 int64_t ByteOffset) {
1513 if (!isCI(ST) || !isDwordAligned(ByteOffset))
1514 return None;
1515
1516 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
1517 return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
1518 }
1519
1520 // Given Imm, split it into the values to put into the SOffset and ImmOffset
1521 // fields in an MUBUF instruction. Return false if it is not possible (due to a
1522 // hardware bug needing a workaround).
1523 //
1524 // The required alignment ensures that individual address components remain
1525 // aligned if they are aligned to begin with. It also ensures that additional
1526 // offsets within the given alignment can be added to the resulting ImmOffset.
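// For example, Imm = 5000 with 4-byte alignment gives MaxImm = 4092, so the
// split produced is SOffset = 4092 and ImmOffset = 908 (4092 + 908 == 5000).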
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1528 const GCNSubtarget *Subtarget, Align Alignment) {
1529 const uint32_t MaxImm = alignDown(4095, Alignment.value());
1530 uint32_t Overflow = 0;
1531
1532 if (Imm > MaxImm) {
1533 if (Imm <= MaxImm + 64) {
1534 // Use an SOffset inline constant for 4..64
1535 Overflow = Imm - MaxImm;
1536 Imm = MaxImm;
1537 } else {
1538 // Try to keep the same value in SOffset for adjacent loads, so that
1539 // the corresponding register contents can be re-used.
1540 //
1541 // Load values with all low-bits (except for alignment bits) set into
1542 // SOffset, so that a larger range of values can be covered using
1543 // s_movk_i32.
1544 //
1545 // Atomic operations fail to work correctly when individual address
1546 // components are unaligned, even if their sum is aligned.
1547 uint32_t High = (Imm + Alignment.value()) & ~4095;
1548 uint32_t Low = (Imm + Alignment.value()) & 4095;
1549 Imm = Low;
1550 Overflow = High - Alignment.value();
1551 }
1552 }
1553
1554 // There is a hardware bug in SI and CI which prevents address clamping in
1555 // MUBUF instructions from working correctly with SOffsets. The immediate
1556 // offset is unaffected.
1557 if (Overflow > 0 &&
1558 Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
1559 return false;
1560
1561 ImmOffset = Imm;
1562 SOffset = Overflow;
1563 return true;
1564 }
1565
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
1567 *this = getDefaultForCallingConv(F.getCallingConv());
1568
1569 StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
1570 if (!IEEEAttr.empty())
1571 IEEE = IEEEAttr == "true";
1572
1573 StringRef DX10ClampAttr
1574 = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
1575 if (!DX10ClampAttr.empty())
1576 DX10Clamp = DX10ClampAttr == "true";
1577
1578 StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
1579 if (!DenormF32Attr.empty()) {
1580 DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
1581 FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
1582 FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
1583 }
1584
1585 StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
1586 if (!DenormAttr.empty()) {
1587 DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
1588
1589 if (DenormF32Attr.empty()) {
1590 FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
1591 FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
1592 }
1593
1594 FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
1595 FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
1596 }
1597 }
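// Note that "denormal-fp-math-f32" takes precedence over the generic
// "denormal-fp-math" attribute for the FP32 mode bits, while the generic
// attribute always controls the FP64/FP16 bits.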
1598
1599 namespace {
1600
1601 struct SourceOfDivergence {
1602 unsigned Intr;
1603 };
1604 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
1605
1606 #define GET_SourcesOfDivergence_IMPL
1607 #define GET_Gfx9BufferFormat_IMPL
1608 #define GET_Gfx10PlusBufferFormat_IMPL
1609 #include "AMDGPUGenSearchableTables.inc"
1610
1611 } // end anonymous namespace
1612
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
1614 return lookupSourceOfDivergence(IntrID);
1615 }
1616
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
1618 uint8_t NumComponents,
1619 uint8_t NumFormat,
1620 const MCSubtargetInfo &STI) {
1621 return isGFX10Plus(STI)
1622 ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
1623 NumFormat)
1624 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
1625 }
1626
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
1628 const MCSubtargetInfo &STI) {
1629 return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format)
1630 : getGfx9BufferFormatInfo(Format);
1631 }
1632
1633 } // namespace AMDGPU
1634 } // namespace llvm
1635