//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>

namespace llvm {

class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;
class raw_ostream;
namespace AMDGPU {

/// \returns HSA OS ABI Version identification.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);

struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional if \p OnlyFirstRequired
/// is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or if \p OnlyFirstRequired is false and the "second"
/// value is not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);
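
// For illustration (an added sketch; "amdgpu-waves-per-eu" is a real AMDGPU
// function attribute, but the surrounding values are hypothetical): a
// function carrying "amdgpu-waves-per-eu"="2,4" yields the pair (2, 4):
//
//   std::pair<int, int> WavesPerEU =
//       getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0});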

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};
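
// Illustrative use (an added sketch; the values are hypothetical): merging two
// pending waits into a single conservative s_waitcnt.
//
//   Waitcnt A(/*VmCnt=*/1, ~0u, ~0u, ~0u);  // wait until vmcnt <= 1
//   Waitcnt B(~0u, /*ExpCnt=*/0, ~0u, ~0u); // wait until expcnt == 0
//   Waitcnt Merged = A.combined(B);         // enforces both constraints
//   assert(Merged.dominates(A) && Merged.dominates(B));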

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
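///
/// For illustration (an added sketch; STI stands for a hypothetical
/// MCSubtargetInfo pointer in scope):
/// \code
///   IsaVersion Version = getIsaVersion(STI->getCPU());
///   unsigned Vmcnt, Expcnt, Lgkmcnt;
///   decodeWaitcnt(Version, /*Waitcnt=*/0, Vmcnt, Expcnt, Lgkmcnt);
///   // All three counts decode to 0: wait until every counter has drained.
/// \endcode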
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
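///
/// Worked example (added for illustration, following the table above): on
/// gfx9, \p Vmcnt = 16 (binary 010000) has Vmcnt[3:0] = 0 and Vmcnt[5:4] = 1,
/// so encodeWaitcnt(Version, 16, 0, 0) sets bits [15:14] to 0b01 and the
/// other counter fields to zero; the counter fields contribute 0x4000.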
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
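
// Illustrative round trip (an added sketch; Val stands for a hypothetical
// well-formed hwreg() operand): decodeHwreg splits the immediate into its id,
// offset and width fields, and encodeHwreg packs them back together:
//
//   unsigned Id, Offset, Width;
//   decodeHwreg(Val, Id, Offset, Width);
//   assert(encodeHwreg(Id, Offset, Width) == Val);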

} // namespace Hwreg

namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
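
// Illustrative round trip (an added sketch; Format stands for a hypothetical
// combined MTBUF format value): splitting a format into its dfmt/nfmt fields
// and re-packing them should reproduce the original encoding:
//
//   unsigned Dfmt, Nfmt;
//   decodeDfmtNfmt(Format, Dfmt, Nfmt);
//   assert(encodeDfmtNfmt(Dfmt, Nfmt) == Format);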

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name);

StringRef getUnifiedFormatName(unsigned Id);

bool isValidUnifiedFormat(unsigned Val);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);
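
// Illustrative round trip (an added sketch; Val stands for a hypothetical
// valid s_sendmsg simm16 operand): re-packing the decoded fields should
// reproduce the original immediate:
//
//   uint16_t MsgId, OpId, StreamId;
//   decodeMsg(Val, MsgId, OpId, StreamId);
//   assert(encodeMsg(MsgId, OpId, StreamId) == Val);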

} // end namespace SendMsg

unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);

/// \returns True if \p Reg is a scalar register, false otherwise.
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI);

/// \returns True if registers \p Reg0 and \p Reg1 intersect, false otherwise.
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI);

/// If \p Reg is a pseudo register, return the correct hardware register given
/// \p STI, otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);
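
// Assumed relationship (illustrative only, not asserted by the original
// header): mapping a pseudo register to its hardware counterpart and back is
// expected to be the identity, i.e. mc2PseudoReg(getMCReg(Reg, STI)) == Reg.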

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of the register class with ID \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get the size of a register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}
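
// For example (from the switch above): an operand typed
// AMDGPU::OPERAND_REG_IMM_FP16 reports an immediate size of 2 bytes, while an
// AMDGPU::OPERAND_REG_IMM_FP64 operand reports 8.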

/// Is this literal inlinable, and not one of the values intended for
/// floating-point values?
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}
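
// Concretely (from the range check above): isInlinableIntLiteral(-16) and
// isInlinableIntLiteral(64) are true, while isInlinableIntLiteral(65) is not
// and the value would have to be encoded some other way.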

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10,
/// S_LOAD instructions have a signed offset; on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);
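
// Typical use (an added sketch; ST and ByteOffset are assumed to be in scope):
// legalize a byte offset before folding it into an s_load, falling back to a
// register offset when it does not fit.
//
//   if (Optional<int64_t> Enc =
//           getSMRDEncodedOffset(ST, ByteOffset, /*IsBuffer=*/false)) {
//     // Fold *Enc into the instruction's offset field.
//   } else {
//     // Materialize ByteOffset in an SGPR and use a register offset instead.
//   }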

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.IEEE = !AMDGPU::isShader(CC);
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Returns true if a flag is compatible: either the caller and callee agree,
  /// or the flag is enabled in the callee but disabled in the caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals,
                            CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals,
                            CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals,
                            CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals,
                            CalleeMode.FP32OutputDenormals);
  }
};
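
// Illustrative (an added sketch): a caller built with f32 denormals flushed
// may inline a callee that enables them, but not the other way around.
//
//   SIModeRegisterDefaults Caller;
//   Caller.FP32InputDenormals = false;     // caller flushes f32 input denorms
//   SIModeRegisterDefaults Callee;         // denormals enabled by default
//   bool OK = Caller.isInlineCompatible(Callee);     // true: one-way compatible
//   bool NotOK = Callee.isInlineCompatible(Caller);  // false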

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H