//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>

namespace llvm {

class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace AMDGPU {

/// \returns HSA OS ABI Version identification.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);

struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given
/// \p FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);
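
// Illustrative sketch, not normative: block counts of this kind are derived
// by rounding the register count up to the encoding granule and storing
// "blocks minus one" (the authoritative logic lives in AMDGPUBaseInfo.cpp).
// E.g. with a granule of 4, NumVGPRs = 10 rounds up to 12 and would be
// encoded as (12 / 4) - 1 = 2.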

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);
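
// For example, getInitialPSInputAddr() declared below reads the
// "InitialPSInputAddr" function attribute through this helper, with a
// default of 0.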

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);
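
// For example, a kernel carrying the IR attribute
// "amdgpu-flat-work-group-size"="1,256" decodes through this helper to the
// pair (1, 256).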

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};
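
// Usage sketch (illustrative only): combined() keeps the strictest
// requirement per counter, so
//   Waitcnt(0, ~0u, ~0u, ~0u).combined(Waitcnt(~0u, 0, ~0u, ~0u))
// yields Waitcnt(0, 0, ~0u, ~0u), a wait that dominates() both inputs.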

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
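
// Worked example (gfx9, following the bit layout documented above): the
// encoded value 0x4321 decodes to Vmcnt = 17 (bits [15:14] = 0b01 joined
// with bits [3:0] = 0b0001), Expcnt = 2 (bits [6:4]) and Lgkmcnt = 3
// (bits [11:8]).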

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
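
// The same example in the encode direction: on gfx9, Vmcnt = 17, Expcnt = 2
// and Lgkmcnt = 3 populate fields [15:14,3:0], [6:4] and [11:8] as in 0x4321;
// bits outside the fields defined for the target are governed by its waitcnt
// bit mask.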

namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name);

StringRef getUnifiedFormatName(unsigned Id);

bool isValidUnifiedFormat(unsigned Val);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg

unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions cleaned up
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);

/// \returns true if \p Reg is a scalar register.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// \returns true if registers \p Reg0 and \p Reg1 intersect.
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get size of register operand
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable, and not one of the values intended for floating
/// point values.
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}
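
// For example, isInlinableIntLiteral(64) is true (64 is the largest inline
// integer constant), while isInlinableIntLiteral(65) and
// isInlinableIntLiteral(-17) are false and would require a literal operand.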

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset; on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering, and that
  /// quiet and propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and
  /// max_dx10 become IEEE 754-2008 compliant due to signaling NaN propagation
  /// and quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.IEEE = !AMDGPU::isShader(CC);
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Returns true if the callee's mode flag is compatible with the caller's:
  /// either the two are equal, or the flag is enabled in the callee but
  /// disabled in the caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }
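
  // Concretely: oneWayCompatible(false, true) is true (denormals enabled in
  // the callee, flushed in the caller), while oneWayCompatible(true, false)
  // is false, so isInlineCompatible() below rejects inlining a
  // denormal-flushing callee into a denormal-preserving caller.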

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals,
                            CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals,
                            CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals,
                            CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals,
                            CalleeMode.FP32OutputDenormals);
  }
};

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H