//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>

namespace llvm {

class Argument;
class AMDGPUSubtarget;
class FeatureBitset;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCContext;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
class MachineMemOperand;
class Triple;
class raw_ostream;

namespace AMDGPU {

struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns True if given subtarget \p STI supports code object version 3,
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);

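// Illustrative sketch, not part of the upstream API: composing the occupancy
// queries above. The flat work group size of 256 is an arbitrary example.
inline unsigned exampleMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // The unlimited query gives the hardware ceiling; the workgroup-limited
  // query accounts for how many waves a 256-item workgroup produces.
  unsigned HardwareMax = getMaxWavesPerEU(STI);
  unsigned KernelMax = getMaxWavesPerEU(STI, /*FlatWorkGroupSize=*/256);
  return std::min(HardwareMax, KernelMax);
}
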
/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

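// Illustrative sketch, not part of the upstream API: typical SGPR accounting,
// folding the implicitly used special registers into the explicit count
// before block rounding. The usage flags are hypothetical.
inline unsigned exampleSGPRBlocks(const MCSubtargetInfo *STI,
                                  unsigned NumExplicitSGPRs) {
  unsigned NumSGPRs =
      NumExplicitSGPRs + getNumExtraSGPRs(STI, /*VCCUsed=*/true,
                                          /*FlatScrUsed=*/false,
                                          /*XNACKUsed=*/false);
  return getNumSGPRBlocks(STI, NumSGPRs);
}
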
/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

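// Illustrative sketch, not part of the upstream API: the VGPR analogue of the
// SGPR accounting above. Passing None derives the allocation granule from the
// subtarget's wavefront size; a wave32 kernel descriptor would pass true.
inline unsigned exampleVGPRBlocks(const MCSubtargetInfo *STI,
                                  unsigned NumVGPRs) {
  return getNumVGPRBlocks(STI, NumVGPRs, /*EnableWavefrontSize32=*/None);
}
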
} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional if \p OnlyFirstRequired
/// is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);

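// Illustrative sketch, not part of the upstream API: reading the standard
// "amdgpu-flat-work-group-size" attribute, which uses the "first,second"
// format parsed by getIntegerPairAttribute. The {1, 1024} default is an
// arbitrary example.
inline std::pair<int, int> exampleFlatWorkGroupSizeRange(const Function &F) {
  // IR: attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
  return getIntegerPairAttribute(F, "amdgpu-flat-work-group-size",
                                 std::pair<int, int>(1, 1024));
}
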
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(const IsaVersion &Version) {
    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};

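// Illustrative sketch, not part of the upstream API: merging the wait
// requirements of two control-flow paths. combined() keeps the strictest
// (smallest) count per field, so the result dominates both inputs.
inline Waitcnt exampleMergeWaits() {
  Waitcnt VmWait(/*VmCnt=*/0, /*ExpCnt=*/~0u, /*LgkmCnt=*/~0u, /*VsCnt=*/~0u);
  Waitcnt LgkmWait(/*VmCnt=*/~0u, /*ExpCnt=*/~0u, /*LgkmCnt=*/0, /*VsCnt=*/~0u);
  Waitcnt Both = VmWait.combined(LgkmWait);
  // Both.dominates(VmWait) and Both.dominates(LgkmWait) now hold.
  return Both;
}
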
/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

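// Illustrative sketch, not part of the upstream API: an encode/decode round
// trip. getIsaVersion is declared in llvm/Support/TargetParser.h; "gfx900" is
// an arbitrary example target.
inline bool exampleWaitcntRoundTrip() {
  IsaVersion Version = getIsaVersion("gfx900");
  unsigned Enc =
      encodeWaitcnt(Version, /*Vmcnt=*/0, /*Expcnt=*/0, /*Lgkmcnt=*/0);
  Waitcnt Decoded = decodeWaitcnt(Version, Enc);
  return Decoded.VmCnt == 0 && Decoded.ExpCnt == 0 && Decoded.LgkmCnt == 0;
}
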
namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

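// Illustrative sketch, not part of the upstream API: packing and unpacking a
// hardware register selector. The field values are hypothetical; real callers
// should check them with the validity predicates above first.
inline bool exampleHwregRoundTrip() {
  uint64_t Enc = encodeHwreg(/*Id=*/1, /*Offset=*/0, /*Width=*/32);
  unsigned Id, Offset, Width;
  decodeHwreg(static_cast<unsigned>(Enc), Id, Offset, Width);
  return Id == 1 && Offset == 0 && Width == 32;
}
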
} // namespace Hwreg

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

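// Illustrative sketch, not part of the upstream API: resolving a message and
// operation by name before packing them. The names are assumed entries of the
// symbolic tables; getMsgId returns a negative value for an unknown name.
inline int64_t exampleEncodeGsDone() {
  int64_t MsgId = getMsgId("MSG_GS_DONE");
  if (MsgId < 0 || !msgRequiresOp(MsgId))
    return -1; // unknown message, or one that takes no operation
  int64_t OpId = getMsgOpId(MsgId, "GS_OP_NOP");
  return static_cast<int64_t>(encodeMsg(MsgId, OpId, /*StreamId=*/0));
}
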
} // namespace SendMsg

unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);

/// \returns true if \p Reg is a scalar register.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// \returns true if registers \p Reg0 and \p Reg1 intersect.
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo register, return the correct hardware register given
/// \p STI, otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of the register class with ID \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get the size of register operand \p OpNo of \p Desc.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

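// Illustrative sketch, not part of the upstream API: 1.0f (0x3f800000) is one
// of the hardware's inline constants, while an arbitrary bit pattern such as
// pi (0x40490fdb) is not and would occupy a literal slot.
inline bool exampleInlineLiteralCheck() {
  return isInlinableLiteral32(0x3f800000, /*HasInv2Pi=*/true) &&
         !isInlinableLiteral32(0x40490fdb, /*HasInv2Pi=*/true);
}
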
bool isArgPassedInSGPR(const Argument *Arg);

/// \returns The encoding that will be used for \p ByteOffset in the SMRD
/// offset field.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align = 4);

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  ///
  /// TODO: Split into separate input and output fields if necessary, as the
  /// underlying control bits provide separate controls.
  bool FP32Denormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16Denormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32Denormals(true),
    FP64FP16Denormals(true) {}

  // FIXME: Should not depend on the subtarget
  SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    const bool IsCompute = AMDGPU::isCompute(CC);

    SIModeRegisterDefaults Mode;
    Mode.DX10Clamp = true;
    Mode.IEEE = IsCompute;
    Mode.FP32Denormals = false; // FIXME: Should be on by default.
    Mode.FP64FP16Denormals = true;
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32Denormals == Other.FP32Denormals &&
           FP64FP16Denormals == Other.FP64FP16Denormals;
  }

  /// Returns true if a mode flag is compatible for inlining: either the
  /// caller and callee agree, or the flag is enabled in the caller but
  /// disabled in the callee.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining a callee with denormals flushed into a caller with
    // denormals enabled, but not the reverse.
    return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
           oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
  }
};

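// Illustrative sketch, not part of the upstream API: per the rules above, a
// caller running with denormals enabled may inline a callee built with them
// flushed, but not the reverse.
inline bool exampleModeInlineCompat() {
  SIModeRegisterDefaults Caller; // defaults: everything enabled
  SIModeRegisterDefaults Callee;
  Callee.FP32Denormals = false;  // callee flushes f32 denormals
  return Caller.isInlineCompatible(Callee) && // OK: one-way compatible
         !Callee.isInlineCompatible(Caller);  // reversed direction rejected
}
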
} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H