• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDHSA kernel descriptor definitions. For more information, visit
11 /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
12 ///
13 /// \warning
14 /// Any changes to this file should also be audited for corresponding changes
15 /// needed in both the assembler and disassembler, namely:
16 /// * AMDGPUAsmPrinter.{cpp,h}
17 /// * AMDGPUTargetStreamer.{cpp,h}
18 /// * AMDGPUDisassembler.{cpp,h}
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
23 #define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
24 
25 #include <cstddef>
26 #include <cstdint>
27 
// Fallback for offsetof: evaluates to the byte offset of MEMBER inside TYPE.
// Only takes effect when no previously included header (normally <cstddef>)
// has already defined the macro.
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)(&(((TYPE *)0)->MEMBER)))
#endif // offsetof
32 
// Expands to three enumerators that describe one bit field packed into an
// integer. Must be used inside an enum body.
//   NAME_SHIFT - index of the field's least significant bit.
//   NAME_WIDTH - number of bits in the field.
//   NAME       - mask with WIDTH consecutive bits set, starting at SHIFT.
#ifndef AMDHSA_BITS_ENUM_ENTRY
#define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH)                             \
  NAME##_SHIFT = (SHIFT),                                                      \
  NAME##_WIDTH = (WIDTH),                                                      \
  NAME = (((1 << NAME##_WIDTH) - 1) << NAME##_SHIFT)
#endif // AMDHSA_BITS_ENUM_ENTRY
41 
// Extracts the bit field described by MSK from SRC and returns it shifted down
// to bit 0.
//   SRC - integer expression holding the packed value.
//   MSK - name of a mask enumerator created by AMDHSA_BITS_ENUM_ENTRY; the
//         matching MSK_SHIFT enumerator must be visible at the point of use.
// Both arguments are parenthesized so that expressions containing
// lower-precedence operators (for example `a | b`) are masked as a whole.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & (MSK)) >> MSK##_SHIFT)
#endif // AMDHSA_BITS_GET
46 
// Stores VAL into the bit field described by MSK inside DST, leaving all other
// bits of DST unchanged. Bits of VAL that do not fit in the field are dropped.
//   DST - modifiable integer lvalue; evaluated twice.
//   MSK - name of a mask enumerator created by AMDHSA_BITS_ENUM_ENTRY; the
//         matching MSK_SHIFT enumerator must be visible at the point of use.
//   VAL - value to store, expressed relative to bit 0; evaluated once.
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement (e.g. as the body of an unbraced if), and the arguments are
// parenthesized so compound expressions are shifted and masked as a whole.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)                                         \
  do {                                                                         \
    (DST) &= ~(MSK);                                                           \
    (DST) |= (((VAL) << MSK##_SHIFT) & (MSK));                                 \
  } while (0)
#endif // AMDHSA_BITS_SET
53 
54 namespace llvm {
55 namespace amdhsa {
56 
// Encodings of the floating point rounding modes. The numeric values must
// match the hardware definition; all four fit in two bits.
enum : uint8_t {
  FLOAT_ROUND_MODE_NEAR_EVEN = 0,
  FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
  FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
  FLOAT_ROUND_MODE_ZERO = 3,
};
64 
// Encodings of the floating point denormal handling modes. The numeric values
// must match the hardware definition; all four fit in two bits.
enum : uint8_t {
  FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
  FLOAT_DENORM_MODE_FLUSH_DST = 1,
  FLOAT_DENORM_MODE_FLUSH_SRC = 2,
  FLOAT_DENORM_MODE_FLUSH_NONE = 3,
};
72 
// Encodings selecting the system VGPR workitem IDs. The numeric values must
// match the hardware definition; all four fit in two bits.
enum : uint8_t {
  SYSTEM_VGPR_WORKITEM_ID_X = 0,
  SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
  SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
  SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
};
80 
81 // Compute program resource register 1. Must match hardware definition.
82 // GFX6+.
83 #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
84   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
85 // [GFX6-GFX8].
86 #define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
87   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
88 // [GFX6-GFX9].
89 #define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
90   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
91 // [GFX6-GFX11].
92 #define COMPUTE_PGM_RSRC1_GFX6_GFX11(NAME, SHIFT, WIDTH)                       \
93   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX11_##NAME, SHIFT, WIDTH)
94 // GFX9+.
95 #define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
96   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
97 // GFX10+.
98 #define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
99   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
100 // GFX12+.
101 #define COMPUTE_PGM_RSRC1_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
102   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX12_PLUS_##NAME, SHIFT, WIDTH)
103 enum : int32_t {
104   COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
105   COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
106   COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
107   COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
108   COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
109   COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
110   COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
111   COMPUTE_PGM_RSRC1(PRIV, 20, 1),
112   COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_DX10_CLAMP, 21, 1),
113   COMPUTE_PGM_RSRC1_GFX12_PLUS(ENABLE_WG_RR_EN, 21, 1),
114   COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
115   COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_IEEE_MODE, 23, 1),
116   COMPUTE_PGM_RSRC1_GFX12_PLUS(DISABLE_PERF, 23, 1),
117   COMPUTE_PGM_RSRC1(BULKY, 24, 1),
118   COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
119   COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
120   COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
121   COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
122   COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
123   COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
124   COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
125   COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
126 };
127 #undef COMPUTE_PGM_RSRC1
128 
129 // Compute program resource register 2. Must match hardware definition.
130 #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
131   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
132 enum : int32_t {
133   COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
134   COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
135   COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
136   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
137   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
138   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
139   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
140   COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
141   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
142   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
143   COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
144   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
145   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
146   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
147   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
148   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
149   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
150   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
151   COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
152 };
153 #undef COMPUTE_PGM_RSRC2
154 
155 // Compute program resource register 3 for GFX90A+. Must match hardware
156 // definition.
157 #define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \
158   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH)
159 enum : int32_t {
160   COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6),
161   COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10),
162   COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1),
163   COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15),
164 };
165 #undef COMPUTE_PGM_RSRC3_GFX90A
166 
167 // Compute program resource register 3 for GFX10+. Must match hardware
168 // definition.
169 // [GFX10].
170 #define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
171   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
172 // GFX10+.
173 #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
174   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
175 // GFX11+.
176 #define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
177   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
178 enum : int32_t {
179   COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4),
180   COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8),
181   COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6),
182   COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1),
183   COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1),
184   COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19),
185   COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1),
186   COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
187 };
188 #undef COMPUTE_PGM_RSRC3_GFX10_PLUS
189 
190 // Kernel code properties. Must be kept backwards compatible.
191 #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
192   AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
193 enum : int32_t {
194   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
195   KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
196   KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
197   KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
198   KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
199   KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
200   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
201   KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
202   KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
203   KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
204   KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
205 };
206 #undef KERNEL_CODE_PROPERTY
207 
208 // Kernarg preload specification.
209 #define KERNARG_PRELOAD_SPEC(NAME, SHIFT, WIDTH)                               \
210   AMDHSA_BITS_ENUM_ENTRY(KERNARG_PRELOAD_SPEC_##NAME, SHIFT, WIDTH)
211 enum : int32_t {
212   KERNARG_PRELOAD_SPEC(LENGTH, 0, 7),
213   KERNARG_PRELOAD_SPEC(OFFSET, 7, 9),
214 };
215 #undef KERNARG_PRELOAD_SPEC
216 
// Kernel descriptor. Must be kept backwards compatible.
//
// The members are laid out without padding and add up to 64 bytes; the byte
// offset of each member is noted next to it.
struct kernel_descriptor_t {
  uint32_t group_segment_fixed_size;     // offset 0
  uint32_t private_segment_fixed_size;   // offset 4
  uint32_t kernarg_size;                 // offset 8
  uint8_t reserved0[4];                  // offset 12
  int64_t kernel_code_entry_byte_offset; // offset 16
  uint8_t reserved1[20];                 // offset 24
  uint32_t compute_pgm_rsrc3;            // offset 44, GFX10+ and GFX90A+
  uint32_t compute_pgm_rsrc1;            // offset 48
  uint32_t compute_pgm_rsrc2;            // offset 52
  uint16_t kernel_code_properties;       // offset 56
  uint16_t kernarg_preload;              // offset 58
  uint8_t reserved3[4];                  // offset 60
};
232 
// Byte offsets of the kernel_descriptor_t members, one constant per member in
// declaration order.
enum : uint32_t {
  GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
  KERNARG_SIZE_OFFSET = 8,
  RESERVED0_OFFSET = 12,
  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
  RESERVED1_OFFSET = 24,
  COMPUTE_PGM_RSRC3_OFFSET = 44,
  COMPUTE_PGM_RSRC1_OFFSET = 48,
  COMPUTE_PGM_RSRC2_OFFSET = 52,
  KERNEL_CODE_PROPERTIES_OFFSET = 56,
  KERNARG_PRELOAD_OFFSET = 58,
  RESERVED3_OFFSET = 60
};
247 
248 static_assert(
249     sizeof(kernel_descriptor_t) == 64,
250     "invalid size for kernel_descriptor_t");
251 static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
252                   GROUP_SEGMENT_FIXED_SIZE_OFFSET,
253               "invalid offset for group_segment_fixed_size");
254 static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
255                   PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
256               "invalid offset for private_segment_fixed_size");
257 static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
258                   KERNARG_SIZE_OFFSET,
259               "invalid offset for kernarg_size");
260 static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
261               "invalid offset for reserved0");
262 static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
263                   KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
264               "invalid offset for kernel_code_entry_byte_offset");
265 static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
266               "invalid offset for reserved1");
267 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
268                   COMPUTE_PGM_RSRC3_OFFSET,
269               "invalid offset for compute_pgm_rsrc3");
270 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
271                   COMPUTE_PGM_RSRC1_OFFSET,
272               "invalid offset for compute_pgm_rsrc1");
273 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
274                   COMPUTE_PGM_RSRC2_OFFSET,
275               "invalid offset for compute_pgm_rsrc2");
276 static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
277                   KERNEL_CODE_PROPERTIES_OFFSET,
278               "invalid offset for kernel_code_properties");
279 static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
280                   KERNARG_PRELOAD_OFFSET,
281               "invalid offset for kernarg_preload");
282 static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
283               "invalid offset for reserved3");
284 
285 } // end namespace amdhsa
286 } // end namespace llvm
287 
288 #endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
289