/*
 * Copyright (C) 2011  Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef SID_H
#define SID_H

#include "amdgfxregs.h"

/* si values: start (OFFSET) and end (END) of each register address range.
 * The *_SPACE_SIZE macros below are computed as END - OFFSET.
 */
#define SI_CONFIG_REG_OFFSET       0x00008000
#define SI_CONFIG_REG_END          0x0000B000
#define SI_SH_REG_OFFSET           0x0000B000
#define SI_SH_REG_END              0x0000C000
#define SI_CONTEXT_REG_OFFSET      0x00028000
#define SI_CONTEXT_REG_END         0x00030000
#define CIK_UCONFIG_REG_OFFSET     0x00030000
#define CIK_UCONFIG_REG_END        0x00040000
#define SI_UCONFIG_PERF_REG_OFFSET 0x00034000
#define SI_UCONFIG_PERF_REG_END    0x00038000

/* For register shadowing: size of each register range. */
#define SI_SH_REG_SPACE_SIZE           (SI_SH_REG_END - SI_SH_REG_OFFSET)
#define SI_CONTEXT_REG_SPACE_SIZE      (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
#define SI_UCONFIG_REG_SPACE_SIZE      (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
#define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)

/* Shadow buffer layout: SH registers first, then context registers, then
 * uconfig registers, packed back to back.
 */
#define SI_SHADOWED_SH_REG_OFFSET      0
#define SI_SHADOWED_CONTEXT_REG_OFFSET SI_SH_REG_SPACE_SIZE
#define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE)
#define SI_SHADOWED_REG_BUFFER_SIZE                                                                \
   (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + SI_UCONFIG_REG_SPACE_SIZE)

/* None of the "registers" defined in this packet section actually exist. The
 * only purpose of these definitions is to describe the packet encoding that
 * the IB parser understands, and to serve as accurate documentation.
 */
#define PKT3_NOP                                   0x10
#define PKT3_SET_BASE                              0x11
#define PKT3_CLEAR_STATE                           0x12
#define PKT3_INDEX_BUFFER_SIZE                     0x13
#define PKT3_DISPATCH_DIRECT                       0x15
#define PKT3_DISPATCH_INDIRECT                     0x16
#define PKT3_ATOMIC_MEM                            0x1E
/* ATOMIC_MEM fields: 7-bit opcode at bit 0, 2-bit command at bit 8. */
#define   ATOMIC_OP(x)                                ((unsigned)((x)&0x7f) << 0)
#define     TC_OP_ATOMIC_SUB_RTN_32                   16
#define     TC_OP_ATOMIC_SUB_RTN_64                   48
#define     TC_OP_ATOMIC_CMPSWAP_32                   72
#define     TC_OP_ATOMIC_SUB_64                       112
#define     TC_OP_ATOMIC_XOR_64                       119
#define   ATOMIC_COMMAND(x)                           ((unsigned)((x)&0x3) << 8)
#define   ATOMIC_COMMAND_SEND_RTN                     0x0 /* only RTN opcodes */
#define   ATOMIC_COMMAND_LOOP                         0x1 /* only RTN opcodes */
#define   ATOMIC_COMMAND_WR_CONFIRM                   0x2 /* only non-RTN opcodes */
#define   ATOMIC_COMMAND_SEND_NO_RTN                  0x3 /* only non-RTN opcodes */
#define   ATOMIC_ENGINE_PFP                           (1 << 30)
#define PKT3_OCCLUSION_QUERY                       0x1F /* GFX7+ */
#define PKT3_SET_PREDICATION                       0x20
#define   PREDICATION_DRAW_NOT_VISIBLE                (0 << 8)
#define   PREDICATION_DRAW_VISIBLE                    (1 << 8)
#define   PREDICATION_HINT_WAIT                       (0 << 12)
#define   PREDICATION_HINT_NOWAIT_DRAW                (1 << 12)
#define   PRED_OP(x)                                  ((x) << 16)
#define     PREDICATION_OP_CLEAR                      0x0
#define     PREDICATION_OP_ZPASS                      0x1
#define     PREDICATION_OP_PRIMCOUNT                  0x2
#define     PREDICATION_OP_BOOL64                     0x3
#define     PREDICATION_OP_BOOL32                     0x4
/* Bit 31 must be built from an unsigned 1: (1 << 31) overflows a signed int. */
#define   PREDICATION_CONTINUE                        (1u << 31)
#define PKT3_COND_EXEC                             0x22
#define PKT3_PRED_EXEC                             0x23
#define PKT3_DRAW_INDIRECT                         0x24
#define PKT3_DRAW_INDEX_INDIRECT                   0x25
#define PKT3_INDEX_BASE                            0x26
#define PKT3_DRAW_INDEX_2                          0x27
#define PKT3_CONTEXT_CONTROL                       0x28
/* CC0_* are the "load" enable bits and CC1_* the "shadow" enable bits; every
 * field is a single bit, masked before being shifted into position.
 */
#define   CC0_LOAD_GLOBAL_CONFIG(x)                   (((unsigned)(x)&0x1) << 0)
#define   CC0_LOAD_PER_CONTEXT_STATE(x)               (((unsigned)(x)&0x1) << 1)
#define   CC0_LOAD_GLOBAL_UCONFIG(x)                  (((unsigned)(x)&0x1) << 15)
#define   CC0_LOAD_GFX_SH_REGS(x)                     (((unsigned)(x)&0x1) << 16)
#define   CC0_LOAD_CS_SH_REGS(x)                      (((unsigned)(x)&0x1) << 24)
#define   CC0_LOAD_CE_RAM(x)                          (((unsigned)(x)&0x1) << 28)
#define   CC0_UPDATE_LOAD_ENABLES(x)                  (((unsigned)(x)&0x1) << 31)
#define   CC1_SHADOW_GLOBAL_CONFIG(x)                 (((unsigned)(x)&0x1) << 0)
#define   CC1_SHADOW_PER_CONTEXT_STATE(x)             (((unsigned)(x)&0x1) << 1)
#define   CC1_SHADOW_GLOBAL_UCONFIG(x)                (((unsigned)(x)&0x1) << 15)
#define   CC1_SHADOW_GFX_SH_REGS(x)                   (((unsigned)(x)&0x1) << 16)
#define   CC1_SHADOW_CS_SH_REGS(x)                    (((unsigned)(x)&0x1) << 24)
#define   CC1_UPDATE_SHADOW_ENABLES(x)                (((unsigned)(x)&0x1) << 31)
#define PKT3_INDEX_TYPE                            0x2A /* GFX6-8 */
#define PKT3_DRAW_INDIRECT_MULTI                   0x2C
#define   R_2C3_DRAW_INDEX_LOC                        0x2C3
#define   S_2C3_COUNT_INDIRECT_ENABLE(x)              (((unsigned)(x)&0x1) << 30)
#define   S_2C3_DRAW_INDEX_ENABLE(x)                  (((unsigned)(x)&0x1) << 31)
#define PKT3_DRAW_INDEX_AUTO                       0x2D
#define PKT3_DRAW_INDEX_IMMD                       0x2E /* GFX6 only */
#define PKT3_NUM_INSTANCES                         0x2F
#define PKT3_DRAW_INDEX_MULTI_AUTO                 0x30
#define PKT3_INDIRECT_BUFFER_SI                    0x32 /* GFX6 only */
#define PKT3_INDIRECT_BUFFER_CONST                 0x33
#define PKT3_STRMOUT_BUFFER_UPDATE                 0x34
#define   STRMOUT_STORE_BUFFER_FILLED_SIZE            1
/* 2-bit offset source selector at bit 1; legal values are STRMOUT_OFFSET_*. */
#define   STRMOUT_OFFSET_SOURCE(x)                    (((unsigned)(x)&0x3) << 1)
#define   STRMOUT_OFFSET_FROM_PACKET                  0
#define   STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE         1
#define   STRMOUT_OFFSET_FROM_MEM                     2
#define   STRMOUT_OFFSET_NONE                         3
#define   STRMOUT_DATA_TYPE(x)                        (((unsigned)(x)&0x1) << 7)
#define   STRMOUT_SELECT_BUFFER(x)                    (((unsigned)(x)&0x3) << 8)
#define PKT3_DRAW_INDEX_OFFSET_2                   0x35
#define PKT3_WRITE_DATA                            0x37
#define PKT3_DRAW_INDEX_INDIRECT_MULTI             0x38
#define PKT3_MEM_SEMAPHORE                         0x39
#define PKT3_MPEG_INDEX                            0x3A /* GFX6 only */
#define PKT3_WAIT_REG_MEM                          0x3C
#define   WAIT_REG_MEM_EQUAL                          3
#define   WAIT_REG_MEM_NOT_EQUAL                      4
#define   WAIT_REG_MEM_GREATER_OR_EQUAL               5
#define   WAIT_REG_MEM_MEM_SPACE(x)                   (((unsigned)(x)&0x3) << 4)
#define   WAIT_REG_MEM_PFP                            (1 << 8)
#define PKT3_MEM_WRITE                             0x3D /* GFX6 only */
#define PKT3_INDIRECT_BUFFER                       0x3F /* GFX6+ */
#define   S_3F3_INHERIT_VMID_MQD_GFX(x)               (((unsigned)(x)&0x1) << 22) /* userqueue only */
#define   S_3F3_VALID_COMPUTE(x)                      (((unsigned)(x)&0x1) << 23) /* userqueue only */
#define   S_3F3_INHERIT_VMID_MQD_COMPUTE(x)           (((unsigned)(x)&0x1) << 30) /* userqueue only */
#define PKT3_COPY_DATA                             0x40
/* Source selector: 4 bits at bit 0. Destination selector: 4 bits at bit 8.
 * COPY_DATA_TC_L2, COPY_DATA_GDS and COPY_DATA_PERF are valid for both
 * selectors and are therefore defined only once.
 */
#define   COPY_DATA_SRC_SEL(x)                        ((x)&0xf)
#define   COPY_DATA_DST_SEL(x)                        (((unsigned)(x)&0xf) << 8)
/* Selector values shared by source and destination. */
#define   COPY_DATA_REG                               0
#define   COPY_DATA_TC_L2                             2
#define   COPY_DATA_GDS                               3
#define   COPY_DATA_PERF                              4
/* Source-only selector values. */
#define   COPY_DATA_SRC_MEM                           1 /* only valid as source */
#define   COPY_DATA_IMM                               5
#define   COPY_DATA_TIMESTAMP                         9
/* Destination-only selector values. */
#define   COPY_DATA_DST_MEM_GRBM                      1 /* sync across GRBM, deprecated */
#define   COPY_DATA_DST_MEM                           5
/* Single-bit control flags. */
#define   COPY_DATA_COUNT_SEL                         (1 << 16)
#define   COPY_DATA_WR_CONFIRM                        (1 << 20)
#define   COPY_DATA_ENGINE_PFP                        (1 << 30)
/* PKT3_CP_DMA dword layout:
 * 1. header
 * 2. SRC_ADDR_LO [31:0] or DATA [31:0]
 * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0]
 * 4. DST_ADDR_LO [31:0]
 * 5. DST_ADDR_HI [15:0]
 * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
 */
#define PKT3_CP_DMA                                0x41 /* GFX6 only */
#define PKT3_PFP_SYNC_ME                           0x42
#define PKT3_SURFACE_SYNC                          0x43 /* deprecated on GFX7, use ACQUIRE_MEM */
#define PKT3_ME_INITIALIZE                         0x44 /* GFX6 only */
#define PKT3_COND_WRITE                            0x45
#define PKT3_EVENT_WRITE                           0x46
#define   EVENT_TYPE(x)                               ((x) << 0)
/* 0 - any non-TS event
 * 1 - ZPASS_DONE
 * 2 - SAMPLE_PIPELINESTAT
 * 3 - SAMPLE_STREAMOUTSTAT*
 * 4 - *S_PARTIAL_FLUSH
 * 5 - TS events
 */
#define   EVENT_INDEX(x)                              ((x) << 8)
#define   PIXEL_PIPE_STATE_CNTL_COUNTER_ID(x)         ((x) << 3)
#define   PIXEL_PIPE_STATE_CNTL_STRIDE(x)             ((x) << 9)
/* 0 - 32 bits
 * 1 - 64 bits
 * 2 - 128 bits
 * 3 - 256 bits
 */
/* The instance-enable value is split: _LO shifts it up by 11 bits, and _HI
 * yields the bits that no longer fit (x >> 21; 11 + 21 = 32).
 */
#define   PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(x)     ((x) << 11)
#define   PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(x)     ((x) >> 21)
#define PKT3_EVENT_WRITE_EOP                       0x47 /* GFX6-8 */
/* EVENT_WRITE_EOP (GFX6-8) & RELEASE_MEM (GFX9) */
#define   EVENT_TCL1_VOL_ACTION_ENA                   (1 << 12)
#define   EVENT_TC_VOL_ACTION_ENA                     (1 << 13)
#define   EVENT_TC_WB_ACTION_ENA                      (1 << 15)
#define   EVENT_TCL1_ACTION_ENA                       (1 << 16)
#define   EVENT_TC_ACTION_ENA                         (1 << 17)
#define   EVENT_TC_NC_ACTION_ENA                      (1 << 19) /* GFX9+ */
#define   EVENT_TC_WC_ACTION_ENA                      (1 << 20) /* GFX9+ */
#define   EVENT_TC_MD_ACTION_ENA                      (1 << 21) /* GFX9+ */
#define   EOP_DST_SEL(x)                              ((x) << 16)
#define     EOP_DST_SEL_MEM                           0
#define     EOP_DST_SEL_TC_L2                         1
#define   EOP_INT_SEL(x)                              ((x) << 24)
#define     EOP_INT_SEL_NONE                          0
#define     EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM    3
/* Shifted as unsigned: EOP_DATA_SEL_GDS (5) << 29 sets bit 31 and would
 * overflow a signed int.
 */
#define   EOP_DATA_SEL(x)                             ((unsigned)(x) << 29)
#define     EOP_DATA_SEL_DISCARD                      0
#define     EOP_DATA_SEL_VALUE_32BIT                  1
#define     EOP_DATA_SEL_VALUE_64BIT                  2
#define     EOP_DATA_SEL_TIMESTAMP                    3
#define     EOP_DATA_SEL_GDS                          5
#define   EOP_DATA_GDS(dw_offset, num_dwords)         ((dw_offset) | ((unsigned)(num_dwords) << 16))
/* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
 * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
 * DST_SEL=MC. Only GFX7 chips are affected.
 */
#define PKT3_EVENT_WRITE_EOS                       0x48 /* GFX6-8, breaks CP DMA */
#define   EOS_DATA_SEL(x)                             ((unsigned)(x) << 29)
#define     EOS_DATA_SEL_APPEND_COUNT                 0
#define     EOS_DATA_SEL_GDS                          1
#define     EOS_DATA_SEL_VALUE_32BIT                  2
#define PKT3_RELEASE_MEM                           0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
/* Field helpers below cast their argument to unsigned and parenthesize it, so
 * that expression arguments (e.g. "a | b") are masked as a whole and fields
 * reaching bit 31 never overflow a signed int.
 */
#define PKT3_DISPATCH_MESH_INDIRECT_MULTI          0x4C /* Indirect mesh shader only dispatch [GFX only], GFX10.3+ */
#define   S_4C1_XYZ_DIM_REG(x)                        (((unsigned)(x) & 0xFFFF))
#define   S_4C1_DRAW_INDEX_REG(x)                     (((unsigned)(x) & 0xFFFF) << 16)
#define   S_4C2_DRAW_INDEX_ENABLE(x)                  (((unsigned)(x) & 1) << 31)
#define   S_4C2_COUNT_INDIRECT_ENABLE(x)              (((unsigned)(x) & 1) << 30)
#define   S_4C2_THREAD_TRACE_MARKER_ENABLE(x)         (((unsigned)(x) & 1) << 29)
#define   S_4C2_XYZ_DIM_ENABLE(x)                     (((unsigned)(x) & 1) << 28) /* GFX11+ */
#define   S_4C2_MODE1_ENABLE(x)                       (((unsigned)(x) & 1) << 27) /* GFX11+ */
#define PKT3_DISPATCH_TASKMESH_GFX                 0x4D /* Task + mesh shader dispatch [GFX side], GFX10.3+ */
#define   S_4D0_RING_ENTRY_REG(x)                     (((unsigned)(x) & 0xFFFF) << 16)
#define   S_4D0_XYZ_DIM_REG(x)                        (((unsigned)(x) & 0xFFFF))
#define   S_4D1_THREAD_TRACE_MARKER_ENABLE(x)         (((unsigned)(x) & 1) << 31)
#define   S_4D1_XYZ_DIM_ENABLE(x)                     (((unsigned)(x) & 1) << 30) /* GFX11+ */
#define   S_4D1_MODE1_ENABLE(x)                       (((unsigned)(x) & 1) << 29) /* GFX11+ */
#define   S_4D1_LINEAR_DISPATCH_ENABLE(x)             (((unsigned)(x) & 1) << 28) /* GFX11+ */
#define PKT3_DISPATCH_MESH_DIRECT                  0x4E /* Direct mesh shader only dispatch [GFX only], GFX11+ */
/* Packet dword layout, presumably for PKT3_DMA_DATA (TODO confirm against the
 * CP packet specification):
 * 1. header
 * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0]
 * 3. SRC_ADDR_LO [31:0] or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [31:0]
 * 7. COMMAND [29:22] | BYTE_COUNT [20:0]
 */
#define PKT3_DMA_DATA                              0x50 /* GFX7+ */
#define PKT3_CONTEXT_REG_RMW                       0x51 /* older firmware versions on older chips don't have this */
#define PKT3_ONE_REG_WRITE                         0x57 /* GFX6 only */
#define PKT3_ACQUIRE_MEM                           0x58 /* GFX7+ */
#define PKT3_REWIND                                0x59 /* GFX8+ [any ring] or GFX7 [compute ring only] */
#define PKT3_PRIME_UTCL2                           0x5D
#define PKT3_LOAD_UCONFIG_REG                      0x5E /* GFX7+ */
#define PKT3_LOAD_SH_REG                           0x5F
#define PKT3_LOAD_CONTEXT_REG                      0x61
#define PKT3_LOAD_SH_REG_INDEX                     0x63 /* GFX8+ */
#define PKT3_SET_CONFIG_REG                        0x68
#define PKT3_SET_CONTEXT_REG                       0x69
#define PKT3_SET_SH_REG                            0x76
#define PKT3_SET_SH_REG_OFFSET                     0x77
#define PKT3_SET_UCONFIG_REG                       0x79 /* GFX7+ */
#define PKT3_SET_UCONFIG_REG_INDEX                 0x7A /* new for GFX9, CP ucode version >= 26 */
#define PKT3_LOAD_CONST_RAM                        0x80
#define PKT3_WRITE_CONST_RAM                       0x81
#define PKT3_DUMP_CONST_RAM                        0x83
#define PKT3_INCREMENT_CE_COUNTER                  0x84
#define PKT3_INCREMENT_DE_COUNTER                  0x85
#define PKT3_WAIT_ON_CE_COUNTER                    0x86
#define PKT3_HDP_FLUSH                             0x95
#define PKT3_SET_SH_REG_INDEX                      0x9B
#define PKT3_LOAD_CONTEXT_REG_INDEX                0x9F /* GFX8+ */
#define PKT3_DISPATCH_DIRECT_INTERLEAVED           0xA7 /* GFX12+ */
#define PKT3_DISPATCH_INDIRECT_INTERLEAVED         0xA8 /* GFX12+ */
#define PKT3_DISPATCH_TASK_STATE_INIT              0xA9 /* Tells the HW about the task control buffer, GFX10.3+ */
#define PKT3_DISPATCH_TASKMESH_DIRECT_ACE          0xAA /* Direct task + mesh shader dispatch [ACE side], GFX10.3+ */
#define PKT3_DISPATCH_TASKMESH_INDIRECT_MULTI_ACE  0xAD /* Indirect task + mesh shader dispatch [ACE side], GFX10.3+ */
/* The argument is cast to unsigned and parenthesized so that expression
 * arguments are masked as a whole and the 16-bit field at bit 16 cannot
 * overflow a signed int.
 */
#define   S_AD2_RING_ENTRY_REG(x)                     (((unsigned)(x) & 0xFFFF))
#define   S_AD3_COUNT_INDIRECT_ENABLE(x)              (((unsigned)(x) & 1) << 1)
#define   S_AD3_DRAW_INDEX_ENABLE(x)                  (((unsigned)(x) & 1) << 2)
#define   S_AD3_XYZ_DIM_ENABLE(x)                     (((unsigned)(x) & 1) << 3)
#define   S_AD3_DRAW_INDEX_REG(x)                     (((unsigned)(x) & 0xFFFF) << 16)
#define   S_AD4_XYZ_DIM_REG(x)                        (((unsigned)(x) & 0xFFFF))
#define PKT3_EVENT_WRITE_ZPASS                     0xB1 /* GFX11+ & PFP version >= 1458 */
#define   EVENT_WRITE_ZPASS_PFP_VERSION               1458
/* Use these on GFX11 with a high PFP firmware version (only dGPUs should have that, not APUs)
 * because they are the fastest SET packets there.
 *    SET_CONTEXT_REG_PAIRS_PACKED:
 *    SET_SH_REG_PAIRS_PACKED:
 *    SET_SH_REG_PAIRS_PACKED_N:
 *      Format: header, count, (offset0 | (offset1 << 16), value0, value1)^(count / 2)
 *      - "count" is the register count and must be aligned to 2.
 *      - Consecutive offsets must not be equal.
 *      - RESET_FILTER_CAM must be set to 1.
 *      - If the register count is odd, write the first register again at the end to make it even.
 *      - The SH_*_PACKED* variants require register shadowing to be enabled.
 *      - The *_N variant is identical to the non-N variant, but the maximum allowed "count" is 14
 *        and it's faster.
 *
 * Use these on GFX12 because they are the fastest SET packets there. The PACKED variants don't
 * exist on GFX12.
 *    SET_CONTEXT_REG_PAIRS:
 *    SET_SH_REG_PAIRS:
 *    SET_UCONFIG_REG_PAIRS:
 *      Format: header, (offset, value)^n.
 *      - Consecutive offsets must not be equal.
 *      - RESET_FILTER_CAM must be set to 1.
 */
#define PKT3_SET_CONTEXT_REG_PAIRS                 0xB8 /* GFX11+; only use on GFX12, not GFX11 */
#define PKT3_SET_CONTEXT_REG_PAIRS_PACKED          0xB9 /* GFX11 dGPUs only */
#define PKT3_SET_SH_REG_PAIRS                      0xBA /* GFX11+; only use on GFX12, not GFX11 */
#define PKT3_SET_SH_REG_PAIRS_PACKED               0xBB /* GFX11 dGPUs only */
#define PKT3_SET_SH_REG_PAIRS_PACKED_N             0xBD /* GFX11 dGPUs only */
#define PKT3_SET_UCONFIG_REG_PAIRS                 0xBE /* GFX12+ */

/* Packet header accessors. Suffix convention as used below:
 *   _S(x) shifts a value into its field, _G(x) extracts the field from a
 *   header dword, _C is the mask that clears the field.
 * Field positions: type [31:30], count [29:16], opcode [15:8],
 * reset-filter-cam [2], shader type [1], predicate [0].
 */
#define PKT_TYPE_S(x)         (((unsigned)(x)&0x3) << 30)
#define PKT_TYPE_G(x)         (((x) >> 30) & 0x3)
#define PKT_TYPE_C            0x3FFFFFFF
#define PKT_COUNT_S(x)        (((unsigned)(x)&0x3FFF) << 16)
#define PKT_COUNT_G(x)        (((x) >> 16) & 0x3FFF)
#define PKT_COUNT_C           0xC000FFFF
#define PKT3_IT_OPCODE_S(x)   (((unsigned)(x)&0xFF) << 8)
#define PKT3_IT_OPCODE_G(x)   (((x) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C      0xFFFF00FF
#define PKT3_PREDICATE(x)     (((x) >> 0) & 0x1)
#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x) & 0x1) << 1)
#define PKT3_SHADER_TYPE_G(x) (((x) >> 1) & 0x1)
#define PKT3_RESET_FILTER_CAM_S(x) (((unsigned)(x) & 0x1) << 2)
#define PKT3_RESET_FILTER_CAM_G(x) (((unsigned)(x) >> 2) & 0x1)
/* Builds a type-3 packet header from an opcode, a count and a predicate bit. */
#define PKT3(op, count, predicate)                                                                 \
   (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))

#define PKT3_PROTECTED_FENCE_SIGNAL                0xD0
#define PKT3_FENCE_WAIT_MULTI                      0xD1
/* Arguments are cast to unsigned and parenthesized so that expression
 * arguments are masked as a whole and the field at bit 16 cannot overflow a
 * signed int.
 */
#define   S_D10_ENGINE_SEL(x)                         (((unsigned)(x) & 1) << 0)
#define   S_D10_PREEMPTABLE(x)                        (((unsigned)(x) & 1) << 1)
#define   S_D10_CACHE_POLICY(x)                       (((unsigned)(x) & 3) << 2)
#define   S_D10_POLL_INTERVAL(x)                      (((unsigned)(x) & 0xFFFF) << 16)

#define PKT2_NOP_PAD PKT_TYPE_S(2)
#define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */

/* SI async DMA packets.
 * Header layout: cmd [31:28] | sub_cmd [27:20] | n [19:0].
 */
#define SI_DMA_PACKET(cmd, sub_cmd, n)                                                             \
   ((((unsigned)(cmd)&0xF) << 28) | (((unsigned)(sub_cmd)&0xFF) << 20) |                           \
    (((unsigned)(n)&0xFFFFF) << 0))
/* SI async DMA Packet types */
#define SI_DMA_PACKET_WRITE               0x2
#define SI_DMA_PACKET_COPY                0x3
#define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE 0xfffe0
/* The documentation says 0xffff8 is the maximum size in dwords, which is
 * 0x3fffe0 in bytes. */
#define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0
#define SI_DMA_COPY_DWORD_ALIGNED          0x00
#define SI_DMA_COPY_BYTE_ALIGNED           0x40
#define SI_DMA_COPY_TILED                  0x8
#define SI_DMA_PACKET_INDIRECT_BUFFER      0x4
#define SI_DMA_PACKET_SEMAPHORE            0x5
#define SI_DMA_PACKET_FENCE                0x6
#define SI_DMA_PACKET_TRAP                 0x7
#define SI_DMA_PACKET_SRBM_WRITE           0x9
#define SI_DMA_PACKET_CONSTANT_FILL        0xd
#define SI_DMA_PACKET_NOP                  0xf

/* CIK async DMA packets.
 * Header layout: n [31:16] | sub_op [15:8] | op [7:0].
 */
#define SDMA_PACKET(op, sub_op, n)                                                                 \
   ((((unsigned)(n)&0xFFFF) << 16) | (((unsigned)(sub_op)&0xFF) << 8) |                            \
    (((unsigned)(op)&0xFF) << 0))
/* CIK async DMA packet types */
#define SDMA_OPCODE_NOP                            0x0
#define SDMA_OPCODE_COPY                           0x1
#define SDMA_COPY_SUB_OPCODE_LINEAR                0x0
#define SDMA_COPY_SUB_OPCODE_TILED                 0x1
#define SDMA_COPY_SUB_OPCODE_SOA                   0x3
#define SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW     0x4
#define SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW      0x5
#define SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW        0x6
#define SDMA_OPCODE_WRITE                          0x2
#define SDMA_WRITE_SUB_OPCODE_LINEAR               0x0
#define SDMA_WRITE_SUB_OPCODE_TILED                0x1
#define SDMA_OPCODE_INDIRECT_BUFFER                0x4
#define SDMA_OPCODE_FENCE                          0x5
#define SDMA_FENCE_MTYPE_UC                        0x3
#define SDMA_OPCODE_TRAP                           0x6
#define SDMA_OPCODE_SEMAPHORE                      0x7
#define SDMA_OPCODE_POLL_REGMEM                    0x8
/* Bit 31 must be built from an unsigned 1: (1 << 31) overflows a signed int. */
#define SDMA_POLL_MEM                              (1u << 31)
#define SDMA_POLL_INTERVAL_160_CLK                 0xa
#define SDMA_POLL_RETRY_INDEFINITELY               0xfff
#define SDMA_OPCODE_CONSTANT_FILL                  0xb
#define SDMA_OPCODE_TIMESTAMP                      0xd
#define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP     0x0
#define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP     0x1
#define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP    0x2
#define SDMA_OPCODE_SRBM_WRITE                     0xe

/* There is apparently an undocumented HW limitation that
 * prevents the HW from copying the last 255 bytes of (1 << 22) - 1
 */
#define SDMA_V2_0_COPY_MAX_BYTES 0x3fff00   /* almost 4 MB*/
#define SDMA_V5_2_COPY_MAX_BYTES 0x3fffff00 /* almost 1 GB */

#define SDMA_NOP_PAD SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) /* header-only version */

/* Floating-point class flags: one bit per class, so several classes can be
 * OR-ed together into a single mask.
 */
enum amd_cmp_class_flags
{
   S_NAN = 1 << 0,       // Signaling NaN
   Q_NAN = 1 << 1,       // Quiet NaN
   N_INFINITY = 1 << 2,  // Negative infinity
   N_NORMAL = 1 << 3,    // Negative normal
   N_SUBNORMAL = 1 << 4, // Negative subnormal
   N_ZERO = 1 << 5,      // Negative zero
   P_ZERO = 1 << 6,      // Positive zero
   P_SUBNORMAL = 1 << 7, // Positive subnormal
   P_NORMAL = 1 << 8,    // Positive normal
   P_INFINITY = 1 << 9   // Positive infinity
};

/* Use the last bit of AMDGPU_GEM_CREATE_* flag as a virtio-only
 * flag.
 *
 * The value is parenthesized so it expands as a single operand next to
 * operators that bind tighter than "<<" (e.g. "~", "/", "+").
 */
#define AMDGPU_GEM_CREATE_VIRTIO_SHARED (1u << 31)

#endif /* SID_H */