• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
/**
 * This file contains helpers for writing commands to command streams.
 */
10 
11 #ifndef SI_BUILD_PM4_H
12 #define SI_BUILD_PM4_H
13 
14 #include "si_pipe.h"
15 #include "sid.h"
16 
/* Open a command-emission scope on the command buffer "cs".
 *
 * Declares the locals the other helpers in this file operate on:
 *   __cs             - the command buffer itself
 *   __cs_num         - running dword index, seeded from cs->current.cdw
 *   __cs_num_initial - copy of __cs_num taken when the scope is opened
 *   __cs_buf         - pointer to the current dword buffer
 *
 * The expansion is a sequence of declarations (not a do/while block); the
 * last declaration is terminated by the semicolon the caller writes.
 */
#define radeon_begin(cs) \
   struct radeon_cmdbuf *__cs = (cs); \
   unsigned __cs_num = __cs->current.cdw; \
   UNUSED unsigned __cs_num_initial = __cs_num; \
   uint32_t *__cs_buf = __cs->current.buf
21 
/* Re-open an emission scope using the locals that radeon_begin() declared.
 *
 * Asserts that __cs is NULL (the state radeon_end() leaves behind), then
 * reloads __cs, __cs_buf, __cs_num and __cs_num_initial from "cs".
 */
#define radeon_begin_again(cs) do { \
   assert(__cs == NULL); \
   __cs = (cs); \
   __cs_buf = __cs->current.buf; \
   __cs_num_initial = __cs_num = __cs->current.cdw; \
} while (0)
29 
/* Close the emission scope: write the running dword index back to
 * __cs->current.cdw, assert that it does not exceed __cs->current.max_dw,
 * and reset __cs to NULL so that radeon_begin_again() can be used later.
 */
#define radeon_end() do { \
   __cs->current.cdw = __cs_num; \
   assert(__cs->current.max_dw >= __cs->current.cdw); \
   __cs = NULL; \
} while (0)
35 
/* Store one dword at the current position and advance __cs_num by one. */
#define radeon_emit(value)  __cs_buf[__cs_num++] = (value)

/* Non-zero if __cs_num has moved since the scope was opened. */
#define radeon_packets_added()  (__cs_num_initial != __cs_num)
38 
/* Close the emission scope like radeon_end() and, if any dwords were added
 * since the scope was opened, set sctx->context_roll.
 *
 * Expects a variable named "sctx" in the caller's scope.
 */
#define radeon_end_update_context_roll() do { \
   radeon_end(); \
   if (radeon_packets_added()) { \
      sctx->context_roll = true; \
   } \
} while (0)
44 
/* Copy "num" dwords from "values" to the current position and advance
 * __cs_num by "num".
 *
 * "num" is evaluated exactly once. The byte count is computed with
 * sizeof(uint32_t) (in size_t arithmetic) rather than a bare "* 4", matching
 * how radeon_opt_set_regn() sizes its copies.
 */
#define radeon_emit_array(values, num) do { \
   unsigned __n = (num); \
   memcpy(__cs_buf + __cs_num, (values), __n * sizeof(uint32_t)); \
   __cs_num += __n; \
} while (0)
50 
/* Reserve "num" dwords instead of writing them: store the address of the
 * current position in *ptr and advance __cs_num by "num". The caller is
 * expected to fill in the reserved elements through the returned pointer.
 */
#define radeon_emit_array_get_ptr(num, ptr) do { \
   *(ptr) = &__cs_buf[__cs_num]; \
   __cs_num += (num); \
} while (0)
58 
/* Packet building helpers. Don't use directly. */

/* Emit the two-dword header of a register-write packet.
 *
 * Asserts that "reg" lies in [prefix_name##_REG_OFFSET, prefix_name##_REG_END),
 * then emits:
 *   dword 0: PKT3(packet, num, 0) OR'ed with PKT3_RESET_FILTER_CAM_S(reset_filter_cam)
 *   dword 1: the dword offset of "reg" from prefix_name##_REG_OFFSET,
 *            OR'ed with "idx" shifted left by 28
 * The register values themselves must be emitted by the caller afterwards.
 */
#define radeon_set_reg_seq(reg, num, idx, prefix_name, packet, reset_filter_cam) do { \
   assert((reg) >= prefix_name##_REG_OFFSET && (reg) < prefix_name##_REG_END); \
   radeon_emit(PKT3(packet, num, 0) | PKT3_RESET_FILTER_CAM_S(reset_filter_cam)); \
   radeon_emit(((idx) << 28) | (((reg) - prefix_name##_REG_OFFSET) >> 2)); \
} while (0)
65 
/* Write a single register: a one-register radeon_set_reg_seq() header
 * (with reset_filter_cam = 0) followed by the value dword.
 */
#define radeon_set_reg(reg, idx, value, prefix_name, packet) \
   do { \
      radeon_set_reg_seq(reg, 1, idx, prefix_name, packet, 0); \
      radeon_emit(value); \
   } while (0)
70 
/* Write a single register only if needed.
 *
 * The packet is skipped when the bit "reg_enum" is set in
 * sctx->tracked_regs.reg_saved_mask and sctx->tracked_regs.reg_value[reg_enum]
 * already equals "value". Otherwise the register is written with
 * radeon_set_reg() and the tracked value and mask bit are updated.
 *
 * "value" is evaluated once; "reg_enum" is expanded several times.
 * Expects "sctx" in the caller's scope.
 */
#define radeon_opt_set_reg(reg, reg_enum, idx, value, prefix_name, packet) do { \
   unsigned __value = (value); \
   if (!(BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __value)) { \
      radeon_set_reg(reg, idx, __value, prefix_name, packet); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __value; \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
   } \
} while (0)
80 
/* Set consecutive registers if any value is different.
 *
 * Two-register variant: the tracked slots "reg_enum" and "reg_enum + 1" must
 * fall into the same bitset word (checked at compile time). One packet
 * carrying both values is emitted when either mask bit is clear or either
 * tracked value differs; the tracked values and mask bits are then updated.
 *
 * "reg_enum" is parenthesized everywhere it is expanded, including inside
 * the static_assert, so that an expression argument is not re-associated
 * with the "+ 1". v1 and v2 are each evaluated once.
 * Expects "sctx" in the caller's scope.
 */
#define radeon_opt_set_reg2(reg, reg_enum, v1, v2, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 1), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1), __v2 = (v2); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 1, 0x3) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2) { \
      radeon_set_reg_seq(reg, 2, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 1); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
   } \
} while (0)
99 
/* Three-register variant of the "set consecutive registers if anything
 * changed" helper.
 *
 * The tracked slots "reg_enum" .. "reg_enum + 2" must fall into the same
 * bitset word (checked at compile time). One packet carrying all three values
 * is emitted when any mask bit is clear or any tracked value differs; the
 * tracked values and mask bits are then updated.
 *
 * "reg_enum" is parenthesized everywhere it is expanded, including inside
 * the static_assert, so that an expression argument is not re-associated
 * with the "+ 2". v1..v3 are each evaluated once.
 * Expects "sctx" in the caller's scope.
 */
#define radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 2), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 2, 0x7) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3) { \
      radeon_set_reg_seq(reg, 3, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 2); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
   } \
} while (0)
120 
/* Four-register variant of the "set consecutive registers if anything
 * changed" helper.
 *
 * Tracked slots "reg_enum" .. "reg_enum + 3" must share one bitset word
 * (compile-time check). The packet is skipped only when all four mask bits
 * are set and all four tracked values match; otherwise all four values are
 * emitted in one packet and the tracked state is refreshed.
 * Expects "sctx" in the caller's scope.
 */
#define radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 3), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 3, 0xf) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4)) { \
      radeon_set_reg_seq(reg, 4, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
   } \
} while (0)
144 
/* Five-register variant of the "set consecutive registers if anything
 * changed" helper.
 *
 * Tracked slots "reg_enum" .. "reg_enum + 4" must share one bitset word
 * (compile-time check). The packet is skipped only when all five mask bits
 * are set and all five tracked values match; otherwise all five values are
 * emitted in one packet and the tracked state is refreshed.
 * Expects "sctx" in the caller's scope.
 */
#define radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 4), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   unsigned __v5 = (v5); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 4, 0x1f) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 4] == __v5)) { \
      radeon_set_reg_seq(reg, 5, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      radeon_emit(__v5); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      sctx->tracked_regs.reg_value[(reg_enum) + 4] = __v5; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 4); \
   } \
} while (0)
171 
/* Six-register variant of the "set consecutive registers if anything
 * changed" helper.
 *
 * Tracked slots "reg_enum" .. "reg_enum + 5" must share one bitset word
 * (compile-time check). The packet is skipped only when all six mask bits
 * are set and all six tracked values match; otherwise all six values are
 * emitted in one packet and the tracked state is refreshed.
 * Expects "sctx" in the caller's scope.
 */
#define radeon_opt_set_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 5), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   unsigned __v5 = (v5); \
   unsigned __v6 = (v6); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 5, 0x3f) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 4] == __v5 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 5] == __v6)) { \
      radeon_set_reg_seq(reg, 6, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      radeon_emit(__v5); \
      radeon_emit(__v6); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      sctx->tracked_regs.reg_value[(reg_enum) + 4] = __v5; \
      sctx->tracked_regs.reg_value[(reg_enum) + 5] = __v6; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 5); \
   } \
} while (0)
201 
/* Write "num" consecutive registers only if "values" differs from the
 * caller-provided cache "saved_values" (compared bytewise over
 * num * sizeof(uint32_t) bytes). When a packet is emitted, "saved_values"
 * is overwritten with "values". This variant does not touch
 * sctx->tracked_regs.
 */
#define radeon_opt_set_regn(reg, values, saved_values, num, prefix_name, packet) do { \
   if (memcmp(values, saved_values, sizeof(uint32_t) * (num)) != 0) { \
      radeon_set_reg_seq(reg, num, 0, prefix_name, packet, 0); \
      radeon_emit_array(values, num); \
      memcpy(saved_values, values, sizeof(uint32_t) * (num)); \
   } \
} while (0)
209 
/* Packet building helpers for CONFIG registers. */

/* Write one register in the SI_CONFIG range using PKT3_SET_CONFIG_REG (idx 0). */
#define radeon_set_config_reg(reg, value) \
   radeon_set_reg(reg, 0, value, \
                  SI_CONFIG, PKT3_SET_CONFIG_REG)
213 
/* Packet building helpers for CONTEXT registers.
 *
 * All of these bind the generic helpers to the SI_CONTEXT register range and
 * the PKT3_SET_CONTEXT_REG packet. The "opt" variants skip the packet when
 * the tracked state already matches (see the generic radeon_opt_set_reg*).
 */

/* Emit only the packet header for "num" registers; values follow separately. */
#define radeon_set_context_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, \
                      SI_CONTEXT, PKT3_SET_CONTEXT_REG, 0)

/* Unconditionally write one register. */
#define radeon_set_context_reg(reg, value) \
   radeon_set_reg(reg, 0, value, \
                  SI_CONTEXT, PKT3_SET_CONTEXT_REG)

/* Write one register if its tracked value differs. */
#define radeon_opt_set_context_reg(reg, reg_enum, value) \
   radeon_opt_set_reg(reg, reg_enum, 0, value, \
                      SI_CONTEXT, PKT3_SET_CONTEXT_REG)

/* Same as above, with an explicit "idx" for the packet's index field. */
#define radeon_opt_set_context_reg_idx(reg, reg_enum, idx, value) \
   radeon_opt_set_reg(reg, reg_enum, idx, value, \
                      SI_CONTEXT, PKT3_SET_CONTEXT_REG)

/* Write 2..6 consecutive registers if any tracked value differs. */
#define radeon_opt_set_context_reg2(reg, reg_enum, v1, v2) \
   radeon_opt_set_reg2(reg, reg_enum, v1, v2, \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg3(reg, reg_enum, v1, v2, v3) \
   radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg5(reg, reg_enum, v1, v2, v3, v4, v5) \
   radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6) \
   radeon_opt_set_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6, \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)

/* Write "num" consecutive registers if "values" differs from "saved_values". */
#define radeon_opt_set_context_regn(reg, values, saved_values, num) \
   radeon_opt_set_regn(reg, values, saved_values, num, \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)
244 
/* Packet building helpers for SH registers.
 *
 * These bind the generic helpers to the SI_SH register range and the
 * PKT3_SET_SH_REG packet (PKT3_SET_SH_REG_INDEX for the "_idx" variant).
 */

/* Emit only the packet header for "num" registers; values follow separately. */
#define radeon_set_sh_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, \
                      SI_SH, PKT3_SET_SH_REG, 0)

/* Unconditionally write one register. */
#define radeon_set_sh_reg(reg, value) \
   radeon_set_reg(reg, 0, value, \
                  SI_SH, PKT3_SET_SH_REG)

/* Write one register if its tracked value differs. */
#define radeon_opt_set_sh_reg(reg, reg_enum, value) \
   radeon_opt_set_reg(reg, reg_enum, 0, value, \
                      SI_SH, PKT3_SET_SH_REG)

/* Write 2 or 3 consecutive registers if any tracked value differs. */
#define radeon_opt_set_sh_reg2(reg, reg_enum, v1, v2) \
   radeon_opt_set_reg2(reg, reg_enum, v1, v2, \
                       SI_SH, PKT3_SET_SH_REG)

#define radeon_opt_set_sh_reg3(reg, reg_enum, v1, v2, v3) \
   radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, \
                       SI_SH, PKT3_SET_SH_REG)

/* Indexed single-register write; asserts sctx->gfx_level >= GFX10. */
#define radeon_opt_set_sh_reg_idx(reg, reg_enum, idx, value) \
   do { \
      assert(sctx->gfx_level >= GFX10); \
      radeon_opt_set_reg(reg, reg_enum, idx, value, SI_SH, PKT3_SET_SH_REG_INDEX); \
   } while (0)
265 
/* Emit "va" as one dword.
 *
 * Asserts that "va" is either 0 or has upper 32 bits equal to
 * sctx->screen->info.address32_hi. "va" is expanded more than once.
 */
#define radeon_emit_32bit_pointer(va) \
   do { \
      assert((va) == 0 || ((va) >> 32) == sctx->screen->info.address32_hi); \
      radeon_emit(va); \
   } while (0)

/* Write (desc)->gpu_address as a 32-bit pointer into the single SH register
 * at (sh_base) + (desc)->shader_userdata_offset.
 */
#define radeon_emit_one_32bit_pointer(desc, sh_base) \
   do { \
      radeon_set_sh_reg_seq((sh_base) + (desc)->shader_userdata_offset, 1); \
      radeon_emit_32bit_pointer((desc)->gpu_address); \
   } while (0)
275 
/* Packet building helpers for UCONFIG registers.
 *
 * These bind the generic helpers to the CIK_UCONFIG register range.
 */

/* Emit only the packet header for "num" registers; values follow separately. */
#define radeon_set_uconfig_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, \
                      CIK_UCONFIG, PKT3_SET_UCONFIG_REG, 0)

/* Like radeon_set_uconfig_reg_seq(), but the reset_filter_cam argument is
 * true when sctx->gfx_level >= GFX10 and the IP type reported for __cs by
 * sctx->ws->cs_get_ip_type() is AMD_IP_GFX.
 */
#define radeon_set_uconfig_perfctr_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, \
                      CIK_UCONFIG, PKT3_SET_UCONFIG_REG, \
                      sctx->gfx_level >= GFX10 && \
                      sctx->ws->cs_get_ip_type(__cs) == AMD_IP_GFX)

/* Unconditionally write one register. */
#define radeon_set_uconfig_reg(reg, value) \
   radeon_set_reg(reg, 0, value, \
                  CIK_UCONFIG, PKT3_SET_UCONFIG_REG)

/* Write one register if its tracked value differs. */
#define radeon_opt_set_uconfig_reg(reg, reg_enum, value) \
   radeon_opt_set_reg(reg, reg_enum, 0, value, \
                      CIK_UCONFIG, PKT3_SET_UCONFIG_REG)

/* Packet opcode to use for indexed UCONFIG writes: PKT3_SET_UCONFIG_REG_INDEX
 * when GFX_VERSION >= GFX10, or when GFX_VERSION == GFX9 and
 * sctx->screen->info.me_fw_version >= 26; PKT3_SET_UCONFIG_REG otherwise.
 */
#define RESOLVE_PKT3_SET_UCONFIG_REG_INDEX \
   ((GFX_VERSION >= GFX10 || \
     (GFX_VERSION == GFX9 && sctx->screen->info.me_fw_version >= 26)) ? \
       PKT3_SET_UCONFIG_REG_INDEX : PKT3_SET_UCONFIG_REG)

/* Unconditionally write one register with an explicit "idx". */
#define radeon_set_uconfig_reg_idx(reg, idx, value) \
   radeon_set_reg(reg, idx, value, \
                  CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)

/* Write one register with an explicit "idx" if its tracked value differs. */
#define radeon_opt_set_uconfig_reg_idx(reg, reg_enum, idx, value) \
   radeon_opt_set_reg(reg, reg_enum, idx, value, \
                      CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)
300 
/* Write "value" to a register below CIK_UCONFIG_REG_OFFSET (asserted) by
 * emitting a six-dword PKT3_COPY_DATA packet with an immediate source
 * (COPY_DATA_IMM) and COPY_DATA_PERF as the destination select. The
 * destination dword is the register's dword offset ((reg) >> 2).
 */
#define radeon_set_privileged_config_reg(reg, value) \
   do { \
      assert((reg) < CIK_UCONFIG_REG_OFFSET); \
      radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
      radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
      radeon_emit(value); \
      radeon_emit(0); /* unused */ \
      radeon_emit((reg) >> 2); \
      radeon_emit(0); /* unused */ \
   } while (0)
311 
/* GFX11 generic packet building helpers for buffered SH registers. Don't use these directly. */

/* Append one (register, value) entry to "buffer" and post-increment
 * "reg_count".
 *
 * Entries are packed two per array element: entry i goes to
 * buffer[i / 2], slot i % 2. The stored offset is the dword offset of "reg"
 * from prefix_name##_REG_OFFSET. Asserts that "reg" is inside the prefix's
 * range and that the element index is within ARRAY_SIZE(buffer), so
 * "buffer" must be an actual array, not a pointer.
 */
#define gfx11_push_reg(reg, value, prefix_name, buffer, reg_count) do { \
   unsigned __i = (reg_count)++; \
   assert((reg) >= prefix_name##_REG_OFFSET && (reg) < prefix_name##_REG_END); \
   assert(__i / 2 < ARRAY_SIZE(buffer)); \
   buffer[__i / 2].reg_value[__i % 2] = (value); \
   buffer[__i / 2].reg_offset[__i % 2] = ((reg) - prefix_name##_REG_OFFSET) >> 2; \
} while (0)
320 
/* Buffer one register with gfx11_push_reg() only if needed.
 *
 * Nothing is pushed when the bit "reg_enum" is set in
 * sctx->tracked_regs.reg_saved_mask and the tracked value already equals
 * "value"; otherwise the entry is pushed and the tracked state is updated.
 * Expects "sctx" in the caller's scope.
 */
#define gfx11_opt_push_reg(reg, reg_enum, value, prefix_name, buffer, reg_count) do { \
   unsigned __value = (value); \
   if (!(BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __value)) { \
      gfx11_push_reg(reg, __value, prefix_name, buffer, reg_count); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __value; \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
   } \
} while (0)
330 
/* Buffer four consecutive registers with gfx11_push_reg() only if needed.
 *
 * Tracked slots "reg_enum" .. "reg_enum + 3" must share one bitset word
 * (compile-time check). If any mask bit is clear or any tracked value
 * differs, the registers at reg, reg + 4, reg + 8 and reg + 12 are pushed
 * and the tracked state is refreshed.
 * Expects "sctx" in the caller's scope.
 */
#define gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, buffer, reg_count) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 3), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 3, 0xf) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4)) { \
      gfx11_push_reg((reg), __v1, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 4, __v2, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 8, __v3, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 12, __v4, prefix_name, buffer, reg_count); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
   } \
} while (0)
356 
/* GFX11 packet building helpers for buffered SH registers.
 *
 * The "gfx" variants append to sctx->gfx11.buffered_gfx_sh_regs (counted by
 * sctx->num_buffered_gfx_sh_regs); the "compute" variants append to
 * sctx->gfx11.buffered_compute_sh_regs (counted by
 * sctx->num_buffered_compute_sh_regs). The "opt" variants skip registers
 * whose tracked value already matches.
 */
#define gfx11_push_gfx_sh_reg(reg, value) \
   gfx11_push_reg(reg, value, SI_SH, \
                  sctx->gfx11.buffered_gfx_sh_regs, \
                  sctx->num_buffered_gfx_sh_regs)

#define gfx11_push_compute_sh_reg(reg, value) \
   gfx11_push_reg(reg, value, SI_SH, \
                  sctx->gfx11.buffered_compute_sh_regs, \
                  sctx->num_buffered_compute_sh_regs)

#define gfx11_opt_push_gfx_sh_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, \
                      sctx->gfx11.buffered_gfx_sh_regs, \
                      sctx->num_buffered_gfx_sh_regs)

#define gfx11_opt_push_compute_sh_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, \
                      sctx->gfx11.buffered_compute_sh_regs, \
                      sctx->num_buffered_compute_sh_regs)
373 
/* GFX11 packet building helpers for SET_CONTEXT_REG_PAIRS_PACKED.
 * Registers are buffered on the stack and then copied to the command buffer at the end.
 */

/* Declare the on-stack buffer (50 pair elements) and its entry counter.
 * The expansion already ends in a semicolon.
 */
#define gfx11_begin_packed_context_regs() \
   struct gfx11_reg_pair __cs_context_regs[50]; \
   unsigned __cs_context_reg_count = 0;

/* Unconditionally buffer one context register. */
#define gfx11_set_context_reg(reg, value) \
   gfx11_push_reg(reg, value, SI_CONTEXT, \
                  __cs_context_regs, __cs_context_reg_count)

/* Buffer one context register if its tracked value differs. */
#define gfx11_opt_set_context_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg(reg, reg_enum, value, SI_CONTEXT, \
                      __cs_context_regs, __cs_context_reg_count)

/* Buffer four consecutive context registers if any tracked value differs. */
#define gfx11_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, \
                       __cs_context_regs, __cs_context_reg_count)
391 
/* Flush the registers buffered since gfx11_begin_packed_context_regs().
 *
 *   0 entries: nothing is emitted.
 *   1 entry:   a plain PKT3_SET_CONTEXT_REG packet for that register.
 *   2+ entries: one PKT3_SET_CONTEXT_REG_PAIRS_PACKED packet. An odd count is
 *              first made even by pushing the first buffered register again.
 *              The payload is 3 dwords per pair element, copied straight from
 *              the buffer.
 */
#define gfx11_end_packed_context_regs() do { \
   if (__cs_context_reg_count == 1) { \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
      radeon_emit(__cs_context_regs[0].reg_offset[0]); \
      radeon_emit(__cs_context_regs[0].reg_value[0]); \
   } else if (__cs_context_reg_count >= 2) { \
      /* Align the count to 2 by duplicating the first register. */ \
      if ((__cs_context_reg_count & 1) != 0) { \
         gfx11_set_context_reg(SI_CONTEXT_REG_OFFSET + __cs_context_regs[0].reg_offset[0] * 4, \
                               __cs_context_regs[0].reg_value[0]); \
      } \
      assert(__cs_context_reg_count % 2 == 0); \
      unsigned __num_dw = (__cs_context_reg_count / 2) * 3; \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG_PAIRS_PACKED, __num_dw, 0) | PKT3_RESET_FILTER_CAM_S(1)); \
      radeon_emit(__cs_context_reg_count); \
      radeon_emit_array(__cs_context_regs, __num_dw); \
   } \
} while (0)
410 
/* GFX12 generic packet building helpers for PAIRS packets. Don't use these directly. */

/* Reserve one dword for the packet header: declares "header" holding its
 * index and advances __cs_num past it. The header is written (or the slot
 * given back) by gfx12_end_regs().
 */
#define gfx12_begin_regs(header) unsigned header = __cs_num++

/* Emit one (dword offset, value) pair, where the offset is that of "reg"
 * relative to "base_offset".
 */
#define gfx12_set_reg(reg, value, base_offset) \
   do { \
      radeon_emit(((reg) - (base_offset)) >> 2); \
      radeon_emit(value); \
   } while (0)
418 
/* Emit one (offset, value) pair with gfx12_set_reg() only if needed.
 *
 * Nothing is emitted when the bit "reg_enum" is set in
 * sctx->tracked_regs.reg_saved_mask and the tracked value already equals
 * "value"; otherwise the pair is emitted and the tracked state is updated.
 * Expects "sctx" in the caller's scope.
 */
#define gfx12_opt_set_reg(reg, reg_enum, value, base_offset) do { \
   unsigned __value = (value); \
   if (!(BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __value)) { \
      gfx12_set_reg(reg, __value, base_offset); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __value; \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
   } \
} while (0)
428 
/* Emit four (offset, value) pairs with gfx12_set_reg() only if needed.
 *
 * Tracked slots "reg_enum" .. "reg_enum + 3" must share one bitset word
 * (compile-time check). If any mask bit is clear or any tracked value
 * differs, pairs for reg, reg + 4, reg + 8 and reg + 12 are emitted and the
 * tracked state is refreshed.
 * Expects "sctx" in the caller's scope.
 */
#define gfx12_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, base_offset) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 3), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 3, 0xf) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4)) { \
      gfx12_set_reg((reg), __v1, (base_offset)); \
      gfx12_set_reg((reg) + 4, __v2, (base_offset)); \
      gfx12_set_reg((reg) + 8, __v3, (base_offset)); \
      gfx12_set_reg((reg) + 12, __v4, (base_offset)); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
   } \
} while (0)
451 
/* Finish a packet started with gfx12_begin_regs(header).
 *
 * If at least one dword was emitted after the reserved header slot, the
 * header is filled in with PKT3(packet, count, 0) | PKT3_RESET_FILTER_CAM_S(1),
 * where count = __cs_num - header - 2. If nothing was emitted, the reserved
 * slot is released by decrementing __cs_num.
 */
#define gfx12_end_regs(header, packet) do { \
   if (__cs_num != (header) + 1) { \
      unsigned __dw_count = __cs_num - (header) - 2; \
      __cs_buf[(header)] = PKT3((packet), __dw_count, 0) | PKT3_RESET_FILTER_CAM_S(1); \
   } else { \
      __cs_num--; /* no registers have been set, back off */ \
   } \
} while (0)
460 
/* GFX12 generic packet building helpers for buffered registers. Don't use these directly. */

/* Append one (dword offset, value) entry to
 * sctx->gfx12.buffered_<type>_regs and post-increment
 * sctx->num_buffered_<type>_regs. The offset is that of "reg" relative to
 * "base_offset". Asserts that the entry index is within the array.
 */
#define gfx12_push_reg(reg, value, base_offset, type) do { \
   unsigned __i = sctx->num_buffered_##type##_regs++; \
   assert(__i < ARRAY_SIZE(sctx->gfx12.buffered_##type##_regs)); \
   sctx->gfx12.buffered_##type##_regs[__i].reg_value = (value); \
   sctx->gfx12.buffered_##type##_regs[__i].reg_offset = ((reg) - (base_offset)) >> 2; \
} while (0)
468 
/* Buffer one register through gfx12_push_<type>_reg() only if needed.
 *
 * Nothing is pushed when the bit "reg_enum" is set in
 * sctx->tracked_regs.reg_saved_mask and the tracked value already equals
 * "value"; otherwise the entry is pushed and the tracked state is updated.
 * The mask test/set uses the "reg_enum" argument directly, while the value
 * array is indexed through a local copy.
 * Expects "sctx" in the caller's scope.
 */
#define gfx12_opt_push_reg(reg, reg_enum, value, type) do { \
   unsigned __value = (value); \
   unsigned __reg_enum = (reg_enum); \
   if (!(BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) && \
         sctx->tracked_regs.reg_value[__reg_enum] == __value)) { \
      gfx12_push_##type##_reg(reg, __value); \
      sctx->tracked_regs.reg_value[__reg_enum] = __value; \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
   } \
} while (0)
479 
/* GFX12 packet building helpers for PAIRS packets.
 *
 * Context-register flavour of the generic gfx12_*_reg helpers: offsets are
 * relative to SI_CONTEXT_REG_OFFSET, the header slot is tracked in
 * __cs_context_reg_header, and the packet is closed as
 * PKT3_SET_CONTEXT_REG_PAIRS.
 */
#define gfx12_begin_context_regs() \
   gfx12_begin_regs(__cs_context_reg_header)

#define gfx12_set_context_reg(reg, value) \
   gfx12_set_reg(reg, value, \
                 SI_CONTEXT_REG_OFFSET)

#define gfx12_opt_set_context_reg(reg, reg_enum, value) \
   gfx12_opt_set_reg(reg, reg_enum, value, \
                     SI_CONTEXT_REG_OFFSET)

#define gfx12_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   gfx12_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, \
                      SI_CONTEXT_REG_OFFSET)

#define gfx12_end_context_regs() \
   gfx12_end_regs(__cs_context_reg_header, \
                  PKT3_SET_CONTEXT_REG_PAIRS)

/* GFX12 packet building helpers for buffered registers.
 *
 * Offsets are relative to SI_SH_REG_OFFSET. The "gfx_sh" and "compute_sh"
 * type tokens select which sctx buffer/counter pair gfx12_push_reg() uses.
 */
#define gfx12_push_gfx_sh_reg(reg, value) \
   gfx12_push_reg(reg, value, \
                  SI_SH_REG_OFFSET, gfx_sh)

#define gfx12_push_compute_sh_reg(reg, value) \
   gfx12_push_reg(reg, value, \
                  SI_SH_REG_OFFSET, compute_sh)

#define gfx12_opt_push_gfx_sh_reg(reg, reg_enum, value) \
   gfx12_opt_push_reg(reg, reg_enum, value, \
                      gfx_sh)

#define gfx12_opt_push_compute_sh_reg(reg, reg_enum, value) \
   gfx12_opt_push_reg(reg, reg_enum, value, \
                      compute_sh)
508 
/* Write one graphics SH register using whichever mechanism matches the
 * build-time configuration:
 *   GFX_VERSION >= GFX12                          -> gfx12_push_gfx_sh_reg()
 *   GFX_VERSION >= GFX11 && HAS_SH_PAIRS_PACKED   -> gfx11_push_gfx_sh_reg()
 *   otherwise                                     -> a direct SET_SH_REG
 *                                                    packet in the command
 *                                                    stream
 */
#define radeon_set_or_push_gfx_sh_reg(reg, value) do { \
   if (GFX_VERSION >= GFX12) { \
      gfx12_push_gfx_sh_reg(reg, value); \
   } else if (GFX_VERSION >= GFX11 && HAS_SH_PAIRS_PACKED) { \
      gfx11_push_gfx_sh_reg(reg, value); \
   } else { \
      radeon_set_sh_reg(reg, value); \
   } \
} while (0)
519 
/* Other packet helpers. */

/* Emit a two-dword PKT3_EVENT_WRITE packet for "event_type".
 *
 * The EVENT_INDEX field is chosen from the event type:
 *   4 for V_028A90_VS_PARTIAL_FLUSH, V_028A90_PS_PARTIAL_FLUSH and
 *     V_028A90_CS_PARTIAL_FLUSH,
 *   1 for V_028A90_PIXEL_PIPE_STAT_CONTROL,
 *   0 for everything else.
 * "event_type" is evaluated once.
 */
#define radeon_event_write(event_type) do { \
   unsigned __event_type = (event_type); \
   int __event_index; \
   if (__event_type == V_028A90_VS_PARTIAL_FLUSH || \
       __event_type == V_028A90_PS_PARTIAL_FLUSH || \
       __event_type == V_028A90_CS_PARTIAL_FLUSH) { \
      __event_index = 4; \
   } else if (__event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL) { \
      __event_index = 1; \
   } else { \
      __event_index = 0; \
   } \
   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \
   radeon_emit(EVENT_TYPE(__event_type) | EVENT_INDEX(__event_index)); \
} while (0)
530 
/* When built with GFX_VERSION == GFX12 and ALT_HIZ_LOGIC enabled, emit an
 * eight-dword PKT3_RELEASE_MEM packet for V_028A90_BOTTOM_OF_PIPE_TS
 * (event index 5) whose remaining dwords are all zero. Emits nothing
 * otherwise.
 *
 * The static_assert rejects builds that enable ALT_HIZ_LOGIC for any
 * GFX_VERSION other than GFX12.
 */
#define radeon_emit_alt_hiz_logic() \
   do { \
      static_assert(GFX_VERSION == GFX12 || !ALT_HIZ_LOGIC, ""); \
      if (GFX_VERSION == GFX12 && ALT_HIZ_LOGIC) { \
         radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); \
         radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | \
                     S_490_EVENT_INDEX(5)); \
         radeon_emit(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */ \
         radeon_emit(0); /* ADDRESS_LO */ \
         radeon_emit(0); /* ADDRESS_HI */ \
         radeon_emit(0); /* DATA_LO */ \
         radeon_emit(0); /* DATA_HI */ \
         radeon_emit(0); /* INT_CTXID */ \
      } \
   } while (0)
544 
545 /* This should be evaluated at compile time if all parameters are constants. */
546 static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level,enum si_has_tess has_tess,enum si_has_gs has_gs,enum si_has_ngg ngg,enum pipe_shader_type shader)547 si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,
548                       enum si_has_gs has_gs, enum si_has_ngg ngg,
549                       enum pipe_shader_type shader)
550 {
551    switch (shader) {
552    case PIPE_SHADER_VERTEX:
553       /* VS can be bound as VS, ES, LS, or GS. */
554       if (has_tess) {
555          if (gfx_level >= GFX10) {
556             return R_00B430_SPI_SHADER_USER_DATA_HS_0;
557          } else if (gfx_level == GFX9) {
558             return R_00B430_SPI_SHADER_USER_DATA_LS_0;
559          } else {
560             return R_00B530_SPI_SHADER_USER_DATA_LS_0;
561          }
562       } else if (gfx_level >= GFX10) {
563          if (ngg || has_gs) {
564             return R_00B230_SPI_SHADER_USER_DATA_GS_0;
565          } else {
566             return R_00B130_SPI_SHADER_USER_DATA_VS_0;
567          }
568       } else if (has_gs) {
569          return R_00B330_SPI_SHADER_USER_DATA_ES_0;
570       } else {
571          return R_00B130_SPI_SHADER_USER_DATA_VS_0;
572       }
573 
574    case PIPE_SHADER_TESS_CTRL:
575       if (gfx_level == GFX9) {
576          return R_00B430_SPI_SHADER_USER_DATA_LS_0;
577       } else {
578          return R_00B430_SPI_SHADER_USER_DATA_HS_0;
579       }
580 
581    case PIPE_SHADER_TESS_EVAL:
582       /* TES can be bound as ES, VS, or not bound. */
583       if (has_tess) {
584          if (gfx_level >= GFX10) {
585             if (ngg || has_gs) {
586                return R_00B230_SPI_SHADER_USER_DATA_GS_0;
587             } else {
588                return R_00B130_SPI_SHADER_USER_DATA_VS_0;
589             }
590          } else if (has_gs) {
591             return R_00B330_SPI_SHADER_USER_DATA_ES_0;
592          } else {
593             return R_00B130_SPI_SHADER_USER_DATA_VS_0;
594          }
595       } else {
596          return 0;
597       }
598 
599    case PIPE_SHADER_GEOMETRY:
600       if (gfx_level == GFX9) {
601          return R_00B330_SPI_SHADER_USER_DATA_ES_0;
602       } else {
603          return R_00B230_SPI_SHADER_USER_DATA_GS_0;
604       }
605 
606    default:
607       assert(0);
608       return 0;
609    }
610 }
611 
612 #endif
613