1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 /**
8 * This file contains helpers for writing commands to commands streams.
9 */
10
11 #ifndef SI_BUILD_PM4_H
12 #define SI_BUILD_PM4_H
13
14 #include "si_pipe.h"
15 #include "sid.h"
16
/* Open a command-stream writing session: cache the CS pointer, its current
 * dword count, and the buffer pointer in block-local variables. Must be paired
 * with radeon_end(). The initial count is saved for radeon_packets_added(). */
#define radeon_begin(cs) struct radeon_cmdbuf *__cs = (cs); \
                         unsigned __cs_num = __cs->current.cdw; \
                         UNUSED unsigned __cs_num_initial = __cs_num; \
                         uint32_t *__cs_buf = __cs->current.buf

/* Re-open a session after radeon_end() within the same scope (radeon_end()
 * NULLs __cs, which the assertion checks). */
#define radeon_begin_again(cs) do { \
   assert(__cs == NULL); \
   __cs = (cs); \
   __cs_num = __cs->current.cdw; \
   __cs_num_initial = __cs_num; \
   __cs_buf = __cs->current.buf; \
} while (0)

/* Close the session: publish the new dword count back to the CS and poison
 * __cs so a stale session is caught by radeon_begin_again(). */
#define radeon_end() do { \
   __cs->current.cdw = __cs_num; \
   assert(__cs->current.cdw <= __cs->current.max_dw); \
   __cs = NULL; \
} while (0)

/* Append one dword to the command stream. */
#define radeon_emit(value) __cs_buf[__cs_num++] = (value)
/* True if any dword was emitted since radeon_begin()/radeon_begin_again(). */
#define radeon_packets_added() (__cs_num != __cs_num_initial)
38
/* Like radeon_end(), but also flags a context roll if anything was emitted
 * (context register writes roll the hardware context). */
#define radeon_end_update_context_roll() do { \
   radeon_end(); \
   if (radeon_packets_added()) \
      sctx->context_roll = true; \
} while (0)

/* Copy "num" dwords from "values" into the command stream (4 bytes/dword). */
#define radeon_emit_array(values, num) do { \
   unsigned __n = (num); \
   memcpy(__cs_buf + __cs_num, (values), __n * 4); \
   __cs_num += __n; \
} while (0)

/* Instead of writing into the command buffer, return the pointer to the command buffer and
 * assume that the caller will fill the specified number of elements.
 */
#define radeon_emit_array_get_ptr(num, ptr) do { \
   *(ptr) = __cs_buf + __cs_num; \
   __cs_num += (num); \
} while (0)
58
/* Packet building helpers. Don't use directly. */
/* Emit the 2-dword header of a SET_*_REG packet: the PKT3 header and the
 * register offset (in dwords, relative to the range base) with an optional
 * index in bits 28-31. "num" register values must follow. */
#define radeon_set_reg_seq(reg, num, idx, prefix_name, packet, reset_filter_cam) do { \
   assert((reg) >= prefix_name##_REG_OFFSET && (reg) < prefix_name##_REG_END); \
   radeon_emit(PKT3(packet, num, 0) | PKT3_RESET_FILTER_CAM_S(reset_filter_cam)); \
   radeon_emit((((reg) - prefix_name##_REG_OFFSET) >> 2) | ((idx) << 28)); \
} while (0)

/* Set a single register unconditionally. */
#define radeon_set_reg(reg, idx, value, prefix_name, packet) do { \
   radeon_set_reg_seq(reg, 1, idx, prefix_name, packet, 0); \
   radeon_emit(value); \
} while (0)

/* Set a single register only if its tracked shadow value is unknown or
 * different; updates the shadow state. */
#define radeon_opt_set_reg(reg, reg_enum, idx, value, prefix_name, packet) do { \
   unsigned __value = (value); \
   if (!BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __value) { \
      radeon_set_reg(reg, idx, __value, prefix_name, packet); \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __value; \
   } \
} while (0)
80
/* Set 2 consecutive registers if any value is different; updates the tracked
 * shadow values so redundant sets are skipped.
 * Fix: parenthesize "reg_enum" in the static_assert for macro-argument hygiene,
 * matching radeon_opt_set_reg4/5/6 below. */
#define radeon_opt_set_reg2(reg, reg_enum, v1, v2, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 1), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1), __v2 = (v2); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 1, 0x3) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2) { \
      radeon_set_reg_seq(reg, 2, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 1); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
   } \
} while (0)
99
/* Set 3 consecutive registers if any value is different; updates the tracked
 * shadow values so redundant sets are skipped.
 * Fix: parenthesize "reg_enum" in the static_assert for macro-argument hygiene,
 * matching radeon_opt_set_reg4/5/6 below. */
#define radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 2), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 2, 0x7) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3) { \
      radeon_set_reg_seq(reg, 3, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 2); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
   } \
} while (0)
120
/* Set 4 consecutive registers if any value is different. */
#define radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 3), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 3, 0xf) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4) { \
      radeon_set_reg_seq(reg, 4, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
   } \
} while (0)

/* Set 5 consecutive registers if any value is different. */
#define radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 4), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4), __v5 = (v5); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 4, 0x1f) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 4] != __v5) { \
      radeon_set_reg_seq(reg, 5, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      radeon_emit(__v5); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 4); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      sctx->tracked_regs.reg_value[(reg_enum) + 4] = __v5; \
   } \
} while (0)

/* Set 6 consecutive registers if any value is different. */
#define radeon_opt_set_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6, prefix_name, packet) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 5), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4), __v5 = (v5), __v6 = (v6); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 5, 0x3f) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 4] != __v5 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 5] != __v6) { \
      radeon_set_reg_seq(reg, 6, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      radeon_emit(__v5); \
      radeon_emit(__v6); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 5); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      sctx->tracked_regs.reg_value[(reg_enum) + 4] = __v5; \
      sctx->tracked_regs.reg_value[(reg_enum) + 5] = __v6; \
   } \
} while (0)

/* Set "num" consecutive registers if they differ from the caller-provided
 * shadow copy "saved_values"; the shadow copy is updated on emission. */
#define radeon_opt_set_regn(reg, values, saved_values, num, prefix_name, packet) do { \
   if (memcmp(values, saved_values, sizeof(uint32_t) * (num))) { \
      radeon_set_reg_seq(reg, num, 0, prefix_name, packet, 0); \
      radeon_emit_array(values, num); \
      memcpy(saved_values, values, sizeof(uint32_t) * (num)); \
   } \
} while (0)
209
/* Packet building helpers for CONFIG registers. */
#define radeon_set_config_reg(reg, value) \
   radeon_set_reg(reg, 0, value, SI_CONFIG, PKT3_SET_CONFIG_REG)

/* Packet building helpers for CONTEXT registers. */
#define radeon_set_context_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, SI_CONTEXT, PKT3_SET_CONTEXT_REG, 0)

#define radeon_set_context_reg(reg, value) \
   radeon_set_reg(reg, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg(reg, reg_enum, value) \
   radeon_opt_set_reg(reg, reg_enum, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg_idx(reg, reg_enum, idx, value) \
   radeon_opt_set_reg(reg, reg_enum, idx, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg2(reg, reg_enum, v1, v2) \
   radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg3(reg, reg_enum, v1, v2, v3) \
   radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg5(reg, reg_enum, v1, v2, v3, v4, v5) \
   radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6) \
   radeon_opt_set_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_regn(reg, values, saved_values, num) \
   radeon_opt_set_regn(reg, values, saved_values, num, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

/* Packet building helpers for SH registers. */
#define radeon_set_sh_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, SI_SH, PKT3_SET_SH_REG, 0)

#define radeon_set_sh_reg(reg, value) \
   radeon_set_reg(reg, 0, value, SI_SH, PKT3_SET_SH_REG)

#define radeon_opt_set_sh_reg(reg, reg_enum, value) \
   radeon_opt_set_reg(reg, reg_enum, 0, value, SI_SH, PKT3_SET_SH_REG)

#define radeon_opt_set_sh_reg2(reg, reg_enum, v1, v2) \
   radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_SH, PKT3_SET_SH_REG)

#define radeon_opt_set_sh_reg3(reg, reg_enum, v1, v2, v3) \
   radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_SH, PKT3_SET_SH_REG)

/* SET_SH_REG_INDEX is only emitted on GFX10+, hence the assertion. */
#define radeon_opt_set_sh_reg_idx(reg, reg_enum, idx, value) do { \
   assert(sctx->gfx_level >= GFX10); \
   radeon_opt_set_reg(reg, reg_enum, idx, value, SI_SH, PKT3_SET_SH_REG_INDEX); \
} while (0)

/* Emit the low 32 bits of a GPU address whose high 32 bits are fixed by the
 * screen-wide 32-bit address space (address32_hi). */
#define radeon_emit_32bit_pointer(va) do { \
   assert((va) == 0 || ((va) >> 32) == sctx->screen->info.address32_hi); \
   radeon_emit(va); \
} while (0)

#define radeon_emit_one_32bit_pointer(desc, sh_base) do { \
   radeon_set_sh_reg_seq((sh_base) + (desc)->shader_userdata_offset, 1); \
   radeon_emit_32bit_pointer((desc)->gpu_address); \
} while (0)

/* Packet building helpers for UCONFIG registers. */
#define radeon_set_uconfig_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, 0)

/* Perf-counter variant: on GFX10+ gfx queues, also reset the CP filter CAM so
 * the register write isn't dropped as redundant. */
#define radeon_set_uconfig_perfctr_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, \
                      sctx->gfx_level >= GFX10 && \
                      sctx->ws->cs_get_ip_type(__cs) == AMD_IP_GFX)

#define radeon_set_uconfig_reg(reg, value) \
   radeon_set_reg(reg, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)

#define radeon_opt_set_uconfig_reg(reg, reg_enum, value) \
   radeon_opt_set_reg(reg, reg_enum, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)

/* SET_UCONFIG_REG_INDEX is only supported on GFX10+, or GFX9 with new enough
 * ME firmware (>= 26); otherwise fall back to plain SET_UCONFIG_REG. */
#define RESOLVE_PKT3_SET_UCONFIG_REG_INDEX \
   (GFX_VERSION >= GFX10 || (GFX_VERSION == GFX9 && sctx->screen->info.me_fw_version >= 26) ? \
       PKT3_SET_UCONFIG_REG_INDEX : PKT3_SET_UCONFIG_REG)

#define radeon_set_uconfig_reg_idx(reg, idx, value) \
   radeon_set_reg(reg, idx, value, CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)

#define radeon_opt_set_uconfig_reg_idx(reg, reg_enum, idx, value) \
   radeon_opt_set_reg(reg, reg_enum, idx, value, CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)

/* Privileged (perf) registers can't be written with SET_UCONFIG_REG; use a
 * COPY_DATA packet with an immediate source and a perf-register destination. */
#define radeon_set_privileged_config_reg(reg, value) do { \
   assert((reg) < CIK_UCONFIG_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
   radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \
               COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
   radeon_emit(value); \
   radeon_emit(0); /* unused */ \
   radeon_emit((reg) >> 2); \
   radeon_emit(0); /* unused */ \
} while (0)
311
/* GFX11 generic packet building helpers for buffered SH registers. Don't use these directly. */
/* Append one (offset, value) entry to a gfx11_reg_pair buffer; two registers
 * are packed into each pair element, hence the /2 and %2 indexing. */
#define gfx11_push_reg(reg, value, prefix_name, buffer, reg_count) do { \
   unsigned __i = (reg_count)++; \
   assert((reg) >= prefix_name##_REG_OFFSET && (reg) < prefix_name##_REG_END); \
   assert(__i / 2 < ARRAY_SIZE(buffer)); \
   buffer[__i / 2].reg_offset[__i % 2] = ((reg) - prefix_name##_REG_OFFSET) >> 2; \
   buffer[__i / 2].reg_value[__i % 2] = value; \
} while (0)
320
/* Buffer one register only if its tracked shadow value is unknown or
 * different; updates the shadow state.
 * Fix: parenthesize "value" and "reg_enum" in all expansions for
 * macro-argument hygiene, matching radeon_opt_set_reg. */
#define gfx11_opt_push_reg(reg, reg_enum, value, prefix_name, buffer, reg_count) do { \
   unsigned __value = (value); \
   if (!BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __value) { \
      gfx11_push_reg(reg, __value, prefix_name, buffer, reg_count); \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __value; \
   } \
} while (0)
330
/* Buffer 4 consecutive registers if any value is different; updates the
 * tracked shadow values. */
#define gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, buffer, reg_count) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 3), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 3, 0xf) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4) { \
      gfx11_push_reg((reg), __v1, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 4, __v2, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 8, __v3, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 12, __v4, prefix_name, buffer, reg_count); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
   } \
} while (0)

/* GFX11 packet building helpers for buffered SH registers. */
#define gfx11_push_gfx_sh_reg(reg, value) \
   gfx11_push_reg(reg, value, SI_SH, sctx->gfx11.buffered_gfx_sh_regs, \
                  sctx->num_buffered_gfx_sh_regs)

#define gfx11_push_compute_sh_reg(reg, value) \
   gfx11_push_reg(reg, value, SI_SH, sctx->gfx11.buffered_compute_sh_regs, \
                  sctx->num_buffered_compute_sh_regs)

#define gfx11_opt_push_gfx_sh_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, sctx->gfx11.buffered_gfx_sh_regs, \
                      sctx->num_buffered_gfx_sh_regs)

#define gfx11_opt_push_compute_sh_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, sctx->gfx11.buffered_compute_sh_regs, \
                      sctx->num_buffered_compute_sh_regs)

/* GFX11 packet building helpers for SET_CONTEXT_REG_PAIRS_PACKED.
 * Registers are buffered on the stack and then copied to the command buffer at the end.
 */
#define gfx11_begin_packed_context_regs() \
   struct gfx11_reg_pair __cs_context_regs[50]; \
   unsigned __cs_context_reg_count = 0;

#define gfx11_set_context_reg(reg, value) \
   gfx11_push_reg(reg, value, SI_CONTEXT, __cs_context_regs, __cs_context_reg_count)

#define gfx11_opt_set_context_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg(reg, reg_enum, value, SI_CONTEXT, __cs_context_regs, \
                      __cs_context_reg_count)

#define gfx11_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, __cs_context_regs, \
                       __cs_context_reg_count)

/* Flush the buffered context registers. The PACKED packet requires an even
 * register count, so an odd count is padded by re-setting the first register
 * (harmless duplicate). A single register uses a plain SET_CONTEXT_REG. */
#define gfx11_end_packed_context_regs() do { \
   if (__cs_context_reg_count >= 2) { \
      /* Align the count to 2 by duplicating the first register. */ \
      if (__cs_context_reg_count % 2 == 1) { \
         gfx11_set_context_reg(SI_CONTEXT_REG_OFFSET + __cs_context_regs[0].reg_offset[0] * 4, \
                               __cs_context_regs[0].reg_value[0]); \
      } \
      assert(__cs_context_reg_count % 2 == 0); \
      unsigned __num_dw = (__cs_context_reg_count / 2) * 3; \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG_PAIRS_PACKED, __num_dw, 0) | PKT3_RESET_FILTER_CAM_S(1)); \
      radeon_emit(__cs_context_reg_count); \
      radeon_emit_array(__cs_context_regs, __num_dw); \
   } else if (__cs_context_reg_count == 1) { \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
      radeon_emit(__cs_context_regs[0].reg_offset[0]); \
      radeon_emit(__cs_context_regs[0].reg_value[0]); \
   } \
} while (0)
410
/* GFX12 generic packet building helpers for PAIRS packets. Don't use these directly. */
/* Reserve one dword for the PKT3 header; it's patched by gfx12_end_regs(). */
#define gfx12_begin_regs(header) unsigned header = __cs_num++

/* Emit one (offset, value) register pair. */
#define gfx12_set_reg(reg, value, base_offset) do { \
   radeon_emit(((reg) - (base_offset)) >> 2); \
   radeon_emit(value); \
} while (0)
418
/* Emit one register pair only if its tracked shadow value is unknown or
 * different; updates the shadow state.
 * Fix: parenthesize "value" and "reg_enum" in all expansions for
 * macro-argument hygiene, matching radeon_opt_set_reg. */
#define gfx12_opt_set_reg(reg, reg_enum, value, base_offset) do { \
   unsigned __value = (value); \
   if (!BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __value) { \
      gfx12_set_reg(reg, __value, base_offset); \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __value; \
   } \
} while (0)
428
/* Emit 4 consecutive register pairs if any value is different; updates the
 * tracked shadow values. */
#define gfx12_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, base_offset) do { \
   static_assert(BITSET_BITWORD((reg_enum)) == BITSET_BITWORD((reg_enum) + 3), \
                 "bit range crosses dword boundary"); \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 3, 0xf) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4) { \
      gfx12_set_reg((reg), __v1, (base_offset)); \
      gfx12_set_reg((reg) + 4, __v2, (base_offset)); \
      gfx12_set_reg((reg) + 8, __v3, (base_offset)); \
      gfx12_set_reg((reg) + 12, __v4, (base_offset)); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
   } \
} while (0)

/* Patch the header dword reserved by gfx12_begin_regs(). If no register pairs
 * were emitted, drop the reserved dword instead of emitting an empty packet.
 * The PKT3 count field is the number of dwords after the header minus one,
 * hence the "- 2". */
#define gfx12_end_regs(header, packet) do { \
   if ((header) + 1 == __cs_num) { \
      __cs_num--; /* no registers have been set, back off */ \
   } else { \
      unsigned __dw_count = __cs_num - (header) - 2; \
      __cs_buf[(header)] = PKT3((packet), __dw_count, 0) | PKT3_RESET_FILTER_CAM_S(1); \
   } \
} while (0)

/* GFX12 generic packet building helpers for buffered registers. Don't use these directly. */
/* Append one (offset, value) entry to the per-context buffered list selected
 * by "type" (gfx_sh or compute_sh). */
#define gfx12_push_reg(reg, value, base_offset, type) do { \
   unsigned __i = sctx->num_buffered_##type##_regs++; \
   assert(__i < ARRAY_SIZE(sctx->gfx12.buffered_##type##_regs)); \
   sctx->gfx12.buffered_##type##_regs[__i].reg_offset = ((reg) - (base_offset)) >> 2; \
   sctx->gfx12.buffered_##type##_regs[__i].reg_value = value; \
} while (0)
468
/* Buffer one register only if its tracked shadow value is unknown or
 * different; updates the shadow state. "type" selects the buffered list
 * (gfx_sh or compute_sh).
 * Fixes: parenthesize "value", and use the cached __reg_enum consistently
 * (the original cached it but still expanded raw "reg_enum" in the BITSET
 * calls, defeating the purpose of the cache). */
#define gfx12_opt_push_reg(reg, reg_enum, value, type) do { \
   unsigned __value = (value); \
   unsigned __reg_enum = (reg_enum); \
   if (!BITSET_TEST(sctx->tracked_regs.reg_saved_mask, __reg_enum) || \
       sctx->tracked_regs.reg_value[__reg_enum] != __value) { \
      gfx12_push_##type##_reg(reg, __value); \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, __reg_enum); \
      sctx->tracked_regs.reg_value[__reg_enum] = __value; \
   } \
} while (0)
479
/* GFX12 packet building helpers for PAIRS packets. */
#define gfx12_begin_context_regs() \
   gfx12_begin_regs(__cs_context_reg_header)

#define gfx12_set_context_reg(reg, value) \
   gfx12_set_reg(reg, value, SI_CONTEXT_REG_OFFSET)

#define gfx12_opt_set_context_reg(reg, reg_enum, value) \
   gfx12_opt_set_reg(reg, reg_enum, value, SI_CONTEXT_REG_OFFSET)

#define gfx12_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   gfx12_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT_REG_OFFSET)

#define gfx12_end_context_regs() \
   gfx12_end_regs(__cs_context_reg_header, PKT3_SET_CONTEXT_REG_PAIRS)

/* GFX12 packet building helpers for buffered registers. */
#define gfx12_push_gfx_sh_reg(reg, value) \
   gfx12_push_reg(reg, value, SI_SH_REG_OFFSET, gfx_sh)

#define gfx12_push_compute_sh_reg(reg, value) \
   gfx12_push_reg(reg, value, SI_SH_REG_OFFSET, compute_sh)

#define gfx12_opt_push_gfx_sh_reg(reg, reg_enum, value) \
   gfx12_opt_push_reg(reg, reg_enum, value, gfx_sh)

#define gfx12_opt_push_compute_sh_reg(reg, reg_enum, value) \
   gfx12_opt_push_reg(reg, reg_enum, value, compute_sh)

/* Set a gfx SH register via whichever mechanism the chip generation uses:
 * buffered lists on GFX12 and GFX11-with-packed-pairs, a direct SET_SH_REG
 * packet otherwise. */
#define radeon_set_or_push_gfx_sh_reg(reg, value) do { \
   if (GFX_VERSION >= GFX12) { \
      gfx12_push_gfx_sh_reg(reg, value); \
   } else if (GFX_VERSION >= GFX11 && HAS_SH_PAIRS_PACKED) { \
      gfx11_push_gfx_sh_reg(reg, value); \
   } else { \
      radeon_set_sh_reg_seq(reg, 1); \
      radeon_emit(value); \
   } \
} while (0)
519
/* Other packet helpers. */
/* Emit an EVENT_WRITE packet. The EVENT_INDEX field depends on the event:
 * 4 for the *_PARTIAL_FLUSH events, 1 for PIXEL_PIPE_STAT_CONTROL, 0 for
 * everything else. */
#define radeon_event_write(event_type) do { \
   unsigned __event_type = (event_type); \
   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \
   radeon_emit(EVENT_TYPE(__event_type) | \
               EVENT_INDEX(__event_type == V_028A90_VS_PARTIAL_FLUSH || \
                           __event_type == V_028A90_PS_PARTIAL_FLUSH || \
                           __event_type == V_028A90_CS_PARTIAL_FLUSH ? 4 : \
                           __event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL ? 1 : 0)); \
} while (0)

/* GFX12-only alternate HiZ workaround: emit a RELEASE_MEM with a
 * bottom-of-pipe timestamp event that writes no data (all address/data
 * fields zero). Compiles to nothing unless GFX12 && ALT_HIZ_LOGIC. */
#define radeon_emit_alt_hiz_logic() do { \
   static_assert(GFX_VERSION == GFX12 || !ALT_HIZ_LOGIC, ""); \
   if (GFX_VERSION == GFX12 && ALT_HIZ_LOGIC) { \
      radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); \
      radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5)); \
      radeon_emit(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */ \
      radeon_emit(0); /* ADDRESS_LO */ \
      radeon_emit(0); /* ADDRESS_HI */ \
      radeon_emit(0); /* DATA_LO */ \
      radeon_emit(0); /* DATA_HI */ \
      radeon_emit(0); /* INT_CTXID */ \
   } \
} while (0)
544
545 /* This should be evaluated at compile time if all parameters are constants. */
546 static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level,enum si_has_tess has_tess,enum si_has_gs has_gs,enum si_has_ngg ngg,enum pipe_shader_type shader)547 si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,
548 enum si_has_gs has_gs, enum si_has_ngg ngg,
549 enum pipe_shader_type shader)
550 {
551 switch (shader) {
552 case PIPE_SHADER_VERTEX:
553 /* VS can be bound as VS, ES, LS, or GS. */
554 if (has_tess) {
555 if (gfx_level >= GFX10) {
556 return R_00B430_SPI_SHADER_USER_DATA_HS_0;
557 } else if (gfx_level == GFX9) {
558 return R_00B430_SPI_SHADER_USER_DATA_LS_0;
559 } else {
560 return R_00B530_SPI_SHADER_USER_DATA_LS_0;
561 }
562 } else if (gfx_level >= GFX10) {
563 if (ngg || has_gs) {
564 return R_00B230_SPI_SHADER_USER_DATA_GS_0;
565 } else {
566 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
567 }
568 } else if (has_gs) {
569 return R_00B330_SPI_SHADER_USER_DATA_ES_0;
570 } else {
571 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
572 }
573
574 case PIPE_SHADER_TESS_CTRL:
575 if (gfx_level == GFX9) {
576 return R_00B430_SPI_SHADER_USER_DATA_LS_0;
577 } else {
578 return R_00B430_SPI_SHADER_USER_DATA_HS_0;
579 }
580
581 case PIPE_SHADER_TESS_EVAL:
582 /* TES can be bound as ES, VS, or not bound. */
583 if (has_tess) {
584 if (gfx_level >= GFX10) {
585 if (ngg || has_gs) {
586 return R_00B230_SPI_SHADER_USER_DATA_GS_0;
587 } else {
588 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
589 }
590 } else if (has_gs) {
591 return R_00B330_SPI_SHADER_USER_DATA_ES_0;
592 } else {
593 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
594 }
595 } else {
596 return 0;
597 }
598
599 case PIPE_SHADER_GEOMETRY:
600 if (gfx_level == GFX9) {
601 return R_00B330_SPI_SHADER_USER_DATA_ES_0;
602 } else {
603 return R_00B230_SPI_SHADER_USER_DATA_GS_0;
604 }
605
606 default:
607 assert(0);
608 return 0;
609 }
610 }
611
612 #endif
613