/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

/**
 * This file contains helpers for writing commands to command streams.
 */

#ifndef SI_BUILD_PM4_H
#define SI_BUILD_PM4_H

#include "si_pipe.h"
#include "sid.h"
#define radeon_begin(cs) struct radeon_cmdbuf *__cs = (cs); \
                         unsigned __cs_num = __cs->current.cdw; \
                         UNUSED unsigned __cs_num_initial = __cs_num; \
                         uint32_t *__cs_buf = __cs->current.buf

#define radeon_begin_again(cs) do { \
   assert(__cs == NULL); \
   __cs = (cs); \
   __cs_num = __cs->current.cdw; \
   __cs_num_initial = __cs_num; \
   __cs_buf = __cs->current.buf; \
} while (0)

#define radeon_end() do { \
   __cs->current.cdw = __cs_num; \
   assert(__cs->current.cdw <= __cs->current.max_dw); \
   __cs = NULL; \
} while (0)

#define radeon_emit(value) __cs_buf[__cs_num++] = (value)
#define radeon_packets_added() (__cs_num != __cs_num_initial)

#define radeon_end_update_context_roll(_unused) do { \
   radeon_end(); \
   if (radeon_packets_added()) \
      sctx->context_roll = true; \
} while (0)
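
/* Example of the basic emit pattern (illustrative sketch; "sctx" stands for the
 * current si_context, and the packet shown is just a NOP with one payload dword):
 *
 *    radeon_begin(&sctx->gfx_cs);
 *    radeon_emit(PKT3(PKT3_NOP, 0, 0));
 *    radeon_emit(0);
 *    radeon_end();
 *
 * radeon_begin declares the local emit state (__cs, __cs_num, __cs_buf),
 * radeon_emit writes one dword into the buffer, and radeon_end stores the final
 * dword count back into the command stream.
 */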

#define radeon_emit_array(values, num) do { \
   unsigned __n = (num); \
   memcpy(__cs_buf + __cs_num, (values), __n * 4); \
   __cs_num += __n; \
} while (0)

/* Instead of writing into the command buffer, return a pointer to the current
 * write position and assume that the caller will fill the specified number of
 * dwords.
 */
#define radeon_emit_array_get_ptr(num, ptr) do { \
   *(ptr) = __cs_buf + __cs_num; \
   __cs_num += (num); \
} while (0)
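
/* Example (sketch; "num_dw", "out", and compute_dword are hypothetical caller
 * names):
 *
 *    uint32_t *out;
 *    radeon_emit_array_get_ptr(num_dw, &out);
 *    for (unsigned i = 0; i < num_dw; i++)
 *       out[i] = compute_dword(i);
 *
 * The caller must fill exactly num_dw dwords because the write pointer has
 * already been advanced past them.
 */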

/* Packet building helpers. Don't use directly. */
#define radeon_set_reg_seq(reg, num, idx, prefix_name, packet, reset_filter_cam) do { \
   assert((reg) >= prefix_name##_REG_OFFSET && (reg) < prefix_name##_REG_END); \
   radeon_emit(PKT3(packet, num, 0) | PKT3_RESET_FILTER_CAM_S(reset_filter_cam)); \
   radeon_emit((((reg) - prefix_name##_REG_OFFSET) >> 2) | ((idx) << 28)); \
} while (0)

#define radeon_set_reg(reg, idx, value, prefix_name, packet) do { \
   radeon_set_reg_seq(reg, 1, idx, prefix_name, packet, 0); \
   radeon_emit(value); \
} while (0)

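/* Like radeon_set_reg, but skips the packet when the value shadowed in
 * sctx->tracked_regs already matches, and updates the shadow copy otherwise.
 */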
#define radeon_opt_set_reg(reg, reg_enum, idx, value, prefix_name, packet) do { \
   unsigned __value = (value); \
   if (!BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __value) { \
      radeon_set_reg(reg, idx, __value, prefix_name, packet); \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __value; \
   } \
} while (0)

/* Set consecutive registers if any value is different. */
#define radeon_opt_set_reg2(reg, reg_enum, v1, v2, prefix_name, packet) do { \
   unsigned __v1 = (v1), __v2 = (v2); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 1, 0x3) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2) { \
      radeon_set_reg_seq(reg, 2, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 1); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
   } \
} while (0)

#define radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, prefix_name, packet) do { \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 2, 0x7) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3) { \
      radeon_set_reg_seq(reg, 3, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 2); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
   } \
} while (0)

#define radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, packet) do { \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 3, 0xf) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4) { \
      radeon_set_reg_seq(reg, 4, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
   } \
} while (0)

#define radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, prefix_name, packet) do { \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4), __v5 = (v5); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 4, 0x1f) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 4] != __v5) { \
      radeon_set_reg_seq(reg, 5, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      radeon_emit(__v5); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 4); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      sctx->tracked_regs.reg_value[(reg_enum) + 4] = __v5; \
   } \
} while (0)

#define radeon_opt_set_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6, prefix_name, packet) do { \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4), __v5 = (v5), __v6 = (v6); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 5, 0x3f) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 4] != __v5 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 5] != __v6) { \
      radeon_set_reg_seq(reg, 6, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      radeon_emit(__v5); \
      radeon_emit(__v6); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 5); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      sctx->tracked_regs.reg_value[(reg_enum) + 4] = __v5; \
      sctx->tracked_regs.reg_value[(reg_enum) + 5] = __v6; \
   } \
} while (0)

#define radeon_opt_set_regn(reg, values, saved_values, num, prefix_name, packet) do { \
   if (memcmp(values, saved_values, sizeof(uint32_t) * (num))) { \
      radeon_set_reg_seq(reg, num, 0, prefix_name, packet, 0); \
      radeon_emit_array(values, num); \
      memcpy(saved_values, values, sizeof(uint32_t) * (num)); \
   } \
} while (0)

/* Packet building helpers for CONFIG registers. */
#define radeon_set_config_reg(reg, value) \
   radeon_set_reg(reg, 0, value, SI_CONFIG, PKT3_SET_CONFIG_REG)

/* Packet building helpers for CONTEXT registers. */
/* TODO: Remove the _unused parameters everywhere. */
#define radeon_set_context_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, SI_CONTEXT, PKT3_SET_CONTEXT_REG, 0)

#define radeon_set_context_reg(reg, value) \
   radeon_set_reg(reg, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg(_unused, reg, reg_enum, value) \
   radeon_opt_set_reg(reg, reg_enum, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg_idx(_unused, reg, reg_enum, idx, value) \
   radeon_opt_set_reg(reg, reg_enum, idx, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg2(_unused, reg, reg_enum, v1, v2) \
   radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg3(_unused, reg, reg_enum, v1, v2, v3) \
   radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg4(_unused, reg, reg_enum, v1, v2, v3, v4) \
   radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg5(_unused, reg, reg_enum, v1, v2, v3, v4, v5) \
   radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6) \
   radeon_opt_set_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6, SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_regn(_unused, reg, values, saved_values, num) \
   radeon_opt_set_regn(reg, values, saved_values, num, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
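
/* Example of the state-tracked ("opt") context register helpers (illustrative
 * sketch; the register, value, and SI_TRACKED_* enumerator are placeholders and
 * assumed to come from sid.h and the tracked-register enum in si_pipe.h):
 *
 *    radeon_begin(&sctx->gfx_cs);
 *    radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1,
 *                               SI_TRACKED_PA_SC_MODE_CNTL_1, value);
 *    radeon_end_update_context_roll(sctx);
 *
 * The packet is emitted only if the tracked value differs, and the context-roll
 * flag is set only if something was actually written.
 */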

/* Packet building helpers for SH registers. */
#define radeon_set_sh_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, SI_SH, PKT3_SET_SH_REG, 0)

#define radeon_set_sh_reg(reg, value) \
   radeon_set_reg(reg, 0, value, SI_SH, PKT3_SET_SH_REG)

#define radeon_opt_set_sh_reg(_unused, reg, reg_enum, value) \
   radeon_opt_set_reg(reg, reg_enum, 0, value, SI_SH, PKT3_SET_SH_REG)

#define radeon_opt_set_sh_reg2(_unused, reg, reg_enum, v1, v2) \
   radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_SH, PKT3_SET_SH_REG)

#define radeon_opt_set_sh_reg3(_unused, reg, reg_enum, v1, v2, v3) \
   radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_SH, PKT3_SET_SH_REG)

#define radeon_opt_set_sh_reg_idx(_unused, reg, reg_enum, idx, value) do { \
   assert(sctx->gfx_level >= GFX10); \
   radeon_opt_set_reg(reg, reg_enum, idx, value, SI_SH, PKT3_SET_SH_REG_INDEX); \
} while (0)

#define radeon_emit_32bit_pointer(_unused, va) do { \
   assert((va) == 0 || ((va) >> 32) == sctx->screen->info.address32_hi); \
   radeon_emit(va); \
} while (0)

#define radeon_emit_one_32bit_pointer(_unused, desc, sh_base) do { \
   radeon_set_sh_reg_seq((sh_base) + (desc)->shader_userdata_offset, 1); \
   radeon_emit_32bit_pointer(_unused, (desc)->gpu_address); \
} while (0)
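
/* Example (sketch; "descs" is a hypothetical descriptor-set pointer and
 * "sh_base" the shader stage's user-data register base):
 *
 *    radeon_begin(&sctx->gfx_cs);
 *    radeon_emit_one_32bit_pointer(sctx, descs, sh_base);
 *    radeon_end();
 *
 * The address is emitted as a single dword, so its upper 32 bits must match
 * sctx->screen->info.address32_hi (see the assertion above).
 */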

/* Packet building helpers for UCONFIG registers. */
#define radeon_set_uconfig_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, 0)

#define radeon_set_uconfig_perfctr_reg_seq(reg, num) \
   radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, \
                      sctx->gfx_level >= GFX10 && \
                      sctx->ws->cs_get_ip_type(__cs) == AMD_IP_GFX)

#define radeon_set_uconfig_reg(reg, value) \
   radeon_set_reg(reg, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)

#define radeon_opt_set_uconfig_reg(_unused, reg, reg_enum, value) \
   radeon_opt_set_reg(reg, reg_enum, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)

#define RESOLVE_PKT3_SET_UCONFIG_REG_INDEX \
   (GFX_VERSION >= GFX10 || (GFX_VERSION == GFX9 && sctx->screen->info.me_fw_version >= 26) ? \
       PKT3_SET_UCONFIG_REG_INDEX : PKT3_SET_UCONFIG_REG)

#define radeon_set_uconfig_reg_idx(_unused, _unused2, reg, idx, value) \
   radeon_set_reg(reg, idx, value, CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)

#define radeon_opt_set_uconfig_reg_idx(_unused, _unused2, reg, reg_enum, idx, value) \
   radeon_opt_set_reg(reg, reg_enum, idx, value, CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)

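/* Write a privileged (e.g. perf counter) register that sits below the UCONFIG
 * range, using a COPY_DATA packet with an immediate source and the perf
 * destination selector instead of SET_UCONFIG_REG.
 */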
#define radeon_set_privileged_config_reg(reg, value) do { \
   assert((reg) < CIK_UCONFIG_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
   radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \
               COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
   radeon_emit(value); \
   radeon_emit(0); /* unused */ \
   radeon_emit((reg) >> 2); \
   radeon_emit(0); /* unused */ \
} while (0)

/* GFX11 generic packet building helpers for buffered registers (used for both
 * SH and context registers). Don't use these directly.
 */
#define gfx11_push_reg(reg, value, prefix_name, buffer, reg_count) do { \
   unsigned __i = (reg_count)++; \
   assert((reg) >= prefix_name##_REG_OFFSET && (reg) < prefix_name##_REG_END); \
   assert(__i / 2 < ARRAY_SIZE(buffer)); \
   buffer[__i / 2].reg_offset[__i % 2] = ((reg) - prefix_name##_REG_OFFSET) >> 2; \
   buffer[__i / 2].reg_value[__i % 2] = value; \
} while (0)

#define gfx11_opt_push_reg(reg, reg_enum, value, prefix_name, buffer, reg_count) do { \
   unsigned __value = value; \
   if (!BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) || \
       sctx->tracked_regs.reg_value[reg_enum] != __value) { \
      gfx11_push_reg(reg, __value, prefix_name, buffer, reg_count); \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
      sctx->tracked_regs.reg_value[reg_enum] = __value; \
   } \
} while (0)

#define gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, buffer, reg_count) do { \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                      (reg_enum), (reg_enum) + 3, 0xf) || \
       sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4) { \
      gfx11_push_reg((reg), __v1, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 4, __v2, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 8, __v3, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 12, __v4, prefix_name, buffer, reg_count); \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
   } \
} while (0)

/* GFX11 packet building helpers for buffered SH registers. */
#define gfx11_push_gfx_sh_reg(reg, value) \
   gfx11_push_reg(reg, value, SI_SH, sctx->gfx11.buffered_gfx_sh_regs, \
                  sctx->num_buffered_gfx_sh_regs)

#define gfx11_push_compute_sh_reg(reg, value) \
   gfx11_push_reg(reg, value, SI_SH, sctx->gfx11.buffered_compute_sh_regs, \
                  sctx->num_buffered_compute_sh_regs)

#define gfx11_opt_push_gfx_sh_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, sctx->gfx11.buffered_gfx_sh_regs, \
                      sctx->num_buffered_gfx_sh_regs)

#define gfx11_opt_push_compute_sh_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, sctx->gfx11.buffered_compute_sh_regs, \
                      sctx->num_buffered_compute_sh_regs)
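
/* Example (sketch; the register and value are arbitrary). No radeon_begin/end
 * is needed because the write is only buffered in the context, not emitted yet:
 *
 *    gfx11_push_gfx_sh_reg(R_00B130_SPI_SHADER_USER_DATA_VS_0 + 4, value);
 */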

/* GFX11 packet building helpers for SET_CONTEXT_REG_PAIRS_PACKED.
 * Registers are buffered on the stack and then copied to the command buffer at the end.
 */
#define gfx11_begin_packed_context_regs() \
   struct gfx11_reg_pair __cs_context_regs[50]; \
   unsigned __cs_context_reg_count = 0;

#define gfx11_set_context_reg(reg, value) \
   gfx11_push_reg(reg, value, SI_CONTEXT, __cs_context_regs, __cs_context_reg_count)

#define gfx11_opt_set_context_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg(reg, reg_enum, value, SI_CONTEXT, __cs_context_regs, \
                      __cs_context_reg_count)

#define gfx11_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, __cs_context_regs, \
                       __cs_context_reg_count)

#define gfx11_end_packed_context_regs() do { \
   if (__cs_context_reg_count >= 2) { \
      /* Pad the count to an even number by duplicating the first register. */ \
      if (__cs_context_reg_count % 2 == 1) { \
         gfx11_set_context_reg(SI_CONTEXT_REG_OFFSET + __cs_context_regs[0].reg_offset[0] * 4, \
                               __cs_context_regs[0].reg_value[0]); \
      } \
      assert(__cs_context_reg_count % 2 == 0); \
      unsigned __num_dw = (__cs_context_reg_count / 2) * 3; \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG_PAIRS_PACKED, __num_dw, 0) | \
                  PKT3_RESET_FILTER_CAM_S(1)); \
      radeon_emit(__cs_context_reg_count); \
      radeon_emit_array(__cs_context_regs, __num_dw); \
   } else if (__cs_context_reg_count == 1) { \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
      radeon_emit(__cs_context_regs[0].reg_offset[0]); \
      radeon_emit(__cs_context_regs[0].reg_value[0]); \
   } \
} while (0)
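
/* Example of the packed context register flow (illustrative sketch; the
 * register choices are arbitrary):
 *
 *    radeon_begin(&sctx->gfx_cs);
 *    gfx11_begin_packed_context_regs();
 *    gfx11_set_context_reg(R_028A00_PA_SU_POINT_SIZE, 0);
 *    gfx11_set_context_reg(R_028A04_PA_SU_POINT_MINMAX, 0);
 *    gfx11_end_packed_context_regs();
 *    radeon_end();
 *
 * With two or more registers the buffered pairs go out as one
 * SET_CONTEXT_REG_PAIRS_PACKED packet; a single register falls back to a plain
 * SET_CONTEXT_REG.
 */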

#define radeon_set_or_push_gfx_sh_reg(reg, value) do { \
   if (GFX_VERSION >= GFX11 && HAS_SH_PAIRS_PACKED) { \
      gfx11_push_gfx_sh_reg(reg, value); \
   } else { \
      radeon_set_sh_reg_seq(reg, 1); \
      radeon_emit(value); \
   } \
} while (0)
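
/* Example (sketch; GFX_VERSION and HAS_SH_PAIRS_PACKED are compile-time
 * template parameters of the calling code, and "va" is a hypothetical 32-bit
 * pointer value):
 *
 *    radeon_begin(&sctx->gfx_cs);
 *    radeon_set_or_push_gfx_sh_reg(R_00B130_SPI_SHADER_USER_DATA_VS_0, va);
 *    radeon_end();
 *
 * On GFX11 with packed SH pairs the register is only buffered and flushed
 * later; otherwise it is written immediately with SET_SH_REG.
 */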

/* This should be evaluated at compile time if all parameters are constants. */
static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,
                      enum si_has_gs has_gs, enum si_has_ngg ngg,
                      enum pipe_shader_type shader)
{
   switch (shader) {
   case PIPE_SHADER_VERTEX:
      /* VS can be bound as VS, ES, LS, or GS. */
      if (has_tess) {
         if (gfx_level >= GFX10) {
            return R_00B430_SPI_SHADER_USER_DATA_HS_0;
         } else if (gfx_level == GFX9) {
            return R_00B430_SPI_SHADER_USER_DATA_LS_0;
         } else {
            return R_00B530_SPI_SHADER_USER_DATA_LS_0;
         }
      } else if (gfx_level >= GFX10) {
         if (ngg || has_gs) {
            return R_00B230_SPI_SHADER_USER_DATA_GS_0;
         } else {
            return R_00B130_SPI_SHADER_USER_DATA_VS_0;
         }
      } else if (has_gs) {
         return R_00B330_SPI_SHADER_USER_DATA_ES_0;
      } else {
         return R_00B130_SPI_SHADER_USER_DATA_VS_0;
      }

   case PIPE_SHADER_TESS_CTRL:
      if (gfx_level == GFX9) {
         return R_00B430_SPI_SHADER_USER_DATA_LS_0;
      } else {
         return R_00B430_SPI_SHADER_USER_DATA_HS_0;
      }

   case PIPE_SHADER_TESS_EVAL:
      /* TES can be bound as ES, VS, or not bound. */
      if (has_tess) {
         if (gfx_level >= GFX10) {
            if (ngg || has_gs) {
               return R_00B230_SPI_SHADER_USER_DATA_GS_0;
            } else {
               return R_00B130_SPI_SHADER_USER_DATA_VS_0;
            }
         } else if (has_gs) {
            return R_00B330_SPI_SHADER_USER_DATA_ES_0;
         } else {
            return R_00B130_SPI_SHADER_USER_DATA_VS_0;
         }
      } else {
         return 0;
      }

   case PIPE_SHADER_GEOMETRY:
      if (gfx_level == GFX9) {
         return R_00B330_SPI_SHADER_USER_DATA_ES_0;
      } else {
         return R_00B230_SPI_SHADER_USER_DATA_GS_0;
      }

   default:
      assert(0);
      return 0;
   }
}
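
/* Example (sketch; assumes the TESS_ON/GS_OFF/NGG_ON enumerators used elsewhere
 * in radeonsi): with tessellation enabled on GFX10+, the vertex shader's user
 * data lives at the HS register base, so
 *
 *    si_get_user_data_base(GFX10, TESS_ON, GS_OFF, NGG_ON, PIPE_SHADER_VERTEX)
 *
 * returns R_00B430_SPI_SHADER_USER_DATA_HS_0.
 */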

#endif