1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * This file contains helpers for writing commands to commands streams.
27 */
28
29 #ifndef SI_BUILD_PM4_H
30 #define SI_BUILD_PM4_H
31
32 #include "si_pipe.h"
33 #include "sid.h"
34
/* Debug hook used by the register-setting macros below.
 *
 * The "#if 0" branch is compiled out. Changing it to 1 makes every
 * SI_CHECK_SHADOWED_REGS() use call ac_check_shadowed_regs() with GFX10 and
 * CHIP_NAVI14 hard-coded. As written, the macro expands to nothing.
 */
#if 0
#include "ac_shadowed_regs.h"
#define SI_CHECK_SHADOWED_REGS(reg_offset, count) ac_check_shadowed_regs(GFX10, CHIP_NAVI14, reg_offset, count)
#else
#define SI_CHECK_SHADOWED_REGS(reg_offset, count)
#endif
41
/**
 * Open a command-stream write section on \p cs.
 *
 * Expands to declarations of the locals that the other radeon_* macros in
 * this header operate on:
 *   __cs             - the command buffer
 *   __cs_num         - dword write index, seeded from cs->current.cdw
 *   __cs_num_initial - the write index at entry (see radeon_packets_added)
 *   __cs_buf         - cs->current.buf
 *
 * The last declaration is left unterminated, so the caller supplies the ';'.
 * Because this declares variables it is not wrapped in do { } while (0).
 */
#define radeon_begin(cs) struct radeon_cmdbuf *__cs = (cs); \
                         unsigned __cs_num = __cs->current.cdw; \
                         UNUSED unsigned __cs_num_initial = __cs_num; \
                         uint32_t *__cs_buf = __cs->current.buf
46
/**
 * Re-initialize the locals declared by radeon_begin() for \p cs.
 *
 * Asserts that __cs is NULL first; radeon_end() is what sets it to NULL, so
 * this can only follow a closed section in the same scope.
 */
#define radeon_begin_again(cs) do { \
   assert(__cs == NULL); \
   __cs = (cs); \
   __cs_num = __cs->current.cdw; \
   __cs_num_initial = __cs_num; \
   __cs_buf = __cs->current.buf; \
} while (0)
54
/**
 * Close the current write section.
 *
 * Stores the local write index back into __cs->current.cdw, asserts that it
 * does not exceed __cs->current.max_dw, and sets __cs to NULL.
 * __cs_num and __cs_num_initial are left untouched.
 */
#define radeon_end() do { \
   __cs->current.cdw = __cs_num; \
   assert(__cs->current.cdw <= __cs->current.max_dw); \
   __cs = NULL; \
} while (0)
60
/* Store one dword at the local write index and advance the index. Only the
 * locals from radeon_begin() are modified; current.cdw is updated by radeon_end(). */
#define radeon_emit(value) __cs_buf[__cs_num++] = (value)
/* Non-zero if the local write index differs from its value at
 * radeon_begin()/radeon_begin_again(). */
#define radeon_packets_added() (__cs_num != __cs_num_initial)
63
/**
 * Close the current write section with radeon_end() and, if anything was
 * emitted since the section was opened, set sctx->context_roll to true.
 *
 * @param sctx  pointer to an object with a context_roll member
 */
#define radeon_end_update_context_roll(sctx) do { \
   radeon_end(); \
   if (radeon_packets_added()) { \
      (sctx)->context_roll = true; \
   } \
} while (0)
69
/**
 * Copy \p num dwords from \p values to the local write position and advance
 * the write index by \p num.
 *
 * The byte count is derived from the element type of __cs_buf rather than a
 * literal 4, which also performs the multiplication in size_t.
 *
 * @param values  pointer to the source dwords
 * @param num     number of dwords to copy (evaluated once)
 */
#define radeon_emit_array(values, num) do { \
   unsigned __n = (num); \
   memcpy(__cs_buf + __cs_num, (values), __n * sizeof(*__cs_buf)); \
   __cs_num += __n; \
} while (0)
75
/**
 * Emit the two leading dwords of a PKT3_SET_CONFIG_REG packet: the packet
 * header built with count \p num, then (reg - SI_CONFIG_REG_OFFSET) >> 2.
 * The register values are emitted separately by the caller
 * (see radeon_set_config_reg).
 *
 * Asserts that \p reg is below SI_CONTEXT_REG_OFFSET.
 */
#define radeon_set_config_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) < SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONFIG_REG, num, 0)); \
   radeon_emit(((reg) - SI_CONFIG_REG_OFFSET) >> 2); \
} while (0)
82
/* Emit a complete PKT3_SET_CONFIG_REG packet that writes the single
 * register \p reg with \p value. */
#define radeon_set_config_reg(reg, value) do { \
   radeon_set_config_reg_seq(reg, 1); \
   radeon_emit(value); \
} while (0)
87
/**
 * Emit the two leading dwords of a PKT3_SET_CONTEXT_REG packet: the packet
 * header built with count \p num, then (reg - SI_CONTEXT_REG_OFFSET) >> 2.
 * The register values are emitted separately by the caller.
 *
 * Asserts that \p reg is at or above SI_CONTEXT_REG_OFFSET.
 */
#define radeon_set_context_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \
   radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
} while (0)
94
/* Emit a complete PKT3_SET_CONTEXT_REG packet that writes the single
 * register \p reg with \p value. */
#define radeon_set_context_reg(reg, value) do { \
   radeon_set_context_reg_seq(reg, 1); \
   radeon_emit(value); \
} while (0)
99
/* Emit a PKT3_SET_CONTEXT_REG packet for \p num registers starting at \p reg,
 * taking the \p num values from the array \p values. */
#define radeon_set_context_reg_seq_array(reg, num, values) do { \
   radeon_set_context_reg_seq(reg, num); \
   radeon_emit_array(values, num); \
} while (0)
104
/**
 * Emit a single-register PKT3_SET_CONTEXT_REG packet whose offset dword also
 * carries \p idx in bits 28 and up.
 *
 * The shift is done on an unsigned value so that idx >= 8 (which reaches
 * bit 31) does not overflow a signed int, and the offset term is explicitly
 * parenthesized like in radeon_set_sh_reg_idx3_seq.
 *
 * @param reg    register offset; asserted to be >= SI_CONTEXT_REG_OFFSET
 * @param idx    index value OR'ed into the offset dword at bit 28
 * @param value  register value
 */
#define radeon_set_context_reg_idx(reg, idx, value) do { \
   SI_CHECK_SHADOWED_REGS(reg, 1); \
   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
   radeon_emit((((reg) - SI_CONTEXT_REG_OFFSET) >> 2) | ((unsigned)(idx) << 28)); \
   radeon_emit(value); \
} while (0)
112
/**
 * Emit the two leading dwords of a PKT3_SET_SH_REG packet: the packet header
 * built with count \p num, then (reg - SI_SH_REG_OFFSET) >> 2.
 * The register values are emitted separately by the caller.
 *
 * Asserts that \p reg lies in [SI_SH_REG_OFFSET, SI_SH_REG_END).
 */
#define radeon_set_sh_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
   radeon_emit(PKT3(PKT3_SET_SH_REG, num, 0)); \
   radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
} while (0)
119
/**
 * Like radeon_set_sh_reg_seq, but the header uses PKT3_SET_SH_REG_INDEX and
 * the offset dword has 3 << 28 OR'ed into it.
 *
 * Asserts that \p reg lies in [SI_SH_REG_OFFSET, SI_SH_REG_END).
 */
#define radeon_set_sh_reg_idx3_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
   radeon_emit(PKT3(PKT3_SET_SH_REG_INDEX, num, 0)); \
   radeon_emit((((reg) - SI_SH_REG_OFFSET) >> 2) | (3 << 28)); \
} while (0)
126
/* Emit a complete PKT3_SET_SH_REG packet that writes the single register
 * \p reg with \p value. */
#define radeon_set_sh_reg(reg, value) do { \
   radeon_set_sh_reg_seq(reg, 1); \
   radeon_emit(value); \
} while (0)
131
/* Emit a complete PKT3_SET_SH_REG_INDEX packet (via
 * radeon_set_sh_reg_idx3_seq) that writes the single register \p reg with
 * \p value. */
#define radeon_set_sh_reg_idx3(reg, value) do { \
   radeon_set_sh_reg_idx3_seq(reg, 1); \
   radeon_emit(value); \
} while (0)
136
/**
 * Emit the two leading dwords of a PKT3_SET_UCONFIG_REG packet: the packet
 * header built with count \p num and \p perfctr as the third PKT3 argument,
 * then (reg - CIK_UCONFIG_REG_OFFSET) >> 2.
 * The register values are emitted separately by the caller.
 *
 * Asserts that \p reg lies in [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END).
 */
#define radeon_set_uconfig_reg_seq(reg, num, perfctr) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
   radeon_emit(PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \
   radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \
} while (0)
143
/* Emit a complete PKT3_SET_UCONFIG_REG packet (perfctr argument = false)
 * that writes the single register \p reg with \p value. */
#define radeon_set_uconfig_reg(reg, value) do { \
   radeon_set_uconfig_reg_seq(reg, 1, false); \
   radeon_emit(value); \
} while (0)
148
/* Same as radeon_set_uconfig_reg, but passes true as the perfctr argument of
 * radeon_set_uconfig_reg_seq. */
#define radeon_set_uconfig_reg_perfctr(reg, value) do { \
   radeon_set_uconfig_reg_seq(reg, 1, true); \
   radeon_emit(value); \
} while (0)
153
/**
 * Emit a single-register UCONFIG write whose offset dword also carries
 * \p idx in bits 28 and up.
 *
 * The opcode is PKT3_SET_UCONFIG_REG_INDEX, except that PKT3_SET_UCONFIG_REG
 * is used when gfx_level < GFX9, or when gfx_level == GFX9 and
 * screen->info.me_fw_version < 26.
 *
 * The shift is done on an unsigned value so that idx >= 8 (which reaches
 * bit 31) does not overflow a signed int, and the offset term is explicitly
 * parenthesized like in radeon_set_sh_reg_idx3_seq.
 *
 * @param screen     object providing info.me_fw_version
 * @param gfx_level  graphics level compared against GFX9
 * @param reg        register offset; asserted to lie in
 *                   [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END)
 * @param idx        index value; asserted to be non-zero
 * @param value      register value
 */
#define radeon_set_uconfig_reg_idx(screen, gfx_level, reg, idx, value) do { \
   SI_CHECK_SHADOWED_REGS(reg, 1); \
   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
   assert((idx) != 0); \
   unsigned __opcode = PKT3_SET_UCONFIG_REG_INDEX; \
   if ((gfx_level) < GFX9 || \
       ((gfx_level) == GFX9 && (screen)->info.me_fw_version < 26)) { \
      __opcode = PKT3_SET_UCONFIG_REG; \
   } \
   radeon_emit(PKT3(__opcode, 1, 0)); \
   radeon_emit((((reg) - CIK_UCONFIG_REG_OFFSET) >> 2) | ((unsigned)(idx) << 28)); \
   radeon_emit(value); \
} while (0)
166
/**
 * Emit PKT3_SET_CONTEXT_REG only if the register value is different.
 *
 * The write is emitted when bit \p reg of sctx->tracked_regs.reg_saved is
 * clear or when sctx->tracked_regs.reg_value[reg] differs from \p val; the
 * tracked bit and value are then updated.
 *
 * @param sctx    context holding tracked_regs (parenthesized in the expansion
 *                so any pointer expression is accepted)
 * @param offset  register offset passed to radeon_set_context_reg
 * @param reg     tracked-register index (bit position and array index)
 * @param val     value to write (evaluated once)
 */
#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_context_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= 0x1ull << (reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
177
/**
 * Set 2 consecutive context registers if any tracked value is different or
 * not yet saved.
 *
 * @param sctx    context holding tracked_regs (parenthesized in the expansion
 *                so any pointer expression is accepted)
 * @param offset  starting register offset
 * @param reg     tracked-register index of the first register; reg + 1 is
 *                used for the second
 * @param val1    is written to first register
 * @param val2    is written to second register
 */
#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \
   unsigned __value1 = (val1), __value2 = (val2); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x3) != 0x3 || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2) { \
      radeon_set_context_reg_seq(offset, 2); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_saved |= 0x3ull << (reg); \
   } \
} while (0)
197
/**
 * Set 3 consecutive context registers if any tracked value is different or
 * not yet saved.
 *
 * @param sctx    context holding tracked_regs (parenthesized in the expansion
 *                so any pointer expression is accepted)
 * @param offset  starting register offset
 * @param reg     tracked-register index of the first register; reg + 1 and
 *                reg + 2 are used for the following ones
 * @param val1    is written to first register
 * @param val2    is written to second register
 * @param val3    is written to third register
 */
#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x7) != 0x7 || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       (sctx)->tracked_regs.reg_value[(reg) + 2] != __value3) { \
      radeon_set_context_reg_seq(offset, 3); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_value[(reg) + 2] = __value3; \
      (sctx)->tracked_regs.reg_saved |= 0x7ull << (reg); \
   } \
} while (0)
217
/**
 * Set 4 consecutive context registers if any tracked value is different or
 * not yet saved.
 *
 * @param sctx    context holding tracked_regs (parenthesized in the expansion
 *                so any pointer expression is accepted)
 * @param offset  starting register offset
 * @param reg     tracked-register index of the first register; reg + 1 ..
 *                reg + 3 are used for the following ones
 * @param val1    is written to first register
 * @param val2    is written to second register
 * @param val3    is written to third register
 * @param val4    is written to fourth register
 */
#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 = (val4); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0xf) != 0xf || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       (sctx)->tracked_regs.reg_value[(reg) + 2] != __value3 || \
       (sctx)->tracked_regs.reg_value[(reg) + 3] != __value4) { \
      radeon_set_context_reg_seq(offset, 4); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      radeon_emit(__value4); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_value[(reg) + 2] = __value3; \
      (sctx)->tracked_regs.reg_value[(reg) + 3] = __value4; \
      (sctx)->tracked_regs.reg_saved |= 0xfull << (reg); \
   } \
} while (0)
240
/**
 * Set \p num consecutive context registers if any value differs from the
 * caller-provided saved copy, then refresh the saved copy.
 *
 * Each argument is evaluated exactly once. The local names are deliberately
 * distinct from the "__n" used inside radeon_emit_array.
 *
 * @param sctx       not referenced by the expansion
 * @param offset     starting register offset
 * @param value      pointer to \p num new dword values
 * @param saved_val  pointer to \p num previously written dword values;
 *                   overwritten with \p value when a write is emitted
 * @param num        number of registers
 */
#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) do { \
   const void *__regn_src = (value); \
   void *__regn_dst = (saved_val); \
   unsigned __regn_count = (num); \
   if (memcmp(__regn_src, __regn_dst, sizeof(uint32_t) * __regn_count)) { \
      radeon_set_context_reg_seq(offset, __regn_count); \
      radeon_emit_array(__regn_src, __regn_count); \
      memcpy(__regn_dst, __regn_src, sizeof(uint32_t) * __regn_count); \
   } \
} while (0)
251
/**
 * Emit PKT3_SET_SH_REG (via radeon_set_sh_reg) only if the tracked value for
 * \p reg is not saved yet or differs from \p val; the tracked bit and value
 * are then updated.
 *
 * @param sctx    context holding tracked_regs (parenthesized in the expansion
 *                so any pointer expression is accepted)
 * @param offset  register offset passed to radeon_set_sh_reg
 * @param reg     tracked-register index (bit position and array index)
 * @param val     value to write (evaluated once)
 */
#define radeon_opt_set_sh_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_sh_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
261
/**
 * Write an SH register only if the tracked value for \p reg is not saved yet
 * or differs from \p val; the tracked bit and value are then updated.
 *
 * When sctx->gfx_level >= GFX10 the write goes through
 * radeon_set_sh_reg_idx3, otherwise through radeon_set_sh_reg.
 *
 * @param sctx    context holding gfx_level and tracked_regs (parenthesized in
 *                the expansion so any pointer expression is accepted)
 * @param offset  register offset
 * @param reg     tracked-register index (bit position and array index)
 * @param val     value to write (evaluated once)
 */
#define radeon_opt_set_sh_reg_idx3(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      if ((sctx)->gfx_level >= GFX10) { \
         radeon_set_sh_reg_idx3(offset, __value); \
      } else { \
         radeon_set_sh_reg(offset, __value); \
      } \
      (sctx)->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
274
/**
 * Emit PKT3_SET_UCONFIG_REG (via radeon_set_uconfig_reg) only if the tracked
 * value for \p reg is not saved yet or differs from \p val; the tracked bit
 * and value are then updated.
 *
 * @param sctx    context holding tracked_regs (parenthesized in the expansion
 *                so any pointer expression is accepted)
 * @param offset  register offset passed to radeon_set_uconfig_reg
 * @param reg     tracked-register index (bit position and array index)
 * @param val     value to write (evaluated once)
 */
#define radeon_opt_set_uconfig_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_uconfig_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= 0x1ull << (reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
284
/**
 * Write \p value to \p reg using a PKT3_COPY_DATA packet instead of a
 * SET_*_REG packet.
 *
 * Emits six dwords: the COPY_DATA header, the control dword selecting an
 * immediate source (COPY_DATA_IMM) and the COPY_DATA_PERF destination,
 * \p value, an unused zero, reg >> 2, and another unused zero.
 *
 * Asserts that \p reg is below CIK_UCONFIG_REG_OFFSET.
 */
#define radeon_set_privileged_config_reg(reg, value) do { \
   assert((reg) < CIK_UCONFIG_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
   radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \
               COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
   radeon_emit(value); \
   radeon_emit(0); /* unused */ \
   radeon_emit((reg) >> 2); \
   radeon_emit(0); /* unused */ \
} while (0)
295
/**
 * Emit the low 32 bits of the address \p va as one dword.
 *
 * Asserts that \p va is either 0 or has its upper 32 bits equal to
 * sscreen->info.address32_hi.
 *
 * \p va is captured in a local so it is evaluated exactly once regardless of
 * whether assertions are enabled.
 *
 * @param sscreen  object providing info.address32_hi
 * @param va       64-bit address
 */
#define radeon_emit_32bit_pointer(sscreen, va) do { \
   uint64_t __va = (va); \
   radeon_emit(__va); \
   assert(__va == 0 || (__va >> 32) == (sscreen)->info.address32_hi); \
} while (0)
300
/**
 * Emit a one-register SH write containing the 32-bit pointer to \p desc.
 *
 * The register is sh_base + desc->shader_userdata_offset and the value is
 * desc->gpu_address, emitted through radeon_emit_32bit_pointer with
 * sctx->screen.
 *
 * The local uses the "__" prefix like the other macros in this header, so a
 * caller variable named sh_offset passed as \p sh_base is not shadowed by it.
 *
 * @param sctx     context providing ->screen
 * @param desc     descriptor providing shader_userdata_offset and gpu_address
 * @param sh_base  base SH register offset
 */
#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \
   unsigned __sh_offset = (sh_base) + (desc)->shader_userdata_offset; \
   radeon_set_sh_reg_seq(__sh_offset, 1); \
   radeon_emit_32bit_pointer((sctx)->screen, (desc)->gpu_address); \
} while (0)
306
307 /* Wrappers that are only used when they are passed as function pointers. */
/**
 * Function form of radeon_set_sh_reg: opens a write section on \p cs, emits
 * a one-register PKT3_SET_SH_REG packet for \p reg_offset carrying \p value,
 * and closes the section.
 */
static inline void radeon_set_sh_reg_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
                                          uint32_t value)
{
   radeon_begin(cs);

   /* Packet header + register offset, then the single payload dword. */
   radeon_set_sh_reg_seq(reg_offset, 1);
   radeon_emit(value);

   radeon_end();
}
315
/**
 * Function form of radeon_set_sh_reg_idx3: opens a write section on \p cs,
 * emits a one-register PKT3_SET_SH_REG_INDEX packet for \p reg_offset
 * carrying \p value, and closes the section.
 */
static inline void radeon_set_sh_reg_idx3_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
                                               uint32_t value)
{
   radeon_begin(cs);

   /* Packet header + indexed register offset, then the single payload dword. */
   radeon_set_sh_reg_idx3_seq(reg_offset, 1);
   radeon_emit(value);

   radeon_end();
}
323
324 /* This should be evaluated at compile time if all parameters are constants. */
325 static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level,enum si_has_tess has_tess,enum si_has_gs has_gs,enum si_has_ngg ngg,enum pipe_shader_type shader)326 si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,
327 enum si_has_gs has_gs, enum si_has_ngg ngg,
328 enum pipe_shader_type shader)
329 {
330 switch (shader) {
331 case PIPE_SHADER_VERTEX:
332 /* VS can be bound as VS, ES, or LS. */
333 if (has_tess) {
334 if (gfx_level >= GFX10) {
335 return R_00B430_SPI_SHADER_USER_DATA_HS_0;
336 } else if (gfx_level == GFX9) {
337 return R_00B430_SPI_SHADER_USER_DATA_LS_0;
338 } else {
339 return R_00B530_SPI_SHADER_USER_DATA_LS_0;
340 }
341 } else if (gfx_level >= GFX10) {
342 if (ngg || has_gs) {
343 return R_00B230_SPI_SHADER_USER_DATA_GS_0;
344 } else {
345 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
346 }
347 } else if (has_gs) {
348 return R_00B330_SPI_SHADER_USER_DATA_ES_0;
349 } else {
350 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
351 }
352
353 case PIPE_SHADER_TESS_CTRL:
354 if (gfx_level == GFX9) {
355 return R_00B430_SPI_SHADER_USER_DATA_LS_0;
356 } else {
357 return R_00B430_SPI_SHADER_USER_DATA_HS_0;
358 }
359
360 case PIPE_SHADER_TESS_EVAL:
361 /* TES can be bound as ES, VS, or not bound. */
362 if (has_tess) {
363 if (gfx_level >= GFX10) {
364 if (ngg || has_gs) {
365 return R_00B230_SPI_SHADER_USER_DATA_GS_0;
366 } else {
367 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
368 }
369 } else if (has_gs) {
370 return R_00B330_SPI_SHADER_USER_DATA_ES_0;
371 } else {
372 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
373 }
374 } else {
375 return 0;
376 }
377
378 case PIPE_SHADER_GEOMETRY:
379 if (gfx_level == GFX9) {
380 return R_00B330_SPI_SHADER_USER_DATA_ES_0;
381 } else {
382 return R_00B230_SPI_SHADER_USER_DATA_GS_0;
383 }
384
385 default:
386 assert(0);
387 return 0;
388 }
389 }
390
391 #endif
392