1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
 * This file contains helpers for writing commands to command streams.
27 */
28
29 #ifndef SI_BUILD_PM4_H
30 #define SI_BUILD_PM4_H
31
32 #include "si_pipe.h"
33 #include "sid.h"
34
/* Debug hook invoked by the register-setting macros in this file with the
 * register offset and the number of consecutive registers being written.
 *
 * Change "#if 0" to "#if 1" to forward every such range to
 * ac_check_shadowed_regs(); note that the enabled form hard-codes
 * GFX10 / CHIP_NAVI14 as the chip being checked.
 */
#if 0
#include "ac_shadowed_regs.h"
#define SI_CHECK_SHADOWED_REGS(reg_offset, count) ac_check_shadowed_regs(GFX10, CHIP_NAVI14, reg_offset, count)
#else
/* Disabled: expand to a void expression (arguments are not evaluated) so the
 * macro can be used anywhere the enabled call expression could be. */
#define SI_CHECK_SHADOWED_REGS(reg_offset, count) ((void)0)
#endif
41
/* Open a command-emission scope on "cs".
 *
 * Declares the locals that the other radeon_* macros in this file operate on:
 *   __cs             - the command buffer being written
 *   __cs_buf         - its dword array (cs->current.buf)
 *   __cs_num         - running dword index, initialised from cs->current.cdw
 *   __cs_num_initial - snapshot of the starting index
 *
 * The expansion is a run of declarations whose last one is left unterminated,
 * so the call site supplies the trailing semicolon. Because it declares
 * variables it can only appear once per scope; radeon_begin_again() reuses
 * the same locals.
 */
#define radeon_begin(cs) \
   struct radeon_cmdbuf *__cs = (cs); \
   uint32_t *__cs_buf = __cs->current.buf; \
   unsigned __cs_num = __cs->current.cdw; \
   UNUSED unsigned __cs_num_initial = __cs_num
46
/* Re-open an emission scope in a block where radeon_begin() has already
 * declared the bookkeeping locals.
 *
 * Asserts that __cs is NULL (the state radeon_end() leaves behind), then
 * reloads every local from "cs".
 */
#define radeon_begin_again(cs) do { \
   assert(__cs == NULL); \
   __cs = (cs); \
   __cs_buf = __cs->current.buf; \
   __cs_num = __cs->current.cdw; \
   __cs_num_initial = __cs_num; \
} while (0)
54
/* Close the current emission scope.
 *
 * Publishes the local dword index to __cs->current.cdw, asserts that the
 * buffer's max_dw has not been exceeded, and resets __cs to NULL so that
 * radeon_begin_again() can verify the previous scope was closed.
 */
#define radeon_end() do { \
   __cs->current.cdw = __cs_num; \
   assert(__cs->current.cdw <= __cs->current.max_dw); \
   __cs = NULL; \
} while (0)
60
/* Append one dword to the open emission scope and advance the local index.
 *
 * The expansion is fully parenthesized so that it behaves as one expression
 * wherever it is used (for example as an operand of ?: or of the comma
 * operator), not only as a stand-alone statement.
 */
#define radeon_emit(value) (__cs_buf[__cs_num++] = (value))

/* Non-zero if at least one dword has been emitted since the scope was opened,
 * i.e. the local index differs from the snapshot taken at that time.
 */
#define radeon_packets_added() (__cs_num != __cs_num_initial)
63
/* Close the emission scope (see radeon_end) and, if any dword was emitted
 * while it was open, set sctx->context_roll to true. The flag is never
 * cleared here.
 */
#define radeon_end_update_context_roll(sctx) do { \
   radeon_end(); \
   if (radeon_packets_added()) { \
      (sctx)->context_roll = true; \
   } \
} while (0)
69
/* Append "num" dwords from the array "values" to the open emission scope.
 *
 * "num" is evaluated once. The data is copied with memcpy (4 bytes per
 * dword) to the current write position, and the local index is then
 * advanced by the same count.
 */
#define radeon_emit_array(values, num) do { \
   const unsigned __count = (num); \
   memcpy(&__cs_buf[__cs_num], (values), __count * 4); \
   __cs_num += __count; \
} while (0)
75
/* Emit the two-dword header for writing "num" consecutive config registers
 * starting at byte offset "reg": a PKT3_SET_CONFIG_REG packet header followed
 * by the dword offset of "reg" relative to SI_CONFIG_REG_OFFSET.
 *
 * The register values themselves are emitted by the caller afterwards
 * (radeon_set_config_reg does this for a single value).
 *
 * The assertion checks both ends of the range: a register below
 * SI_CONFIG_REG_OFFSET would make the subtraction wrap and encode a bogus
 * offset into the packet.
 */
#define radeon_set_config_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_CONFIG_REG_OFFSET && (reg) < SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONFIG_REG, num, 0)); \
   radeon_emit(((reg) - SI_CONFIG_REG_OFFSET) >> 2); \
} while (0)
82
/* Write a single config register: the sequence header for one register
 * followed by its value.
 */
#define radeon_set_config_reg(reg, value) do { \
   radeon_set_config_reg_seq(reg, 1); \
   radeon_emit(value); \
} while (0)
87
/* Emit the two-dword header for writing "num" consecutive context registers
 * starting at byte offset "reg": a PKT3_SET_CONTEXT_REG packet header followed
 * by the dword offset of "reg" relative to SI_CONTEXT_REG_OFFSET.
 *
 * Asserts that "reg" is not below the context register range. The register
 * values are emitted by the caller afterwards.
 */
#define radeon_set_context_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \
   radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
} while (0)
94
/* Write a single context register: the sequence header for one register
 * followed by its value.
 */
#define radeon_set_context_reg(reg, value) do { \
   radeon_set_context_reg_seq(reg, 1); \
   radeon_emit(value); \
} while (0)
99
/* Write "num" consecutive context registers starting at "reg", taking the
 * values from the dword array "values".
 */
#define radeon_set_context_reg_seq_array(reg, num, values) do { \
   radeon_set_context_reg_seq(reg, num); \
   radeon_emit_array(values, num); \
} while (0)
104
/* Write a single context register with an index field.
 *
 * Same packet as radeon_set_context_reg, except that "idx" is OR-ed into
 * bits 28 and up of the offset dword. "idx" is converted to uint32_t before
 * shifting so that values with bit 3 set do not shift into the sign bit of a
 * signed int (which would be undefined behaviour).
 */
#define radeon_set_context_reg_idx(reg, idx, value) do { \
   SI_CHECK_SHADOWED_REGS(reg, 1); \
   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
   radeon_emit((((reg) - SI_CONTEXT_REG_OFFSET) >> 2) | ((uint32_t)(idx) << 28)); \
   radeon_emit(value); \
} while (0)
112
/* Emit the two-dword header for writing "num" consecutive SH registers
 * starting at byte offset "reg": a PKT3_SET_SH_REG packet header followed by
 * the dword offset of "reg" relative to SI_SH_REG_OFFSET.
 *
 * Asserts that "reg" lies in [SI_SH_REG_OFFSET, SI_SH_REG_END). The register
 * values are emitted by the caller afterwards.
 */
#define radeon_set_sh_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
   radeon_emit(PKT3(PKT3_SET_SH_REG, num, 0)); \
   radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
} while (0)
119
/* Write a single SH register: the sequence header for one register followed
 * by its value.
 */
#define radeon_set_sh_reg(reg, value) do { \
   radeon_set_sh_reg_seq(reg, 1); \
   radeon_emit(value); \
} while (0)
124
/* Emit the two-dword header for writing "num" consecutive UCONFIG registers
 * starting at byte offset "reg": a PKT3_SET_UCONFIG_REG packet header followed
 * by the dword offset of "reg" relative to CIK_UCONFIG_REG_OFFSET.
 *
 * "perfctr" is forwarded unchanged as the third PKT3() argument. Asserts that
 * "reg" lies in [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END). The register
 * values are emitted by the caller afterwards.
 */
#define radeon_set_uconfig_reg_seq(reg, num, perfctr) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
   radeon_emit(PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \
   radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \
} while (0)
131
/* Write a single UCONFIG register with the perfctr argument of the sequence
 * header set to false.
 */
#define radeon_set_uconfig_reg(reg, value) do { \
   radeon_set_uconfig_reg_seq(reg, 1, false); \
   radeon_emit(value); \
} while (0)
136
/* Write a single UCONFIG register with the perfctr argument of the sequence
 * header set to true.
 */
#define radeon_set_uconfig_reg_perfctr(reg, value) do { \
   radeon_set_uconfig_reg_seq(reg, 1, true); \
   radeon_emit(value); \
} while (0)
141
/* Write a single UCONFIG register with a non-zero index field.
 *
 * The packet opcode is PKT3_SET_UCONFIG_REG_INDEX, except on chips older than
 * GFX9 and on GFX9 whose ME firmware version is below 26, where plain
 * PKT3_SET_UCONFIG_REG is used instead (presumably because the INDEX packet
 * is unavailable there - confirm against firmware notes).
 *
 * "idx" is OR-ed into bits 28 and up of the offset dword; it is converted to
 * uint32_t before shifting so that values with bit 3 set do not shift into
 * the sign bit of a signed int (which would be undefined behaviour).
 *
 * Asserts that "reg" lies in the UCONFIG range and that "idx" is non-zero.
 */
#define radeon_set_uconfig_reg_idx(screen, chip_class, reg, idx, value) do { \
   SI_CHECK_SHADOWED_REGS(reg, 1); \
   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
   assert((idx) != 0); \
   const unsigned __opcode = \
      ((chip_class) < GFX9 || \
       ((chip_class) == GFX9 && (screen)->info.me_fw_version < 26)) \
         ? PKT3_SET_UCONFIG_REG \
         : PKT3_SET_UCONFIG_REG_INDEX; \
   radeon_emit(PKT3(__opcode, 1, 0)); \
   radeon_emit((((reg) - CIK_UCONFIG_REG_OFFSET) >> 2) | ((uint32_t)(idx) << 28)); \
   radeon_emit(value); \
} while (0)
154
/**
 * Emit PKT3_SET_CONTEXT_REG only if the register value is different.
 *
 * The write is skipped when bit "reg" of sctx->tracked_regs.reg_saved is set
 * and sctx->tracked_regs.reg_value[reg] already equals the new value.
 * Otherwise the register is written and both the saved bit and the cached
 * value are updated.
 *
 * @param sctx   context that owns the tracked_regs cache
 * @param offset register byte offset passed to radeon_set_context_reg
 * @param reg    index of the register's slot in the tracked_regs cache
 * @param val    value to write (evaluated once)
 */
#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_context_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= 0x1ull << (reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
165
/**
 * Set 2 consecutive context registers if any of their values is different.
 *
 * The write is skipped only when both saved bits (reg, reg + 1) are set in
 * sctx->tracked_regs.reg_saved and both cached values match. Otherwise both
 * registers are written with one sequence packet and the cache is updated.
 *
 * @param sctx   context that owns the tracked_regs cache
 * @param offset starting register offset
 * @param reg    index of the first register's slot in the tracked_regs cache
 * @param val1   is written to first register
 * @param val2   is written to second register
 */
#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \
   unsigned __value1 = (val1), __value2 = (val2); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x3) != 0x3 || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2) { \
      radeon_set_context_reg_seq(offset, 2); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_saved |= 0x3ull << (reg); \
   } \
} while (0)
185
/**
 * Set 3 consecutive context registers if any of their values is different.
 *
 * The write is skipped only when all three saved bits (reg .. reg + 2) are
 * set in sctx->tracked_regs.reg_saved and all cached values match. Otherwise
 * all three registers are written with one sequence packet and the cache is
 * updated.
 *
 * @param sctx   context that owns the tracked_regs cache
 * @param offset starting register offset
 * @param reg    index of the first register's slot in the tracked_regs cache
 * @param val1   is written to first register
 * @param val2   is written to second register
 * @param val3   is written to third register
 */
#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x7) != 0x7 || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       (sctx)->tracked_regs.reg_value[(reg) + 2] != __value3) { \
      radeon_set_context_reg_seq(offset, 3); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_value[(reg) + 2] = __value3; \
      (sctx)->tracked_regs.reg_saved |= 0x7ull << (reg); \
   } \
} while (0)
205
/**
 * Set 4 consecutive context registers if any of their values is different.
 *
 * The write is skipped only when all four saved bits (reg .. reg + 3) are
 * set in sctx->tracked_regs.reg_saved and all cached values match. Otherwise
 * all four registers are written with one sequence packet and the cache is
 * updated.
 *
 * @param sctx   context that owns the tracked_regs cache
 * @param offset starting register offset
 * @param reg    index of the first register's slot in the tracked_regs cache
 * @param val1   is written to first register
 * @param val2   is written to second register
 * @param val3   is written to third register
 * @param val4   is written to fourth register
 */
#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 = (val4); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0xf) != 0xf || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       (sctx)->tracked_regs.reg_value[(reg) + 2] != __value3 || \
       (sctx)->tracked_regs.reg_value[(reg) + 3] != __value4) { \
      radeon_set_context_reg_seq(offset, 4); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      radeon_emit(__value4); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_value[(reg) + 2] = __value3; \
      (sctx)->tracked_regs.reg_value[(reg) + 3] = __value4; \
      (sctx)->tracked_regs.reg_saved |= 0xfull << (reg); \
   } \
} while (0)
228
/**
 * Set "num" consecutive context registers if any of their values is
 * different.
 *
 * Unlike the fixed-count variants, this compares the new values directly
 * against the caller-provided "saved_val" array with memcmp and does not
 * consult any "saved" bit; "sctx" is not used by the expansion. When the
 * arrays differ, the registers are written and "saved_val" is overwritten
 * with the new values.
 *
 * @param offset    starting register offset
 * @param value     array of "num" new dword values
 * @param saved_val array of "num" previously written dword values
 * @param num       number of consecutive registers
 */
#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) do { \
   if (memcmp(value, saved_val, sizeof(uint32_t) * (num)) != 0) { \
      radeon_set_context_reg_seq(offset, num); \
      radeon_emit_array(value, num); \
      memcpy(saved_val, value, sizeof(uint32_t) * (num)); \
   } \
} while (0)
239
/**
 * Emit PKT3_SET_SH_REG only if the register value is different.
 *
 * The write is skipped when bit "reg" of sctx->tracked_regs.reg_saved is set
 * and sctx->tracked_regs.reg_value[reg] already equals the new value.
 * Otherwise the register is written and both the saved bit and the cached
 * value are updated.
 *
 * @param sctx   context that owns the tracked_regs cache
 * @param offset register byte offset passed to radeon_set_sh_reg
 * @param reg    index of the register's slot in the tracked_regs cache
 * @param val    value to write (evaluated once)
 */
#define radeon_opt_set_sh_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_sh_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
249
/**
 * Emit PKT3_SET_UCONFIG_REG only if the register value is different.
 *
 * The write is skipped when bit "reg" of sctx->tracked_regs.reg_saved is set
 * and sctx->tracked_regs.reg_value[reg] already equals the new value.
 * Otherwise the register is written and both the saved bit and the cached
 * value are updated.
 *
 * @param sctx   context that owns the tracked_regs cache
 * @param offset register byte offset passed to radeon_set_uconfig_reg
 * @param reg    index of the register's slot in the tracked_regs cache
 * @param val    value to write (evaluated once)
 */
#define radeon_opt_set_uconfig_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_uconfig_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= 0x1ull << (reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
259
/* Write "value" to a register located below CIK_UCONFIG_REG_OFFSET by means
 * of a PKT3_COPY_DATA packet instead of a SET_*_REG packet.
 *
 * Six dwords are emitted: the packet header, the control dword selecting an
 * immediate source (COPY_DATA_IMM) and the COPY_DATA_PERF destination, the
 * value, a zero dword, the register's dword offset, and a final zero dword.
 */
#define radeon_set_privileged_config_reg(reg, value) do { \
   assert((reg) < CIK_UCONFIG_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
   radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
   radeon_emit(value); \
   radeon_emit(0); /* unused */ \
   radeon_emit((reg) >> 2); \
   radeon_emit(0); /* unused */ \
} while (0)
270
/* Emit the low 32 bits of the GPU address "va" as one dword.
 *
 * "va" is evaluated exactly once into a 64-bit local. In debug builds the
 * macro also asserts that the address is either 0 or has its upper 32 bits
 * equal to sscreen->info.address32_hi, i.e. that dropping the high half
 * loses nothing.
 */
#define radeon_emit_32bit_pointer(sscreen, va) do { \
   const uint64_t __va = (va); \
   radeon_emit(__va); \
   assert(__va == 0 || (__va >> 32) == (sscreen)->info.address32_hi); \
} while (0)
275
/* Write the 32-bit GPU address of "desc" into one SH register.
 *
 * The target register is sh_base + desc->shader_userdata_offset; the value is
 * desc->gpu_address, emitted through radeon_emit_32bit_pointer with
 * sctx->screen as the screen.
 *
 * The temporary uses the file's "__" prefix so that a caller passing an
 * expression that mentions its own variable named sh_offset is not captured
 * by (and self-initialised from) the macro's local.
 */
#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \
   unsigned __sh_offset = (sh_base) + (desc)->shader_userdata_offset; \
   radeon_set_sh_reg_seq(__sh_offset, 1); \
   radeon_emit_32bit_pointer((sctx)->screen, (desc)->gpu_address); \
} while (0)
281
282 /* This should be evaluated at compile time if all parameters are constants. */
283 static ALWAYS_INLINE unsigned
si_get_user_data_base(enum chip_class chip_class,enum si_has_tess has_tess,enum si_has_gs has_gs,enum si_has_ngg ngg,enum pipe_shader_type shader)284 si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess,
285 enum si_has_gs has_gs, enum si_has_ngg ngg,
286 enum pipe_shader_type shader)
287 {
288 switch (shader) {
289 case PIPE_SHADER_VERTEX:
290 /* VS can be bound as VS, ES, or LS. */
291 if (has_tess) {
292 if (chip_class >= GFX10) {
293 return R_00B430_SPI_SHADER_USER_DATA_HS_0;
294 } else if (chip_class == GFX9) {
295 return R_00B430_SPI_SHADER_USER_DATA_LS_0;
296 } else {
297 return R_00B530_SPI_SHADER_USER_DATA_LS_0;
298 }
299 } else if (chip_class >= GFX10) {
300 if (ngg || has_gs) {
301 return R_00B230_SPI_SHADER_USER_DATA_GS_0;
302 } else {
303 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
304 }
305 } else if (has_gs) {
306 return R_00B330_SPI_SHADER_USER_DATA_ES_0;
307 } else {
308 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
309 }
310
311 case PIPE_SHADER_TESS_CTRL:
312 if (chip_class == GFX9) {
313 return R_00B430_SPI_SHADER_USER_DATA_LS_0;
314 } else {
315 return R_00B430_SPI_SHADER_USER_DATA_HS_0;
316 }
317
318 case PIPE_SHADER_TESS_EVAL:
319 /* TES can be bound as ES, VS, or not bound. */
320 if (has_tess) {
321 if (chip_class >= GFX10) {
322 if (ngg || has_gs) {
323 return R_00B230_SPI_SHADER_USER_DATA_GS_0;
324 } else {
325 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
326 }
327 } else if (has_gs) {
328 return R_00B330_SPI_SHADER_USER_DATA_ES_0;
329 } else {
330 return R_00B130_SPI_SHADER_USER_DATA_VS_0;
331 }
332 } else {
333 return 0;
334 }
335
336 case PIPE_SHADER_GEOMETRY:
337 if (chip_class == GFX9) {
338 return R_00B330_SPI_SHADER_USER_DATA_ES_0;
339 } else {
340 return R_00B230_SPI_SHADER_USER_DATA_GS_0;
341 }
342
343 default:
344 assert(0);
345 return 0;
346 }
347 }
348
349 #endif
350