1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #ifndef RADV_CS_H
26 #define RADV_CS_H
27
28 #include <assert.h>
29 #include <stdint.h>
30 #include <string.h>
31 #include "radv_private.h"
32 #include "sid.h"
33
34 static inline unsigned
radeon_check_space(struct radeon_winsys * ws,struct radeon_cmdbuf * cs,unsigned needed)35 radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned needed)
36 {
37 assert(cs->cdw <= cs->reserved_dw);
38 if (cs->max_dw - cs->cdw < needed)
39 ws->cs_grow(cs, needed);
40 cs->reserved_dw = MAX2(cs->reserved_dw, cs->cdw + needed);
41 return cs->cdw + needed;
42 }
43
44 static inline void
radeon_set_config_reg_seq(struct radeon_cmdbuf * cs,unsigned reg,unsigned num)45 radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
46 {
47 assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
48 assert(cs->cdw + 2 + num <= cs->reserved_dw);
49 assert(num);
50 radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
51 radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
52 }
53
/*
 * Write a single config register: a one-register SET_CONFIG_REG header
 * followed by `value`.
 */
static inline void
radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   /* Header for exactly one register... */
   radeon_set_config_reg_seq(cs, reg, 1);

   /* ...then its payload dword. */
   radeon_emit(cs, value);
}
60
61 static inline void
radeon_set_context_reg_seq(struct radeon_cmdbuf * cs,unsigned reg,unsigned num)62 radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
63 {
64 assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
65 assert(cs->cdw + 2 + num <= cs->reserved_dw);
66 assert(num);
67 radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
68 radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
69 }
70
/*
 * Write a single context register: a one-register SET_CONTEXT_REG header
 * followed by `value`.
 */
static inline void
radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   /* Header for exactly one register... */
   radeon_set_context_reg_seq(cs, reg, 1);

   /* ...then its payload dword. */
   radeon_emit(cs, value);
}
77
78 static inline void
radeon_set_context_reg_idx(struct radeon_cmdbuf * cs,unsigned reg,unsigned idx,unsigned value)79 radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, unsigned value)
80 {
81 assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
82 assert(cs->cdw + 3 <= cs->reserved_dw);
83 radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
84 radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
85 radeon_emit(cs, value);
86 }
87
88 static inline void
radeon_set_sh_reg_seq(struct radeon_cmdbuf * cs,unsigned reg,unsigned num)89 radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
90 {
91 assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
92 assert(cs->cdw + 2 + num <= cs->reserved_dw);
93 assert(num);
94 radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
95 radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
96 }
97
/*
 * Write a single SH register: a one-register SET_SH_REG header followed by
 * `value`.
 */
static inline void
radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   /* Header for exactly one register... */
   radeon_set_sh_reg_seq(cs, reg, 1);

   /* ...then its payload dword. */
   radeon_emit(cs, value);
}
104
105 static inline void
radeon_set_sh_reg_idx(const struct radv_physical_device * pdevice,struct radeon_cmdbuf * cs,unsigned reg,unsigned idx,unsigned value)106 radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, unsigned reg, unsigned idx,
107 unsigned value)
108 {
109 assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
110 assert(cs->cdw + 3 <= cs->reserved_dw);
111 assert(idx);
112
113 unsigned opcode = PKT3_SET_SH_REG_INDEX;
114 if (pdevice->rad_info.gfx_level < GFX10)
115 opcode = PKT3_SET_SH_REG;
116
117 radeon_emit(cs, PKT3(opcode, 1, 0));
118 radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28));
119 radeon_emit(cs, value);
120 }
121
122 static inline void
radeon_set_uconfig_reg_seq(struct radeon_cmdbuf * cs,unsigned reg,unsigned num)123 radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
124 {
125 assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
126 assert(cs->cdw + 2 + num <= cs->reserved_dw);
127 assert(num);
128 radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
129 radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
130 }
131
132 static inline void
radeon_set_uconfig_reg_seq_perfctr(enum amd_gfx_level gfx_level,enum radv_queue_family qf,struct radeon_cmdbuf * cs,unsigned reg,unsigned num)133 radeon_set_uconfig_reg_seq_perfctr(enum amd_gfx_level gfx_level, enum radv_queue_family qf, struct radeon_cmdbuf *cs,
134 unsigned reg, unsigned num)
135 {
136 const bool filter_cam_workaround = gfx_level >= GFX10 && qf == RADV_QUEUE_GENERAL;
137
138 assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
139 assert(cs->cdw + 2 + num <= cs->reserved_dw);
140 assert(num);
141 radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0) | PKT3_RESET_FILTER_CAM_S(filter_cam_workaround));
142 radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
143 }
144
145 static inline void
radeon_set_uconfig_reg_perfctr(enum amd_gfx_level gfx_level,enum radv_queue_family qf,struct radeon_cmdbuf * cs,unsigned reg,unsigned value)146 radeon_set_uconfig_reg_perfctr(enum amd_gfx_level gfx_level, enum radv_queue_family qf, struct radeon_cmdbuf *cs,
147 unsigned reg, unsigned value)
148 {
149 radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg, 1);
150 radeon_emit(cs, value);
151 }
152
/*
 * Write a single uconfig register: a one-register SET_UCONFIG_REG header
 * followed by `value`.
 */
static inline void
radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   /* Header for exactly one register... */
   radeon_set_uconfig_reg_seq(cs, reg, 1);

   /* ...then its payload dword. */
   radeon_emit(cs, value);
}
159
160 static inline void
radeon_set_uconfig_reg_idx(const struct radv_physical_device * pdevice,struct radeon_cmdbuf * cs,unsigned reg,unsigned idx,unsigned value)161 radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, unsigned reg,
162 unsigned idx, unsigned value)
163 {
164 assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
165 assert(cs->cdw + 3 <= cs->reserved_dw);
166 assert(idx);
167
168 unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
169 if (pdevice->rad_info.gfx_level < GFX9 ||
170 (pdevice->rad_info.gfx_level == GFX9 && pdevice->rad_info.me_fw_version < 26))
171 opcode = PKT3_SET_UCONFIG_REG;
172
173 radeon_emit(cs, PKT3(opcode, 1, 0));
174 radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
175 radeon_emit(cs, value);
176 }
177
178 static inline void
radeon_set_perfctr_reg(enum amd_gfx_level gfx_level,enum radv_queue_family qf,struct radeon_cmdbuf * cs,unsigned reg,unsigned value)179 radeon_set_perfctr_reg(enum amd_gfx_level gfx_level, enum radv_queue_family qf, struct radeon_cmdbuf *cs, unsigned reg,
180 unsigned value)
181 {
182 assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
183 assert(cs->cdw + 3 <= cs->reserved_dw);
184
185 /*
186 * On GFX10, there is a bug with the ME implementation of its content addressable memory (CAM),
187 * that means that it can skip register writes due to not taking correctly into account the
188 * fields from the GRBM_GFX_INDEX. With this bit we can force the write.
189 */
190 bool filter_cam_workaround = gfx_level >= GFX10 && qf == RADV_QUEUE_GENERAL;
191
192 radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM_S(filter_cam_workaround));
193 radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
194 radeon_emit(cs, value);
195 }
196
197 static inline void
radeon_set_privileged_config_reg(struct radeon_cmdbuf * cs,unsigned reg,unsigned value)198 radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
199 {
200 assert(reg < CIK_UCONFIG_REG_OFFSET);
201 assert(cs->cdw + 6 <= cs->reserved_dw);
202
203 radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
204 radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF));
205 radeon_emit(cs, value);
206 radeon_emit(cs, 0); /* unused */
207 radeon_emit(cs, reg >> 2);
208 radeon_emit(cs, 0); /* unused */
209 }
210
211 ALWAYS_INLINE static void
radv_cp_wait_mem(struct radeon_cmdbuf * cs,const enum radv_queue_family qf,const uint32_t op,const uint64_t va,const uint32_t ref,const uint32_t mask)212 radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
213 const uint32_t ref, const uint32_t mask)
214 {
215 assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL);
216
217 if (qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE) {
218 radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
219 radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
220 radeon_emit(cs, va);
221 radeon_emit(cs, va >> 32);
222 radeon_emit(cs, ref); /* reference value */
223 radeon_emit(cs, mask); /* mask */
224 radeon_emit(cs, 4); /* poll interval */
225 } else if (qf == RADV_QUEUE_TRANSFER) {
226 radeon_emit(cs, SDMA_PACKET(SDMA_OPCODE_POLL_REGMEM, 0, 0) | op << 28 | SDMA_POLL_MEM);
227 radeon_emit(cs, va);
228 radeon_emit(cs, va >> 32);
229 radeon_emit(cs, ref);
230 radeon_emit(cs, mask);
231 radeon_emit(cs, SDMA_POLL_INTERVAL_160_CLK | SDMA_POLL_RETRY_INDEFINITELY << 16);
232 } else {
233 unreachable("unsupported queue family");
234 }
235 }
236
237 ALWAYS_INLINE static unsigned
radv_cs_write_data_head(const struct radv_device * device,struct radeon_cmdbuf * cs,const enum radv_queue_family qf,const unsigned engine_sel,const uint64_t va,const unsigned count,const bool predicating)238 radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
239 const unsigned engine_sel, const uint64_t va, const unsigned count, const bool predicating)
240 {
241 /* Return the correct cdw at the end of the packet so the caller can assert it. */
242 const unsigned cdw_end = radeon_check_space(device->ws, cs, 4 + count);
243
244 if (qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE) {
245 radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, predicating));
246 radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));
247 radeon_emit(cs, va);
248 radeon_emit(cs, va >> 32);
249 } else if (qf == RADV_QUEUE_TRANSFER) {
250 /* Vulkan transfer queues don't support conditional rendering, so we can ignore predication here.
251 * Furthermore, we can ignore the engine selection here, it is meaningless to the SDMA.
252 */
253 radeon_emit(cs, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
254 radeon_emit(cs, va);
255 radeon_emit(cs, va >> 32);
256 radeon_emit(cs, count - 1);
257 } else {
258 unreachable("unsupported queue family");
259 }
260
261 return cdw_end;
262 }
263
264 ALWAYS_INLINE static void
radv_cs_write_data(const struct radv_device * device,struct radeon_cmdbuf * cs,const enum radv_queue_family qf,const unsigned engine_sel,const uint64_t va,const unsigned count,const uint32_t * dwords,const bool predicating)265 radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
266 const unsigned engine_sel, const uint64_t va, const unsigned count, const uint32_t *dwords,
267 const bool predicating)
268 {
269 ASSERTED const unsigned cdw_end = radv_cs_write_data_head(device, cs, qf, engine_sel, va, count, predicating);
270 radeon_emit_array(cs, dwords, count);
271 assert(cs->cdw == cdw_end);
272 }
273
274 #endif /* RADV_CS_H */
275