/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef RADV_CS_H
#define RADV_CS_H

#include <assert.h>
#include <stdint.h>
#include <string.h>

#include "radv_private.h"
#include "sid.h"

/* Ensure the command buffer can hold `needed` more dwords, asking the winsys
 * to grow it when the remaining space is insufficient, and raise the
 * reserved-dword watermark accordingly.
 *
 * Returns cs->cdw + needed, i.e. the dword count the buffer will reach once
 * the caller has emitted all `needed` dwords — callers can assert against it
 * to verify they emitted exactly the reserved amount.
 */
static inline unsigned
radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned needed)
{
   /* The current write position must never exceed what was reserved. */
   assert(cs->cdw <= cs->reserved_dw);
   if (cs->max_dw - cs->cdw < needed)
      ws->cs_grow(cs, needed);
   cs->reserved_dw = MAX2(cs->reserved_dw, cs->cdw + needed);
   return cs->cdw + needed;
}
43 
44 static inline void
radeon_set_config_reg_seq(struct radeon_cmdbuf * cs,unsigned reg,unsigned num)45 radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
46 {
47    assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
48    assert(cs->cdw + 2 + num <= cs->reserved_dw);
49    assert(num);
50    radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
51    radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
52 }
53 
/* Write a single config register: a SET_CONFIG_REG header followed by one value. */
static inline void
radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   radeon_set_config_reg_seq(cs, reg, 1);
   radeon_emit(cs, value);
}
60 
61 static inline void
radeon_set_context_reg_seq(struct radeon_cmdbuf * cs,unsigned reg,unsigned num)62 radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
63 {
64    assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
65    assert(cs->cdw + 2 + num <= cs->reserved_dw);
66    assert(num);
67    radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
68    radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
69 }
70 
/* Write a single context register: a SET_CONTEXT_REG header followed by one value. */
static inline void
radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   radeon_set_context_reg_seq(cs, reg, 1);
   radeon_emit(cs, value);
}
77 
78 static inline void
radeon_set_context_reg_idx(struct radeon_cmdbuf * cs,unsigned reg,unsigned idx,unsigned value)79 radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, unsigned value)
80 {
81    assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
82    assert(cs->cdw + 3 <= cs->reserved_dw);
83    radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
84    radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
85    radeon_emit(cs, value);
86 }
87 
88 static inline void
radeon_set_sh_reg_seq(struct radeon_cmdbuf * cs,unsigned reg,unsigned num)89 radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
90 {
91    assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
92    assert(cs->cdw + 2 + num <= cs->reserved_dw);
93    assert(num);
94    radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
95    radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
96 }
97 
/* Write a single shader register: a SET_SH_REG header followed by one value. */
static inline void
radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   radeon_set_sh_reg_seq(cs, reg, 1);
   radeon_emit(cs, value);
}
104 
105 static inline void
radeon_set_sh_reg_idx(const struct radv_physical_device * pdevice,struct radeon_cmdbuf * cs,unsigned reg,unsigned idx,unsigned value)106 radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, unsigned reg, unsigned idx,
107                       unsigned value)
108 {
109    assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
110    assert(cs->cdw + 3 <= cs->reserved_dw);
111    assert(idx);
112 
113    unsigned opcode = PKT3_SET_SH_REG_INDEX;
114    if (pdevice->rad_info.gfx_level < GFX10)
115       opcode = PKT3_SET_SH_REG;
116 
117    radeon_emit(cs, PKT3(opcode, 1, 0));
118    radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28));
119    radeon_emit(cs, value);
120 }
121 
122 static inline void
radeon_set_uconfig_reg_seq(struct radeon_cmdbuf * cs,unsigned reg,unsigned num)123 radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
124 {
125    assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
126    assert(cs->cdw + 2 + num <= cs->reserved_dw);
127    assert(num);
128    radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
129    radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
130 }
131 
132 static inline void
radeon_set_uconfig_reg_seq_perfctr(enum amd_gfx_level gfx_level,enum radv_queue_family qf,struct radeon_cmdbuf * cs,unsigned reg,unsigned num)133 radeon_set_uconfig_reg_seq_perfctr(enum amd_gfx_level gfx_level, enum radv_queue_family qf, struct radeon_cmdbuf *cs,
134                                    unsigned reg, unsigned num)
135 {
136    const bool filter_cam_workaround = gfx_level >= GFX10 && qf == RADV_QUEUE_GENERAL;
137 
138    assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
139    assert(cs->cdw + 2 + num <= cs->reserved_dw);
140    assert(num);
141    radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0) | PKT3_RESET_FILTER_CAM_S(filter_cam_workaround));
142    radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
143 }
144 
145 static inline void
radeon_set_uconfig_reg_perfctr(enum amd_gfx_level gfx_level,enum radv_queue_family qf,struct radeon_cmdbuf * cs,unsigned reg,unsigned value)146 radeon_set_uconfig_reg_perfctr(enum amd_gfx_level gfx_level, enum radv_queue_family qf, struct radeon_cmdbuf *cs,
147                                unsigned reg, unsigned value)
148 {
149    radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg, 1);
150    radeon_emit(cs, value);
151 }
152 
153 static inline void
radeon_set_uconfig_reg(struct radeon_cmdbuf * cs,unsigned reg,unsigned value)154 radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
155 {
156    radeon_set_uconfig_reg_seq(cs, reg, 1);
157    radeon_emit(cs, value);
158 }
159 
160 static inline void
radeon_set_uconfig_reg_idx(const struct radv_physical_device * pdevice,struct radeon_cmdbuf * cs,unsigned reg,unsigned idx,unsigned value)161 radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, unsigned reg,
162                            unsigned idx, unsigned value)
163 {
164    assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
165    assert(cs->cdw + 3 <= cs->reserved_dw);
166    assert(idx);
167 
168    unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
169    if (pdevice->rad_info.gfx_level < GFX9 ||
170        (pdevice->rad_info.gfx_level == GFX9 && pdevice->rad_info.me_fw_version < 26))
171       opcode = PKT3_SET_UCONFIG_REG;
172 
173    radeon_emit(cs, PKT3(opcode, 1, 0));
174    radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
175    radeon_emit(cs, value);
176 }
177 
178 static inline void
radeon_set_perfctr_reg(enum amd_gfx_level gfx_level,enum radv_queue_family qf,struct radeon_cmdbuf * cs,unsigned reg,unsigned value)179 radeon_set_perfctr_reg(enum amd_gfx_level gfx_level, enum radv_queue_family qf, struct radeon_cmdbuf *cs, unsigned reg,
180                        unsigned value)
181 {
182    assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
183    assert(cs->cdw + 3 <= cs->reserved_dw);
184 
185    /*
186     * On GFX10, there is a bug with the ME implementation of its content addressable memory (CAM),
187     * that means that it can skip register writes due to not taking correctly into account the
188     * fields from the GRBM_GFX_INDEX. With this bit we can force the write.
189     */
190    bool filter_cam_workaround = gfx_level >= GFX10 && qf == RADV_QUEUE_GENERAL;
191 
192    radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM_S(filter_cam_workaround));
193    radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
194    radeon_emit(cs, value);
195 }
196 
197 static inline void
radeon_set_privileged_config_reg(struct radeon_cmdbuf * cs,unsigned reg,unsigned value)198 radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
199 {
200    assert(reg < CIK_UCONFIG_REG_OFFSET);
201    assert(cs->cdw + 6 <= cs->reserved_dw);
202 
203    radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
204    radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF));
205    radeon_emit(cs, value);
206    radeon_emit(cs, 0); /* unused */
207    radeon_emit(cs, reg >> 2);
208    radeon_emit(cs, 0); /* unused */
209 }
210 
211 ALWAYS_INLINE static void
radv_cp_wait_mem(struct radeon_cmdbuf * cs,const enum radv_queue_family qf,const uint32_t op,const uint64_t va,const uint32_t ref,const uint32_t mask)212 radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
213                  const uint32_t ref, const uint32_t mask)
214 {
215    assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL);
216 
217    if (qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE) {
218       radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
219       radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
220       radeon_emit(cs, va);
221       radeon_emit(cs, va >> 32);
222       radeon_emit(cs, ref);  /* reference value */
223       radeon_emit(cs, mask); /* mask */
224       radeon_emit(cs, 4);    /* poll interval */
225    } else if (qf == RADV_QUEUE_TRANSFER) {
226       radeon_emit(cs, SDMA_PACKET(SDMA_OPCODE_POLL_REGMEM, 0, 0) | op << 28 | SDMA_POLL_MEM);
227       radeon_emit(cs, va);
228       radeon_emit(cs, va >> 32);
229       radeon_emit(cs, ref);
230       radeon_emit(cs, mask);
231       radeon_emit(cs, SDMA_POLL_INTERVAL_160_CLK | SDMA_POLL_RETRY_INDEFINITELY << 16);
232    } else {
233       unreachable("unsupported queue family");
234    }
235 }
236 
237 ALWAYS_INLINE static unsigned
radv_cs_write_data_head(const struct radv_device * device,struct radeon_cmdbuf * cs,const enum radv_queue_family qf,const unsigned engine_sel,const uint64_t va,const unsigned count,const bool predicating)238 radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
239                         const unsigned engine_sel, const uint64_t va, const unsigned count, const bool predicating)
240 {
241    /* Return the correct cdw at the end of the packet so the caller can assert it. */
242    const unsigned cdw_end = radeon_check_space(device->ws, cs, 4 + count);
243 
244    if (qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE) {
245       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, predicating));
246       radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));
247       radeon_emit(cs, va);
248       radeon_emit(cs, va >> 32);
249    } else if (qf == RADV_QUEUE_TRANSFER) {
250       /* Vulkan transfer queues don't support conditional rendering, so we can ignore predication here.
251        * Furthermore, we can ignore the engine selection here, it is meaningless to the SDMA.
252        */
253       radeon_emit(cs, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
254       radeon_emit(cs, va);
255       radeon_emit(cs, va >> 32);
256       radeon_emit(cs, count - 1);
257    } else {
258       unreachable("unsupported queue family");
259    }
260 
261    return cdw_end;
262 }
263 
264 ALWAYS_INLINE static void
radv_cs_write_data(const struct radv_device * device,struct radeon_cmdbuf * cs,const enum radv_queue_family qf,const unsigned engine_sel,const uint64_t va,const unsigned count,const uint32_t * dwords,const bool predicating)265 radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
266                    const unsigned engine_sel, const uint64_t va, const unsigned count, const uint32_t *dwords,
267                    const bool predicating)
268 {
269    ASSERTED const unsigned cdw_end = radv_cs_write_data_head(device, cs, qf, engine_sel, va, count, predicating);
270    radeon_emit_array(cs, dwords, count);
271    assert(cs->cdw == cdw_end);
272 }
273 
#endif /* RADV_CS_H */