/*
 * Copyright © 2024 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#ifndef PANVK_CMD_BUFFER_H
#define PANVK_CMD_BUFFER_H

#ifndef PAN_ARCH
#error "PAN_ARCH must be defined"
#endif

#include <stdint.h>

#include "genxml/cs_builder.h"

#include "panvk_cmd_desc_state.h"
#include "panvk_cmd_dispatch.h"
#include "panvk_cmd_draw.h"
#include "panvk_cmd_push_constant.h"
#include "panvk_queue.h"

#include "vk_command_buffer.h"

#include "util/list.h"
#include "util/perf/u_trace.h"

#define MAX_VBS 16
#define MAX_RTS 8
#define MAX_LAYERS_PER_TILER_DESC 8

struct panvk_cs_sync32 {
   uint32_t seqno;
   uint32_t error;
};

struct panvk_cs_sync64 {
   uint64_t seqno;
   uint32_t error;
   uint32_t pad;
};

struct panvk_cs_desc_ringbuf {
   uint64_t syncobj;
   uint64_t ptr;
   uint32_t pos;
   uint32_t pad;
};

enum panvk_incremental_rendering_pass {
   PANVK_IR_FIRST_PASS,
   PANVK_IR_MIDDLE_PASS,
   PANVK_IR_LAST_PASS,
   PANVK_IR_PASS_COUNT
};

static inline uint32_t
get_tiler_oom_handler_idx(bool has_zs_ext, uint32_t rt_count)
{
   assert(rt_count >= 1 && rt_count <= MAX_RTS);
   uint32_t idx = has_zs_ext * MAX_RTS + (rt_count - 1);
   assert(idx < 2 * MAX_RTS);
   return idx;
}
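
/* The index above packs both parameters into a flat table of 2 * MAX_RTS
 * handlers: e.g. has_zs_ext = true and rt_count = 4 selects handler
 * MAX_RTS + 3 = 11. */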

static inline uint32_t
get_fbd_size(bool has_zs_ext, uint32_t rt_count)
{
   assert(rt_count >= 1 && rt_count <= MAX_RTS);
   uint32_t fbd_size = pan_size(FRAMEBUFFER);
   if (has_zs_ext)
      fbd_size += pan_size(ZS_CRC_EXTENSION);
   fbd_size += pan_size(RENDER_TARGET) * rt_count;
   return fbd_size;
}
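
/* In other words, a framebuffer descriptor is the base FRAMEBUFFER section,
 * optionally followed by a ZS_CRC_EXTENSION section, followed by one
 * RENDER_TARGET section per render target. */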

/* 512k of render descriptors that can be used when
 * VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT is set on the command buffer. */
#define RENDER_DESC_RINGBUF_SIZE (512 * 1024)

/* Helper macros returning the offset of specific fields in the tiler_oom_ctx. */
#define TILER_OOM_CTX_FIELD_OFFSET(_name)                                      \
   offsetof(struct panvk_cs_subqueue_context, tiler_oom_ctx._name)
#define TILER_OOM_CTX_FBDPTR_OFFSET(_pass)                                     \
   (TILER_OOM_CTX_FIELD_OFFSET(fbds) +                                         \
    (PANVK_IR_##_pass##_PASS * sizeof(uint64_t)))
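
/* For instance, TILER_OOM_CTX_FBDPTR_OFFSET(MIDDLE) expands to
 * offsetof(struct panvk_cs_subqueue_context, tiler_oom_ctx.fbds) +
 * PANVK_IR_MIDDLE_PASS * sizeof(uint64_t), i.e. the offset of the
 * middle-pass framebuffer descriptor pointer. */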

struct panvk_cs_occlusion_query {
   uint64_t next;
   uint64_t syncobj;
};

struct panvk_cs_subqueue_context {
   uint64_t syncobjs;
   uint32_t iter_sb;
   uint32_t pad;
   struct {
      struct panvk_cs_desc_ringbuf desc_ringbuf;
      uint64_t tiler_heap;
      uint64_t geom_buf;
      uint64_t oq_chain;
   } render;
   struct {
      uint32_t counter;
      uint64_t fbds[PANVK_IR_PASS_COUNT];
      uint32_t td_count;
      uint32_t layer_count;
      uint64_t reg_dump_addr;
   } tiler_oom_ctx;
   struct {
      uint64_t syncobjs;
      struct {
         uint64_t cs;
      } tracebuf;
   } debug;
} __attribute__((aligned(64)));

struct panvk_cache_flush_info {
   enum mali_cs_flush_mode l2;
   enum mali_cs_flush_mode lsc;
   bool others;
};

struct panvk_cs_deps {
   bool needs_draw_flush;
   struct {
      uint32_t wait_sb_mask;
      struct panvk_cache_flush_info cache_flush;
   } src[PANVK_SUBQUEUE_COUNT];

   struct {
      uint32_t wait_subqueue_mask;
   } dst[PANVK_SUBQUEUE_COUNT];
};

enum panvk_sb_ids {
   PANVK_SB_LS = 0,
   PANVK_SB_IMM_FLUSH = 0,
   PANVK_SB_DEFERRED_SYNC = 1,
   PANVK_SB_DEFERRED_FLUSH = 2,
   PANVK_SB_ITER_START = 3,
   PANVK_SB_ITER_COUNT = 5,
};

#define SB_IMM_MASK     0
#define SB_MASK(nm)     BITFIELD_BIT(PANVK_SB_##nm)
#define SB_ID(nm)       PANVK_SB_##nm
#define SB_ITER(x)      (PANVK_SB_ITER_START + (x))
#define SB_WAIT_ITER(x) BITFIELD_BIT(PANVK_SB_ITER_START + (x))
#define SB_ALL_ITERS_MASK                                                      \
   BITFIELD_RANGE(PANVK_SB_ITER_START, PANVK_SB_ITER_COUNT)
#define SB_ALL_MASK BITFIELD_MASK(8)
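
/* As an example, SB_MASK(DEFERRED_SYNC) is BITFIELD_BIT(1), SB_WAIT_ITER(0)
 * is BITFIELD_BIT(3), and SB_ALL_ITERS_MASK covers scoreboard slots 3..7,
 * i.e. the PANVK_SB_ITER_COUNT iteration slots starting at
 * PANVK_SB_ITER_START. */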

static inline uint32_t
next_iter_sb(uint32_t sb)
{
   return sb + 1 < PANVK_SB_ITER_COUNT ? sb + 1 : 0;
}
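
/* With PANVK_SB_ITER_COUNT == 5, the helper above cycles through the
 * relative iteration slot indices 0, 1, 2, 3, 4, 0, ... */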

enum panvk_cs_regs {
   /* RUN_IDVS staging regs. */
   PANVK_CS_REG_RUN_IDVS_SR_START = 0,
   PANVK_CS_REG_RUN_IDVS_SR_END = 60,

   /* RUN_FRAGMENT staging regs.
    * SW ABI:
    * - r38:39 contain the pointer to the first tiler descriptor. This is
    *   needed to gather completed heap chunks after a run_fragment.
    */
   PANVK_CS_REG_RUN_FRAGMENT_SR_START = 38,
   PANVK_CS_REG_RUN_FRAGMENT_SR_END = 46,

   /* RUN_COMPUTE staging regs. */
   PANVK_CS_REG_RUN_COMPUTE_SR_START = 0,
   PANVK_CS_REG_RUN_COMPUTE_SR_END = 39,

   /* Range of registers that can be used to store temporary data on
    * all queues. Note that some queues have extra space they can use
    * as scratch space. */
   PANVK_CS_REG_SCRATCH_START = 66,
   PANVK_CS_REG_SCRATCH_END = 83,

   /* Driver context. */
   PANVK_CS_REG_PROGRESS_SEQNO_START = 84,
   PANVK_CS_REG_PROGRESS_SEQNO_END = 89,
   PANVK_CS_REG_SUBQUEUE_CTX_START = 90,
   PANVK_CS_REG_SUBQUEUE_CTX_END = 91,
};

#define CS_REG_SCRATCH_COUNT                                                   \
   (PANVK_CS_REG_SCRATCH_END - PANVK_CS_REG_SCRATCH_START + 1)
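
/* With the ranges above, that is 83 - 66 + 1 = 18 scratch registers shared
 * by all subqueues. */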

static inline struct cs_index
cs_scratch_reg_tuple(struct cs_builder *b, unsigned start, unsigned count)
{
   assert(start + count <= CS_REG_SCRATCH_COUNT);
   return cs_reg_tuple(b, PANVK_CS_REG_SCRATCH_START + start, count);
}

static inline struct cs_index
cs_scratch_reg32(struct cs_builder *b, unsigned reg)
{
   return cs_scratch_reg_tuple(b, reg, 1);
}

static inline struct cs_index
cs_scratch_reg64(struct cs_builder *b, unsigned reg)
{
   assert(reg % 2 == 0);
   return cs_scratch_reg_tuple(b, reg, 2);
}

static inline struct cs_index
cs_sr_reg_tuple(struct cs_builder *b, unsigned start, unsigned count)
{
   assert(start + count - 1 < PANVK_CS_REG_SCRATCH_START);
   return cs_reg_tuple(b, start, count);
}

static inline struct cs_index
cs_sr_reg32(struct cs_builder *b, unsigned reg)
{
   return cs_sr_reg_tuple(b, reg, 1);
}

static inline struct cs_index
cs_sr_reg64(struct cs_builder *b, unsigned reg)
{
   assert(reg % 2 == 0);
   return cs_sr_reg_tuple(b, reg, 2);
}

static inline struct cs_index
cs_subqueue_ctx_reg(struct cs_builder *b)
{
   return cs_reg64(b, PANVK_CS_REG_SUBQUEUE_CTX_START);
}

static inline struct cs_index
cs_progress_seqno_reg(struct cs_builder *b, enum panvk_subqueue_id subqueue)
{
   assert(PANVK_CS_REG_PROGRESS_SEQNO_START + (subqueue * 2) <
          PANVK_CS_REG_PROGRESS_SEQNO_END);
   return cs_reg64(b, PANVK_CS_REG_PROGRESS_SEQNO_START + (subqueue * 2));
}

struct panvk_cs_reg_upd_context {
   reg_perm_cb_t reg_perm;
   struct panvk_cs_reg_upd_context *next;
};

struct panvk_cs_state {
   struct cs_builder builder;

   struct cs_load_store_tracker ls_tracker;

   /* Used to debug register writes in invalid contexts. */
   struct {
      struct panvk_cs_reg_upd_context *upd_ctx_stack;
      reg_perm_cb_t base_perm;
   } reg_access;

   /* Sync point relative to the beginning of the command buffer.
    * Needs to be offset by the subqueue sync point. */
   int32_t relative_sync_point;

   struct cs_tracing_ctx tracing;
};

static inline struct panvk_cs_reg_upd_context *
panvk_cs_reg_ctx_push(struct cs_builder *b,
                      struct panvk_cs_reg_upd_context *ctx,
                      reg_perm_cb_t reg_perm)
{
   struct panvk_cs_state *cs_state =
      container_of(b, struct panvk_cs_state, builder);

   ctx->reg_perm = reg_perm;
   ctx->next = cs_state->reg_access.upd_ctx_stack;
   cs_state->reg_access.upd_ctx_stack = ctx;
   return ctx;
}

static inline void
panvk_cs_reg_ctx_pop(struct cs_builder *b, struct panvk_cs_reg_upd_context *ctx)
{
   struct panvk_cs_state *cs_state =
      container_of(b, struct panvk_cs_state, builder);

   assert(cs_state->reg_access.upd_ctx_stack == ctx);

   cs_state->reg_access.upd_ctx_stack = ctx->next;
}

struct panvk_cs_reg_range {
   unsigned start;
   unsigned end;
};

#define PANVK_CS_REG_RANGE(__name)                                             \
   {                                                                           \
      .start = PANVK_CS_REG_##__name##_START,                                  \
      .end = PANVK_CS_REG_##__name##_END,                                      \
   }

#define panvk_cs_reg_blacklist(__name, ...)                                    \
   static inline enum cs_reg_perm panvk_cs_##__name##_reg_perm(                \
      struct cs_builder *b, unsigned reg)                                      \
   {                                                                           \
      const struct panvk_cs_reg_range ranges[] = {                             \
         __VA_ARGS__,                                                          \
      };                                                                       \
                                                                               \
      for (unsigned i = 0; i < ARRAY_SIZE(ranges); i++) {                      \
         if (reg >= ranges[i].start && reg <= ranges[i].end)                   \
            return CS_REG_RD;                                                  \
      }                                                                        \
                                                                               \
      return CS_REG_RW;                                                        \
   }

panvk_cs_reg_blacklist(vt, PANVK_CS_REG_RANGE(RUN_IDVS_SR),
                       PANVK_CS_REG_RANGE(PROGRESS_SEQNO),
                       PANVK_CS_REG_RANGE(SUBQUEUE_CTX));
panvk_cs_reg_blacklist(frag, PANVK_CS_REG_RANGE(RUN_FRAGMENT_SR),
                       PANVK_CS_REG_RANGE(PROGRESS_SEQNO),
                       PANVK_CS_REG_RANGE(SUBQUEUE_CTX));
panvk_cs_reg_blacklist(compute, PANVK_CS_REG_RANGE(RUN_COMPUTE_SR),
                       PANVK_CS_REG_RANGE(PROGRESS_SEQNO),
                       PANVK_CS_REG_RANGE(SUBQUEUE_CTX));

#define panvk_cs_reg_whitelist(__name, ...)                                    \
   static inline enum cs_reg_perm panvk_cs_##__name##_reg_perm(                \
      struct cs_builder *b, unsigned reg)                                      \
   {                                                                           \
      const struct panvk_cs_reg_range ranges[] = {                             \
         __VA_ARGS__,                                                          \
      };                                                                       \
                                                                               \
      for (unsigned i = 0; i < ARRAY_SIZE(ranges); i++) {                      \
         if (reg >= ranges[i].start && reg <= ranges[i].end)                   \
            return CS_REG_RW;                                                  \
      }                                                                        \
                                                                               \
      return CS_REG_RD;                                                        \
   }

#define panvk_cs_reg_upd_ctx(__b, __name)                                      \
   for (struct panvk_cs_reg_upd_context __reg_upd_ctx,                         \
        *reg_upd_ctxp = panvk_cs_reg_ctx_push(__b, &__reg_upd_ctx,             \
                                              panvk_cs_##__name##_reg_perm);   \
        reg_upd_ctxp;                                                          \
        panvk_cs_reg_ctx_pop(__b, &__reg_upd_ctx), reg_upd_ctxp = NULL)
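
/* The cs_update_*() wrappers below use panvk_cs_reg_upd_ctx() to scope a
 * register permission context around a block, roughly:
 *
 *    cs_update_vt_ctx(b) {
 *       // writes to the RUN_IDVS staging registers are allowed here,
 *       // everything else stays read-only per panvk_cs_vt_ctx_reg_perm()
 *    }
 *
 * The permission callback is only used to debug register writes in invalid
 * contexts; it does not change the emitted CS program. */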

panvk_cs_reg_whitelist(progress_seqno, PANVK_CS_REG_RANGE(PROGRESS_SEQNO));
#define cs_update_progress_seqno(__b) panvk_cs_reg_upd_ctx(__b, progress_seqno)

panvk_cs_reg_whitelist(compute_ctx, PANVK_CS_REG_RANGE(RUN_COMPUTE_SR));
#define cs_update_compute_ctx(__b) panvk_cs_reg_upd_ctx(__b, compute_ctx)

panvk_cs_reg_whitelist(frag_ctx, PANVK_CS_REG_RANGE(RUN_FRAGMENT_SR));
#define cs_update_frag_ctx(__b) panvk_cs_reg_upd_ctx(__b, frag_ctx)

panvk_cs_reg_whitelist(vt_ctx, PANVK_CS_REG_RANGE(RUN_IDVS_SR));
#define cs_update_vt_ctx(__b) panvk_cs_reg_upd_ctx(__b, vt_ctx)

panvk_cs_reg_whitelist(cmdbuf_regs, {PANVK_CS_REG_RUN_IDVS_SR_START,
                                     PANVK_CS_REG_SCRATCH_END});
#define cs_update_cmdbuf_regs(__b) panvk_cs_reg_upd_ctx(__b, cmdbuf_regs)

struct panvk_tls_state {
   struct panfrost_ptr desc;
   struct pan_tls_info info;
   unsigned max_wg_count;
};

struct panvk_cmd_buffer {
   struct vk_command_buffer vk;
   VkCommandBufferUsageFlags flags;
   struct panvk_pool cs_pool;
   struct panvk_pool desc_pool;
   struct panvk_pool tls_pool;
   struct list_head push_sets;

   uint32_t flush_id;

   struct {
      struct u_trace uts[PANVK_SUBQUEUE_COUNT];
   } utrace;

   struct {
      struct panvk_cmd_graphics_state gfx;
      struct panvk_cmd_compute_state compute;
      struct panvk_push_constant_state push_constants;
      struct panvk_cs_state cs[PANVK_SUBQUEUE_COUNT];
      struct panvk_tls_state tls;
   } state;
};

VK_DEFINE_HANDLE_CASTS(panvk_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)

static inline struct cs_builder *
panvk_get_cs_builder(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
{
   return &cmdbuf->state.cs[subqueue].builder;
}

static inline struct panvk_descriptor_state *
panvk_cmd_get_desc_state(struct panvk_cmd_buffer *cmdbuf,
                         VkPipelineBindPoint bindpoint)
{
   switch (bindpoint) {
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      return &cmdbuf->state.gfx.desc_state;

   case VK_PIPELINE_BIND_POINT_COMPUTE:
      return &cmdbuf->state.compute.desc_state;

   default:
      assert(!"Unsupported bind point");
      return NULL;
   }
}

extern const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops);

void panvk_per_arch(cmd_flush_draws)(struct panvk_cmd_buffer *cmdbuf);

void panvk_per_arch(cs_pick_iter_sb)(struct panvk_cmd_buffer *cmdbuf,
                                     enum panvk_subqueue_id subqueue);

void panvk_per_arch(get_cs_deps)(struct panvk_cmd_buffer *cmdbuf,
                                 const VkDependencyInfo *in,
                                 struct panvk_cs_deps *out);

VkResult panvk_per_arch(cmd_prepare_exec_cmd_for_draws)(
   struct panvk_cmd_buffer *primary, struct panvk_cmd_buffer *secondary);

void panvk_per_arch(cmd_inherit_render_state)(
   struct panvk_cmd_buffer *cmdbuf,
   const VkCommandBufferBeginInfo *pBeginInfo);

#endif /* PANVK_CMD_BUFFER_H */