1 /*
2 * Copyright © 2021 Collabora Ltd.
3 *
4 * Derived from tu_cmd_buffer.c which is:
5 * Copyright © 2016 Red Hat.
6 * Copyright © 2016 Bas Nieuwenhuizen
7 * Copyright © 2015 Intel Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29 #include "genxml/gen_macros.h"
30
31 #include "panvk_cs.h"
32 #include "panvk_private.h"
33
34 #include "pan_blitter.h"
35 #include "pan_cs.h"
36 #include "pan_encoder.h"
37
38 #include "util/rounding.h"
39 #include "util/u_pack_color.h"
40 #include "vk_format.h"
41
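/* With the PANVK_DEBUG_DUMP flag set we want BO contents to be dumpable from
 * the CPU, so drop PAN_BO_INVISIBLE from the allocation flags. This is a
 * debug aid only; it is never done by default since it presumably costs
 * CPU-visible address space and mapping overhead.
 */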
42 static uint32_t
43 panvk_debug_adjust_bo_flags(const struct panvk_device *device,
44 uint32_t bo_flags)
45 {
46 uint32_t debug_flags =
47 device->physical_device->instance->debug_flags;
48
49 if (debug_flags & PANVK_DEBUG_DUMP)
50 bo_flags &= ~PAN_BO_INVISIBLE;
51
52 return bo_flags;
53 }
54
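/* Allocate and emit the FRAGMENT job for the current batch. It consumes the
 * framebuffer descriptor emitted at batch-close time and is what kicks off
 * the per-tile fragment work for the render pass.
 */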
55 static void
56 panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
57 {
58 const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
59 struct panvk_batch *batch = cmdbuf->state.batch;
60 struct panfrost_ptr job_ptr =
61 pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB);
62
63 GENX(pan_emit_fragment_job)(fbinfo, batch->fb.desc.gpu, job_ptr.cpu);
64 batch->fragment_job = job_ptr.gpu;
65 util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
66 }
67
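/* Close the currently open batch:
 *  - drop it entirely if it has no job, no clear and no event operation,
 *  - emit a single NULL job if it exists purely for synchronization,
 *  - otherwise emit the FB preload jobs, allocate TLS/WLS memory, fill the
 *    local-storage and framebuffer descriptors and prepare the FRAGMENT job.
 */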
68 void
69 panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
70 {
71 struct panvk_batch *batch = cmdbuf->state.batch;
72
73 if (!batch)
74 return;
75
76 const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
77
78 assert(batch);
79
80 bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
81 for (unsigned i = 0; i < fbinfo->rt_count; i++)
82 clear |= fbinfo->rts[i].clear;
83
84 if (!clear && !batch->scoreboard.first_job) {
85 if (util_dynarray_num_elements(&batch->event_ops, struct panvk_event_op) == 0) {
86 /* Content-less batch, let's drop it */
87 vk_free(&cmdbuf->pool->vk.alloc, batch);
88 } else {
89 /* Batch has no jobs but is needed for synchronization, let's add a
90 * NULL job so the SUBMIT ioctl doesn't choke on it.
91 */
92 struct panfrost_ptr ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base,
93 JOB_HEADER);
94 util_dynarray_append(&batch->jobs, void *, ptr.cpu);
95 panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
96 MALI_JOB_TYPE_NULL, false, false, 0, 0,
97 &ptr, false);
98 list_addtail(&batch->node, &cmdbuf->batches);
99 }
100 cmdbuf->state.batch = NULL;
101 return;
102 }
103
104 struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
105
106 list_addtail(&batch->node, &cmdbuf->batches);
107
108 if (batch->scoreboard.first_tiler) {
109 struct panfrost_ptr preload_jobs[2];
110 unsigned num_preload_jobs =
111 GENX(pan_preload_fb)(&cmdbuf->desc_pool.base, &batch->scoreboard,
112 &cmdbuf->state.fb.info, batch->tls.gpu,
113 batch->tiler.descs.gpu, preload_jobs);
114 for (unsigned i = 0; i < num_preload_jobs; i++)
115 util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
116 }
117
118 if (batch->tlsinfo.tls.size) {
119 unsigned size = panfrost_get_total_stack_size(batch->tlsinfo.tls.size,
120 pdev->thread_tls_alloc,
121 pdev->core_id_range);
122 batch->tlsinfo.tls.ptr =
123 pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, size, 4096).gpu;
124 }
125
126 if (batch->tlsinfo.wls.size) {
127 assert(batch->wls_total_size);
128 batch->tlsinfo.wls.ptr =
129 pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, batch->wls_total_size, 4096).gpu;
130 }
131
132 if (batch->tls.cpu)
133 GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);
134
135 if (batch->fb.desc.cpu) {
136 batch->fb.desc.gpu |=
137 GENX(pan_emit_fbd)(pdev, &cmdbuf->state.fb.info, &batch->tlsinfo,
138 &batch->tiler.ctx, batch->fb.desc.cpu);
139
140 panvk_cmd_prepare_fragment_job(cmdbuf);
141 }
142
143 cmdbuf->state.batch = NULL;
144 }
145
146 void
147 panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
148 const VkSubpassBeginInfo *pSubpassBeginInfo,
149 const VkSubpassEndInfo *pSubpassEndInfo)
150 {
151 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
152
153 panvk_per_arch(cmd_close_batch)(cmdbuf);
154
155 cmdbuf->state.subpass++;
156 panvk_cmd_fb_info_set_subpass(cmdbuf);
157 panvk_cmd_open_batch(cmdbuf);
158 }
159
160 void
161 panvk_per_arch(CmdNextSubpass)(VkCommandBuffer cmd, VkSubpassContents contents)
162 {
163 VkSubpassBeginInfo binfo = {
164 .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
165 .contents = contents
166 };
167 VkSubpassEndInfo einfo = {
168 .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
169 };
170
171 panvk_per_arch(CmdNextSubpass2)(cmd, &binfo, &einfo);
172 }
173
174 void
175 panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
176 {
177 struct panvk_batch *batch = cmdbuf->state.batch;
178
179 if (batch->fb.desc.gpu)
180 return;
181
182 const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
183 bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
184 unsigned tags = MALI_FBD_TAG_IS_MFBD;
185
186 batch->fb.info = cmdbuf->state.framebuffer;
187 batch->fb.desc =
188 pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
189 PAN_DESC(FRAMEBUFFER),
190 PAN_DESC_ARRAY(has_zs_ext ? 1 : 0, ZS_CRC_EXTENSION),
191 PAN_DESC_ARRAY(MAX2(fbinfo->rt_count, 1), RENDER_TARGET));
192
193 /* Tag the pointer */
194 batch->fb.desc.gpu |= tags;
195
196 memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
197 sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
198 }
199
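/* Allocate the framebuffer descriptor for the batch: one FRAMEBUFFER
 * descriptor, an optional ZS/CRC extension and one RENDER_TARGET descriptor
 * per color attachment (at least one). The low bits of the GPU address carry
 * a tag; MALI_FBD_TAG_IS_MFBD marks it as a multi-target framebuffer
 * descriptor.
 */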
200 void
201 panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
202 {
203 struct panvk_batch *batch = cmdbuf->state.batch;
204
205 assert(batch);
206 if (!batch->tls.gpu) {
207 batch->tls =
208 pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
209 }
210 }
211
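/* Refresh the draw-related sysvals (first vertex, base vertex/instance,
 * blend constants, viewport transform) and invalidate the GPU-side copy
 * (sysvals_ptr = 0) whenever one of them changed, so the next
 * panvk_cmd_prepare_sysvals() call re-uploads them.
 */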
212 static void
213 panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf,
214 struct panvk_cmd_bind_point_state *bind_point_state,
215 struct panvk_draw_info *draw)
216 {
217 struct panvk_sysvals *sysvals = &bind_point_state->desc_state.sysvals;
218
219 unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
220 if (sysvals->first_vertex != draw->offset_start ||
221 sysvals->base_vertex != base_vertex ||
222 sysvals->base_instance != draw->first_instance) {
223 sysvals->first_vertex = draw->offset_start;
224 sysvals->base_vertex = base_vertex;
225 sysvals->base_instance = draw->first_instance;
226 bind_point_state->desc_state.sysvals_ptr = 0;
227 }
228
229 if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) {
230 memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants,
231 sizeof(cmdbuf->state.blend.constants));
232 bind_point_state->desc_state.sysvals_ptr = 0;
233 }
234
235 if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) {
236 panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport,
237 &sysvals->viewport_scale);
238 panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport,
239 &sysvals->viewport_offset);
240 bind_point_state->desc_state.sysvals_ptr = 0;
241 }
242 }
243
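/* Upload the CPU-side sysvals to GPU-visible memory, unless a previously
 * uploaded copy is still valid.
 */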
244 static void
245 panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
246 struct panvk_cmd_bind_point_state *bind_point_state)
247 {
248 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
249
250 if (desc_state->sysvals_ptr)
251 return;
252
253 struct panfrost_ptr sysvals =
254 pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
255 sizeof(desc_state->sysvals), 16);
256 memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals));
257 desc_state->sysvals_ptr = sysvals.gpu;
258 }
259
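/* Upload the push constant block declared in the pipeline layout. Skipped
 * when the pipeline uses no push constants or when the previous upload is
 * still valid.
 */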
260 static void
261 panvk_cmd_prepare_push_constants(struct panvk_cmd_buffer *cmdbuf,
262 struct panvk_cmd_bind_point_state *bind_point_state)
263 {
264 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
265 const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
266
267 if (!pipeline->layout->push_constants.size || desc_state->push_constants)
268 return;
269
270 struct panfrost_ptr push_constants =
271 pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
272 ALIGN_POT(pipeline->layout->push_constants.size, 16),
273 16);
274
275 memcpy(push_constants.cpu, cmdbuf->push_constants,
276 pipeline->layout->push_constants.size);
277 desc_state->push_constants = push_constants.gpu;
278 }
279
280 static void
281 panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
282 struct panvk_cmd_bind_point_state *bind_point_state)
283 {
284 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
285 const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
286
287 if (!pipeline->num_ubos || desc_state->ubos)
288 return;
289
290 panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);
291 panvk_cmd_prepare_push_constants(cmdbuf, bind_point_state);
292
293 struct panfrost_ptr ubos =
294 pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
295 pipeline->num_ubos,
296 UNIFORM_BUFFER);
297
298 panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu);
299
300 desc_state->ubos = ubos.gpu;
301 }
302
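/* Concatenate the texture descriptors of all bound descriptor sets into one
 * contiguous array, in set order, which is presumably what the shader-side
 * texture indexing expects.
 */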
303 static void
304 panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf,
305 struct panvk_cmd_bind_point_state *bind_point_state)
306 {
307 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
308 const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
309 unsigned num_textures = pipeline->layout->num_textures;
310
311 if (!num_textures || desc_state->textures)
312 return;
313
314 struct panfrost_ptr textures =
315 pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
316 num_textures * pan_size(TEXTURE),
317 pan_size(TEXTURE));
318
319 void *texture = textures.cpu;
320
321 for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
322 if (!desc_state->sets[i]) continue;
323
324 memcpy(texture,
325 desc_state->sets[i]->textures,
326 desc_state->sets[i]->layout->num_textures *
327 pan_size(TEXTURE));
328
329 texture += desc_state->sets[i]->layout->num_textures *
330 pan_size(TEXTURE);
331 }
332
333 desc_state->textures = textures.gpu;
334 }
335
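/* Same as panvk_cmd_prepare_textures(), but for samplers. Slot 0 holds a
 * dummy nearest/non-normalized sampler, presumably so accesses through an
 * unbound sampler still hit something valid; the per-set samplers follow.
 */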
336 static void
337 panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf,
338 struct panvk_cmd_bind_point_state *bind_point_state)
339 {
340 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
341 const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
342 unsigned num_samplers = pipeline->layout->num_samplers;
343
344 if (!num_samplers || desc_state->samplers)
345 return;
346
347 struct panfrost_ptr samplers =
348 pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
349 num_samplers,
350 SAMPLER);
351
352 void *sampler = samplers.cpu;
353
354 /* Prepare the dummy sampler */
355 pan_pack(sampler, SAMPLER, cfg) {
356 cfg.seamless_cube_map = false;
357 cfg.magnify_nearest = true;
358 cfg.minify_nearest = true;
359 cfg.normalized_coordinates = false;
360 }
361
362 sampler += pan_size(SAMPLER);
363
364 for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
365 if (!desc_state->sets[i]) continue;
366
367 memcpy(sampler,
368 desc_state->sets[i]->samplers,
369 desc_state->sets[i]->layout->num_samplers *
370 pan_size(SAMPLER));
371
372 sampler += desc_state->sets[i]->layout->num_samplers *
373 pan_size(SAMPLER);
374 }
375
376 desc_state->samplers = samplers.gpu;
377 }
378
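/* Pick the fragment shader renderer state descriptor (RSD). Pipelines with
 * no dynamic fragment state use the RSD baked at pipeline creation time.
 * Otherwise, merge the pipeline RSD/blend templates with the dynamic bits
 * (emit_dyn_fs_rsd/emit_blend_constant) and cache the result in
 * cmdbuf->state.fs_rsd until the dynamic state changes again.
 */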
379 static void
380 panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
381 struct panvk_draw_info *draw)
382 {
383 const struct panvk_pipeline *pipeline =
384 panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
385
386 if (!pipeline->fs.dynamic_rsd) {
387 draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT];
388 return;
389 }
390
391 if (!cmdbuf->state.fs_rsd) {
392 struct panfrost_ptr rsd =
393 pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
394 PAN_DESC(RENDERER_STATE),
395 PAN_DESC_ARRAY(pipeline->blend.state.rt_count,
396 BLEND));
397
398 struct mali_renderer_state_packed rsd_dyn;
399 struct mali_renderer_state_packed *rsd_templ =
400 (struct mali_renderer_state_packed *)&pipeline->fs.rsd_template;
401
402 STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >= sizeof(*rsd_templ));
403
404 panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn);
405 pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE);
406 memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn));
407
408 void *bd = rsd.cpu + pan_size(RENDERER_STATE);
409 for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
410 if (pipeline->blend.constant[i].index != (uint8_t)~0) {
411 struct mali_blend_packed bd_dyn;
412 struct mali_blend_packed *bd_templ =
413 (struct mali_blend_packed *)&pipeline->blend.bd_template[i];
414
415 STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >= sizeof(*bd_templ));
416 panvk_per_arch(emit_blend_constant)(cmdbuf->device, pipeline, i,
417 cmdbuf->state.blend.constants,
418 &bd_dyn);
419 pan_merge(bd_dyn, (*bd_templ), BLEND);
420 memcpy(bd, &bd_dyn, sizeof(bd_dyn));
421 }
422 bd += pan_size(BLEND);
423 }
424
425 cmdbuf->state.fs_rsd = rsd.gpu;
426 }
427
428 draw->fs_rsd = cmdbuf->state.fs_rsd;
429 }
430
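/* Allocate and fill the tiler context + tiler heap descriptors for the
 * batch. A CPU copy is kept in batch->tiler.templ, presumably so the
 * descriptors can be re-emitted or patched later without reading back
 * GPU-visible memory.
 */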
431 void
432 panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
433 unsigned width, unsigned height)
434 {
435 struct panvk_batch *batch = cmdbuf->state.batch;
436
437 if (batch->tiler.descs.cpu)
438 return;
439
440 batch->tiler.descs =
441 pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
442 PAN_DESC(TILER_CONTEXT),
443 PAN_DESC(TILER_HEAP));
444 STATIC_ASSERT(sizeof(batch->tiler.templ) >=
445 pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
446
447 struct panfrost_ptr desc = {
448 .gpu = batch->tiler.descs.gpu,
449 .cpu = batch->tiler.templ,
450 };
451
452 panvk_per_arch(emit_tiler_context)(cmdbuf->device, width, height, &desc);
453 memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
454 pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
455 batch->tiler.ctx.bifrost = batch->tiler.descs.gpu;
456 }
457
458 void
459 panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf)
460 {
461 const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
462
463 panvk_per_arch(cmd_get_tiler_context)(cmdbuf,
464 fbinfo->width,
465 fbinfo->height);
466 }
467
468 static void
469 panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
470 struct panvk_draw_info *draw)
471 {
472 struct panvk_batch *batch = cmdbuf->state.batch;
473
474 panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
475 draw->tiler_ctx = &batch->tiler.ctx;
476 }
477
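/* Allocate the varying buffers for this draw (sized for padded_vertex_count *
 * instance_count entries) and emit the per-stage ATTRIBUTE descriptors
 * referencing them. gl_Position and gl_PointSize addresses are also recorded
 * in the draw info, since the tiler job consumes them directly.
 */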
478 static void
479 panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
480 struct panvk_draw_info *draw)
481 {
482 const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
483 struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;
484
485 panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
486 draw->padded_vertex_count * draw->instance_count);
487
488 unsigned buf_count = panvk_varyings_buf_count(varyings);
489 struct panfrost_ptr bufs =
490 pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
491 buf_count + 1,
492 ATTRIBUTE_BUFFER);
493
494 panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu);
495
496 /* We need an empty entry to stop prefetching on Bifrost */
497 memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * buf_count), 0,
498 pan_size(ATTRIBUTE_BUFFER));
499
500 if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
501 draw->position = varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
502 varyings->varying[VARYING_SLOT_POS].offset;
503 }
504
505 if (pipeline->ia.writes_point_size) {
506 draw->psiz = varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
507 varyings->varying[VARYING_SLOT_PSIZ].offset;
508 } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
509 pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
510 pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
511 draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH ?
512 cmdbuf->state.rast.line_width : pipeline->rast.line_width;
513 } else {
514 draw->line_width = 1.0f;
515 }
516 draw->varying_bufs = bufs.gpu;
517
518 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
519 if (!varyings->stage[s].count) continue;
520
521 struct panfrost_ptr attribs =
522 pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
523 varyings->stage[s].count,
524 ATTRIBUTE);
525
526 panvk_per_arch(emit_varyings)(cmdbuf->device, varyings, s, attribs.cpu);
527 draw->stages[s].varyings = attribs.gpu;
528 }
529 }
530
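/* Images are exposed to shaders as attributes: each image in a bound set
 * comes with a pre-baked pair of attribute buffer descriptors (copied from
 * the set) and gets an ATTRIBUTE descriptor pointing at that pair, with the
 * image format filled in here.
 */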
531 static void
532 panvk_fill_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
533 struct panvk_cmd_bind_point_state *bind_point_state,
534 void *attrib_bufs, void *attribs,
535 unsigned first_buf)
536 {
537 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
538 const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
539
540 for (unsigned s = 0; s < pipeline->layout->num_sets; s++) {
541 const struct panvk_descriptor_set *set = desc_state->sets[s];
542
543 if (!set) continue;
544
545 const struct panvk_descriptor_set_layout *layout = set->layout;
546 unsigned img_idx = pipeline->layout->sets[s].img_offset;
547 unsigned offset = img_idx * pan_size(ATTRIBUTE_BUFFER) * 2;
548 unsigned size = layout->num_imgs * pan_size(ATTRIBUTE_BUFFER) * 2;
549
550 memcpy(attrib_bufs + offset, desc_state->sets[s]->img_attrib_bufs, size);
551
552 offset = img_idx * pan_size(ATTRIBUTE);
553 for (unsigned i = 0; i < layout->num_imgs; i++) {
554 pan_pack(attribs + offset, ATTRIBUTE, cfg) {
555 cfg.buffer_index = first_buf + (img_idx + i) * 2;
556 cfg.format = desc_state->sets[s]->img_fmts[i];
557 }
558 offset += pan_size(ATTRIBUTE);
559 }
560 }
561 }
562
563 static void
564 panvk_prepare_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
565 struct panvk_cmd_bind_point_state *bind_point_state)
566 {
567 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
568 const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
569
570 if (desc_state->non_vs_attribs || !pipeline->img_access_mask)
571 return;
572
573 unsigned attrib_count = pipeline->layout->num_imgs;
574 unsigned attrib_buf_count = (pipeline->layout->num_imgs * 2);
575 struct panfrost_ptr bufs =
576 pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
577 attrib_buf_count + 1,
578 ATTRIBUTE_BUFFER);
579 struct panfrost_ptr attribs =
580 pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
581 ATTRIBUTE);
582
583 panvk_fill_non_vs_attribs(cmdbuf, bind_point_state, bufs.cpu, attribs.cpu, 0);
584
585 desc_state->non_vs_attrib_bufs = bufs.gpu;
586 desc_state->non_vs_attribs = attribs.gpu;
587 }
588
589 static void
590 panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
591 struct panvk_draw_info *draw)
592 {
593 struct panvk_cmd_bind_point_state *bind_point_state =
594 panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
595 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
596 const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
597 unsigned num_imgs =
598 pipeline->img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX) ?
599 pipeline->layout->num_imgs : 0;
600 unsigned attrib_count = pipeline->attribs.attrib_count + num_imgs;
601
602 if (desc_state->vs_attribs || !attrib_count)
603 return;
604
605 if (!pipeline->attribs.buf_count) {
606 panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
607 desc_state->vs_attrib_bufs = desc_state->non_vs_attrib_bufs;
608 desc_state->vs_attribs = desc_state->non_vs_attribs;
609 return;
610 }
611
612 unsigned attrib_buf_count = pipeline->attribs.buf_count * 2;
613 struct panfrost_ptr bufs =
614 pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
615 attrib_buf_count + 1,
616 ATTRIBUTE_BUFFER);
617 struct panfrost_ptr attribs =
618 pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
619 ATTRIBUTE);
620
621 panvk_per_arch(emit_attrib_bufs)(&pipeline->attribs,
622 cmdbuf->state.vb.bufs,
623 cmdbuf->state.vb.count,
624 draw, bufs.cpu);
625 panvk_per_arch(emit_attribs)(cmdbuf->device, draw, &pipeline->attribs,
626 cmdbuf->state.vb.bufs, cmdbuf->state.vb.count,
627 attribs.cpu);
628
629 if (attrib_count > pipeline->attribs.buf_count) {
630 unsigned bufs_offset = pipeline->attribs.buf_count * pan_size(ATTRIBUTE_BUFFER) * 2;
631 unsigned attribs_offset = pipeline->attribs.buf_count * pan_size(ATTRIBUTE);
632
633 panvk_fill_non_vs_attribs(cmdbuf, bind_point_state,
634 bufs.cpu + bufs_offset, attribs.cpu + attribs_offset,
635 pipeline->attribs.buf_count * 2);
636 }
637
638 /* A NULL entry is needed to stop prefetching on Bifrost */
639 memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
640 pan_size(ATTRIBUTE_BUFFER));
641
642 desc_state->vs_attrib_bufs = bufs.gpu;
643 desc_state->vs_attribs = attribs.gpu;
644 }
645
646 static void
647 panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
648 struct panvk_draw_info *draw)
649 {
650 struct panvk_cmd_bind_point_state *bind_point_state =
651 panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
652 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
653 const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
654
655 for (unsigned i = 0; i < ARRAY_SIZE(draw->stages); i++) {
656 if (i == MESA_SHADER_VERTEX) {
657 panvk_draw_prepare_vs_attribs(cmdbuf, draw);
658 draw->stages[i].attributes = desc_state->vs_attribs;
659 draw->stages[i].attribute_bufs = desc_state->vs_attrib_bufs;
660 } else if (pipeline->img_access_mask & BITFIELD_BIT(i)) {
661 panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
662 draw->stages[i].attributes = desc_state->non_vs_attribs;
663 draw->stages[i].attribute_bufs = desc_state->non_vs_attrib_bufs;
664 }
665 }
666 }
667
668 static void
669 panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
670 struct panvk_draw_info *draw)
671 {
672 const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
673
674 if (pipeline->vpd) {
675 draw->viewport = pipeline->vpd;
676 } else if (cmdbuf->state.vpd) {
677 draw->viewport = cmdbuf->state.vpd;
678 } else {
679 struct panfrost_ptr vp =
680 pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT);
681
682 const VkViewport *viewport =
683 pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT ?
684 &cmdbuf->state.viewport : &pipeline->viewport;
685 const VkRect2D *scissor =
686 pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR ?
687 &cmdbuf->state.scissor : &pipeline->scissor;
688
689 panvk_per_arch(emit_viewport)(viewport, scissor, vp.cpu);
690 draw->viewport = cmdbuf->state.vpd = vp.gpu;
691 }
692 }
693
694 static void
695 panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
696 struct panvk_draw_info *draw)
697 {
698 const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
699 struct panvk_batch *batch = cmdbuf->state.batch;
700 struct panfrost_ptr ptr =
701 pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);
702
703 util_dynarray_append(&batch->jobs, void *, ptr.cpu);
704 draw->jobs.vertex = ptr;
705 panvk_per_arch(emit_vertex_job)(pipeline, draw, ptr.cpu);
706 }
707
708 static void
709 panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
710 struct panvk_draw_info *draw)
711 {
712 const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
713 struct panvk_batch *batch = cmdbuf->state.batch;
714 struct panfrost_ptr ptr =
715 pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB);
716
717 util_dynarray_append(&batch->jobs, void *, ptr.cpu);
718 draw->jobs.tiler = ptr;
719 panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu);
720 }
721
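/* Record a draw: make sure the vertex/tiler jobs of this draw fit in the
 * current batch (job IDs are 16-bit), allocate the FB and TLS descriptors,
 * upload the descriptor state (sysvals, UBOs, textures, samplers), then emit
 * a vertex job and, when rasterization is enabled, a tiler job depending on
 * it.
 */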
722 static void
723 panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf,
724 struct panvk_draw_info *draw)
725 {
726 struct panvk_batch *batch = cmdbuf->state.batch;
727 struct panvk_cmd_bind_point_state *bind_point_state =
728 panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
729 const struct panvk_pipeline *pipeline =
730 panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
731
732 /* There are only 16 bits in the descriptor for the job ID, make sure all
733 * the 3 (2 in Bifrost) jobs in this draw are in the same batch.
734 */
735 if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) {
736 panvk_per_arch(cmd_close_batch)(cmdbuf);
737 panvk_cmd_preload_fb_after_batch_split(cmdbuf);
738 batch = panvk_cmd_open_batch(cmdbuf);
739 }
740
741 if (pipeline->rast.enable)
742 panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
743
744 panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
745
746 panvk_cmd_prepare_draw_sysvals(cmdbuf, bind_point_state, draw);
747 panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
748 panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
749 panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
750
751 /* TODO: indexed draws */
752 struct panvk_descriptor_state *desc_state =
753 panvk_cmd_get_desc_state(cmdbuf, GRAPHICS);
754
755 draw->tls = batch->tls.gpu;
756 draw->fb = batch->fb.desc.gpu;
757 draw->ubos = desc_state->ubos;
758 draw->textures = desc_state->textures;
759 draw->samplers = desc_state->samplers;
760
761 STATIC_ASSERT(sizeof(draw->invocation) >= sizeof(struct mali_invocation_packed));
762 panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw->invocation,
763 1, draw->vertex_range, draw->instance_count,
764 1, 1, 1, true, false);
765
766 panvk_draw_prepare_fs_rsd(cmdbuf, draw);
767 panvk_draw_prepare_varyings(cmdbuf, draw);
768 panvk_draw_prepare_attributes(cmdbuf, draw);
769 panvk_draw_prepare_viewport(cmdbuf, draw);
770 panvk_draw_prepare_tiler_context(cmdbuf, draw);
771 panvk_draw_prepare_vertex_job(cmdbuf, draw);
772 panvk_draw_prepare_tiler_job(cmdbuf, draw);
773 batch->tlsinfo.tls.size = MAX2(pipeline->tls_size, batch->tlsinfo.tls.size);
774 assert(!pipeline->wls_size);
775
776 unsigned vjob_id =
777 panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
778 MALI_JOB_TYPE_VERTEX, false, false, 0, 0,
779 &draw->jobs.vertex, false);
780
781 if (pipeline->rast.enable) {
782 panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
783 MALI_JOB_TYPE_TILER, false, false, vjob_id, 0,
784 &draw->jobs.tiler, false);
785 }
786
787 /* Clear the dirty flags all at once */
788 desc_state->dirty = cmdbuf->state.dirty = 0;
789 }
790
791 void
792 panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
793 uint32_t vertexCount,
794 uint32_t instanceCount,
795 uint32_t firstVertex,
796 uint32_t firstInstance)
797 {
798 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
799
800 if (instanceCount == 0 || vertexCount == 0)
801 return;
802
803 struct panvk_draw_info draw = {
804 .first_vertex = firstVertex,
805 .vertex_count = vertexCount,
806 .vertex_range = vertexCount,
807 .first_instance = firstInstance,
808 .instance_count = instanceCount,
809 .padded_vertex_count = instanceCount > 1 ?
810 panfrost_padded_vertex_count(vertexCount) :
811 vertexCount,
812 .offset_start = firstVertex,
813 };
814
815 panvk_cmd_draw(cmdbuf, &draw);
816 }
817
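/* The draw needs the [min, max] range of vertices referenced by the index
 * buffer to size the vertex job and attribute reads, so we crawl the indices
 * on the CPU. For instance, with primitive restart disabled, indices
 * {5, 2, 7} give min = 2, max = 7 and thus a vertex_range of 6. As the
 * warning below says, this isn't really valid in Vulkan (the index buffer
 * may be GPU-written); a cached or GPU-side computation would be preferable.
 */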
818 static void
819 panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf,
820 uint32_t start, uint32_t count,
821 bool restart,
822 uint32_t *min, uint32_t *max)
823 {
824 assert(cmdbuf->state.ib.buffer);
825 assert(cmdbuf->state.ib.buffer->bo);
826 assert(cmdbuf->state.ib.buffer->bo->ptr.cpu);
827
828 void *ptr = cmdbuf->state.ib.buffer->bo->ptr.cpu +
829 cmdbuf->state.ib.buffer->bo_offset +
830 cmdbuf->state.ib.offset;
831
832 fprintf(stderr, "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n");
833
834 *max = 0;
835
836 /* TODO: Use panfrost_minmax_cache */
837 /* TODO: Read full cacheline of data to mitigate the uncached
838 * mapping slowness.
839 */
840 switch (cmdbuf->state.ib.index_size) {
841 #define MINMAX_SEARCH_CASE(sz) \
842 case sz: { \
843 uint ## sz ## _t *indices = ptr; \
844 *min = UINT ## sz ## _MAX; \
845 for (uint32_t i = 0; i < count; i++) { \
846 if (restart && indices[i + start] == UINT ## sz ##_MAX) continue; \
847 *min = MIN2(indices[i + start], *min); \
848 *max = MAX2(indices[i + start], *max); \
849 } \
850 break; \
851 }
852 MINMAX_SEARCH_CASE(32)
853 MINMAX_SEARCH_CASE(16)
854 MINMAX_SEARCH_CASE(8)
855 #undef MINMAX_SEARCH_CASE
856 default:
857 unreachable("Invalid index size");
858 }
859 }
860
861 void
862 panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
863 uint32_t indexCount,
864 uint32_t instanceCount,
865 uint32_t firstIndex,
866 int32_t vertexOffset,
867 uint32_t firstInstance)
868 {
869 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
870 uint32_t min_vertex, max_vertex;
871
872 if (instanceCount == 0 || indexCount == 0)
873 return;
874
875 const struct panvk_pipeline *pipeline =
876 panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
877 bool primitive_restart = pipeline->ia.primitive_restart;
878
879 panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart,
880 &min_vertex, &max_vertex);
881
882 unsigned vertex_range = max_vertex - min_vertex + 1;
883 struct panvk_draw_info draw = {
884 .index_size = cmdbuf->state.ib.index_size,
885 .first_index = firstIndex,
886 .index_count = indexCount,
887 .vertex_offset = vertexOffset,
888 .first_instance = firstInstance,
889 .instance_count = instanceCount,
890 .vertex_range = vertex_range,
891 .vertex_count = indexCount + abs(vertexOffset),
892 .padded_vertex_count = instanceCount > 1 ?
893 panfrost_padded_vertex_count(vertex_range) :
894 vertex_range,
895 .offset_start = min_vertex + vertexOffset,
896 .indices = panvk_buffer_gpu_ptr(cmdbuf->state.ib.buffer,
897 cmdbuf->state.ib.offset) +
898 (firstIndex * (cmdbuf->state.ib.index_size / 8)),
899 };
900
901 panvk_cmd_draw(cmdbuf, &draw);
902 }
903
904 VkResult
905 panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
906 {
907 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
908 VkResult ret =
909 cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ?
910 cmdbuf->vk.cmd_queue.error : cmdbuf->record_result;
911
912 panvk_per_arch(cmd_close_batch)(cmdbuf);
913 cmdbuf->status = ret == VK_SUCCESS ?
914 PANVK_CMD_BUFFER_STATUS_EXECUTABLE :
915 PANVK_CMD_BUFFER_STATUS_INVALID;
916 return ret;
917 }
918
919 void
920 panvk_per_arch(CmdEndRenderPass2)(VkCommandBuffer commandBuffer,
921 const VkSubpassEndInfo *pSubpassEndInfo)
922 {
923 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
924
925 panvk_per_arch(cmd_close_batch)(cmdbuf);
926 vk_free(&cmdbuf->pool->vk.alloc, cmdbuf->state.clear);
927 cmdbuf->state.batch = NULL;
928 cmdbuf->state.pass = NULL;
929 cmdbuf->state.subpass = NULL;
930 cmdbuf->state.framebuffer = NULL;
931 cmdbuf->state.clear = NULL;
932 }
933
934 void
935 panvk_per_arch(CmdEndRenderPass)(VkCommandBuffer cmd)
936 {
937 VkSubpassEndInfo einfo = {
938 .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
939 };
940
941 panvk_per_arch(CmdEndRenderPass2)(cmd, &einfo);
942 }
943
944
945 void
946 panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
947 const VkDependencyInfo *pDependencyInfo)
948 {
949 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
950
951 /* For now, caches are flushed and invalidated at batch boundaries, so as
952 * long as barriers are implemented by splitting the batch, there is nothing
953 * extra to do for memory barriers here.
954 * FIXME: We can probably do better with a CacheFlush job that has the
955 * barrier flag set.
956 */
957 if (cmdbuf->state.batch) {
958 panvk_per_arch(cmd_close_batch)(cmdbuf);
959 panvk_cmd_preload_fb_after_batch_split(cmdbuf);
960 panvk_cmd_open_batch(cmdbuf);
961 }
962 }
963
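/* Event set/reset operations are recorded on a batch and processed when that
 * batch is submitted. If no batch is open, open one just to carry the
 * operation; otherwise close the current batch so the operation is ordered
 * after everything recorded so far.
 */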
964 static void
965 panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
966 struct panvk_event *event,
967 enum panvk_event_op_type type)
968 {
969 struct panvk_event_op op = {
970 .type = type,
971 .event = event,
972 };
973
974 if (cmdbuf->state.batch == NULL) {
975 /* No open batch, let's create a new one so this operation happens in
976 * the right order.
977 */
978 panvk_cmd_open_batch(cmdbuf);
979 util_dynarray_append(&cmdbuf->state.batch->event_ops,
980 struct panvk_event_op,
981 op);
982 panvk_per_arch(cmd_close_batch)(cmdbuf);
983 } else {
984 /* Let's close the current batch so the operation executes before any
985 * future commands.
986 */
987 util_dynarray_append(&cmdbuf->state.batch->event_ops,
988 struct panvk_event_op,
989 op);
990 panvk_per_arch(cmd_close_batch)(cmdbuf);
991 panvk_cmd_preload_fb_after_batch_split(cmdbuf);
992 panvk_cmd_open_batch(cmdbuf);
993 }
994 }
995
996 static void
997 panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
998 struct panvk_event *event)
999 {
1000 struct panvk_event_op op = {
1001 .type = PANVK_EVENT_OP_WAIT,
1002 .event = event,
1003 };
1004
1005 if (cmdbuf->state.batch == NULL) {
1006 /* No open batch, let's create a new one and have it wait for this event. */
1007 panvk_cmd_open_batch(cmdbuf);
1008 util_dynarray_append(&cmdbuf->state.batch->event_ops,
1009 struct panvk_event_op,
1010 op);
1011 } else {
1012 /* Let's close the current batch so any future commands wait on the
1013 * event signal operation.
1014 */
1015 if (cmdbuf->state.batch->fragment_job ||
1016 cmdbuf->state.batch->scoreboard.first_job) {
1017 panvk_per_arch(cmd_close_batch)(cmdbuf);
1018 panvk_cmd_preload_fb_after_batch_split(cmdbuf);
1019 panvk_cmd_open_batch(cmdbuf);
1020 }
1021 util_dynarray_append(&cmdbuf->state.batch->event_ops,
1022 struct panvk_event_op,
1023 op);
1024 }
1025 }
1026
1027 void
1028 panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer,
1029 VkEvent _event,
1030 const VkDependencyInfo *pDependencyInfo)
1031 {
1032 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1033 VK_FROM_HANDLE(panvk_event, event, _event);
1034
1035 /* vkCmdSetEvent cannot be called inside a render pass */
1036 assert(cmdbuf->state.pass == NULL);
1037
1038 panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET);
1039 }
1040
1041 void
1042 panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer,
1043 VkEvent _event,
1044 VkPipelineStageFlags2 stageMask)
1045 {
1046 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1047 VK_FROM_HANDLE(panvk_event, event, _event);
1048
1049 /* vkCmdResetEvent cannot be called inside a render pass */
1050 assert(cmdbuf->state.pass == NULL);
1051
1052 panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET);
1053 }
1054
1055 void
1056 panvk_per_arch(CmdWaitEvents2)(VkCommandBuffer commandBuffer,
1057 uint32_t eventCount,
1058 const VkEvent *pEvents,
1059 const VkDependencyInfo *pDependencyInfos)
1060 {
1061 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1062
1063 assert(eventCount > 0);
1064
1065 for (uint32_t i = 0; i < eventCount; i++) {
1066 VK_FROM_HANDLE(panvk_event, event, pEvents[i]);
1067 panvk_add_wait_event_operation(cmdbuf, event);
1068 }
1069 }
1070
1071 static VkResult
1072 panvk_reset_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
1073 {
1074 vk_command_buffer_reset(&cmdbuf->vk);
1075
1076 cmdbuf->record_result = VK_SUCCESS;
1077
1078 list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
1079 list_del(&batch->node);
1080 util_dynarray_fini(&batch->jobs);
1081 util_dynarray_fini(&batch->event_ops);
1082
1083 vk_free(&cmdbuf->pool->vk.alloc, batch);
1084 }
1085
1086 panvk_pool_reset(&cmdbuf->desc_pool);
1087 panvk_pool_reset(&cmdbuf->tls_pool);
1088 panvk_pool_reset(&cmdbuf->varying_pool);
1089 cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;
1090
1091 for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
1092 memset(&cmdbuf->bind_points[i].desc_state.sets, 0, sizeof(cmdbuf->bind_points[0].desc_state.sets));
1093
1094 return cmdbuf->record_result;
1095 }
1096
1097 static void
1098 panvk_destroy_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
1099 {
1100 struct panvk_device *device = cmdbuf->device;
1101
1102 list_del(&cmdbuf->pool_link);
1103
1104 list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
1105 list_del(&batch->node);
1106 util_dynarray_fini(&batch->jobs);
1107 util_dynarray_fini(&batch->event_ops);
1108
1109 vk_free(&cmdbuf->pool->vk.alloc, batch);
1110 }
1111
1112 panvk_pool_cleanup(&cmdbuf->desc_pool);
1113 panvk_pool_cleanup(&cmdbuf->tls_pool);
1114 panvk_pool_cleanup(&cmdbuf->varying_pool);
1115 vk_command_buffer_finish(&cmdbuf->vk);
1116 vk_free(&device->vk.alloc, cmdbuf);
1117 }
1118
1119 static VkResult
1120 panvk_create_cmdbuf(struct panvk_device *device,
1121 struct panvk_cmd_pool *pool,
1122 VkCommandBufferLevel level,
1123 struct panvk_cmd_buffer **cmdbuf_out)
1124 {
1125 struct panvk_cmd_buffer *cmdbuf;
1126
1127 cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf),
1128 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1129 if (!cmdbuf)
1130 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1131
1132 VkResult result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk, level);
1133 if (result != VK_SUCCESS) {
1134 vk_free(&device->vk.alloc, cmdbuf);
1135 return result;
1136 }
1137
1138 cmdbuf->device = device;
1139 cmdbuf->pool = pool;
1140
1141 if (pool) {
1142 list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);
1143 cmdbuf->queue_family_index = pool->vk.queue_family_index;
1144 } else {
1145 /* Init the pool_link so we can safely call list_del when we destroy
1146 * the command buffer
1147 */
1148 list_inithead(&cmdbuf->pool_link);
1149 cmdbuf->queue_family_index = PANVK_QUEUE_GENERAL;
1150 }
1151
1152 panvk_pool_init(&cmdbuf->desc_pool, &device->physical_device->pdev,
1153 pool ? &pool->desc_bo_pool : NULL, 0, 64 * 1024,
1154 "Command buffer descriptor pool", true);
1155 panvk_pool_init(&cmdbuf->tls_pool, &device->physical_device->pdev,
1156 pool ? &pool->tls_bo_pool : NULL,
1157 panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE),
1158 64 * 1024, "TLS pool", false);
1159 panvk_pool_init(&cmdbuf->varying_pool, &device->physical_device->pdev,
1160 pool ? &pool->varying_bo_pool : NULL,
1161 panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE),
1162 64 * 1024, "Varyings pool", false);
1163 list_inithead(&cmdbuf->batches);
1164 cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;
1165 *cmdbuf_out = cmdbuf;
1166 return VK_SUCCESS;
1167 }
1168
1169 VkResult
1170 panvk_per_arch(AllocateCommandBuffers)(VkDevice _device,
1171 const VkCommandBufferAllocateInfo *pAllocateInfo,
1172 VkCommandBuffer *pCommandBuffers)
1173 {
1174 VK_FROM_HANDLE(panvk_device, device, _device);
1175 VK_FROM_HANDLE(panvk_cmd_pool, pool, pAllocateInfo->commandPool);
1176
1177 VkResult result = VK_SUCCESS;
1178 unsigned i;
1179
1180 for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
1181 struct panvk_cmd_buffer *cmdbuf = NULL;
1182
1183 if (!list_is_empty(&pool->free_cmd_buffers)) {
1184 cmdbuf = list_first_entry(
1185 &pool->free_cmd_buffers, struct panvk_cmd_buffer, pool_link);
1186
1187 list_del(&cmdbuf->pool_link);
1188 list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);
1189
1190 vk_command_buffer_finish(&cmdbuf->vk);
1191 result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk, pAllocateInfo->level);
1192 } else {
1193 result = panvk_create_cmdbuf(device, pool, pAllocateInfo->level, &cmdbuf);
1194 }
1195
1196 if (result != VK_SUCCESS)
1197 goto err_free_cmd_bufs;
1198
1199 pCommandBuffers[i] = panvk_cmd_buffer_to_handle(cmdbuf);
1200 }
1201
1202 return VK_SUCCESS;
1203
1204 err_free_cmd_bufs:
1205 panvk_per_arch(FreeCommandBuffers)(_device, pAllocateInfo->commandPool, i,
1206 pCommandBuffers);
1207 for (unsigned j = 0; j < i; j++)
1208 pCommandBuffers[j] = VK_NULL_HANDLE;
1209
1210 return result;
1211 }
1212
1213 void
1214 panvk_per_arch(FreeCommandBuffers)(VkDevice device,
1215 VkCommandPool commandPool,
1216 uint32_t commandBufferCount,
1217 const VkCommandBuffer *pCommandBuffers)
1218 {
1219 for (uint32_t i = 0; i < commandBufferCount; i++) {
1220 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, pCommandBuffers[i]);
1221
1222 if (cmdbuf) {
1223 if (cmdbuf->pool) {
1224 list_del(&cmdbuf->pool_link);
1225 panvk_reset_cmdbuf(cmdbuf);
1226 list_addtail(&cmdbuf->pool_link,
1227 &cmdbuf->pool->free_cmd_buffers);
1228 } else
1229 panvk_destroy_cmdbuf(cmdbuf);
1230 }
1231 }
1232 }
1233
1234 VkResult
1235 panvk_per_arch(ResetCommandBuffer)(VkCommandBuffer commandBuffer,
1236 VkCommandBufferResetFlags flags)
1237 {
1238 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1239
1240 return panvk_reset_cmdbuf(cmdbuf);
1241 }
1242
1243 VkResult
1244 panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
1245 const VkCommandBufferBeginInfo *pBeginInfo)
1246 {
1247 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1248 VkResult result = VK_SUCCESS;
1249
1250 if (cmdbuf->status != PANVK_CMD_BUFFER_STATUS_INITIAL) {
1251 /* If the command buffer has already been reset with
1252 * vkResetCommandBuffer, no need to do it again.
1253 */
1254 result = panvk_reset_cmdbuf(cmdbuf);
1255 if (result != VK_SUCCESS)
1256 return result;
1257 }
1258
1259 memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
1260
1261 cmdbuf->status = PANVK_CMD_BUFFER_STATUS_RECORDING;
1262
1263 return VK_SUCCESS;
1264 }
1265
1266 void
1267 panvk_per_arch(DestroyCommandPool)(VkDevice _device,
1268 VkCommandPool commandPool,
1269 const VkAllocationCallbacks *pAllocator)
1270 {
1271 VK_FROM_HANDLE(panvk_device, device, _device);
1272 VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);
1273
1274 list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
1275 &pool->active_cmd_buffers, pool_link)
1276 panvk_destroy_cmdbuf(cmdbuf);
1277
1278 list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
1279 &pool->free_cmd_buffers, pool_link)
1280 panvk_destroy_cmdbuf(cmdbuf);
1281
1282 panvk_bo_pool_cleanup(&pool->desc_bo_pool);
1283 panvk_bo_pool_cleanup(&pool->varying_bo_pool);
1284 panvk_bo_pool_cleanup(&pool->tls_bo_pool);
1285
1286 vk_command_pool_finish(&pool->vk);
1287 vk_free2(&device->vk.alloc, pAllocator, pool);
1288 }
1289
1290 VkResult
1291 panvk_per_arch(ResetCommandPool)(VkDevice device,
1292 VkCommandPool commandPool,
1293 VkCommandPoolResetFlags flags)
1294 {
1295 VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);
1296 VkResult result;
1297
1298 list_for_each_entry(struct panvk_cmd_buffer, cmdbuf, &pool->active_cmd_buffers,
1299 pool_link)
1300 {
1301 result = panvk_reset_cmdbuf(cmdbuf);
1302 if (result != VK_SUCCESS)
1303 return result;
1304 }
1305
1306 return VK_SUCCESS;
1307 }
1308
1309 void
1310 panvk_per_arch(TrimCommandPool)(VkDevice device,
1311 VkCommandPool commandPool,
1312 VkCommandPoolTrimFlags flags)
1313 {
1314 VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);
1315
1316 if (!pool)
1317 return;
1318
1319 list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
1320 &pool->free_cmd_buffers, pool_link)
1321 panvk_destroy_cmdbuf(cmdbuf);
1322 }
1323
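/* Compute dispatches get a batch of their own: close whatever is open, open
 * a fresh batch, fill the compute sysvals (workgroup count and local size),
 * upload the descriptor state and emit a single COMPUTE job. WLS memory is
 * sized from the dispatch dimensions when the pipeline needs it.
 */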
1324 void
1325 panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer,
1326 uint32_t x,
1327 uint32_t y,
1328 uint32_t z)
1329 {
1330 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1331 const struct panfrost_device *pdev =
1332 &cmdbuf->device->physical_device->pdev;
1333 struct panvk_dispatch_info dispatch = {
1334 .wg_count = { x, y, z },
1335 };
1336
1337 panvk_per_arch(cmd_close_batch)(cmdbuf);
1338 struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
1339
1340 struct panvk_cmd_bind_point_state *bind_point_state =
1341 panvk_cmd_get_bind_point_state(cmdbuf, COMPUTE);
1342 struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
1343 const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
1344 struct panfrost_ptr job =
1345 pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);
1346
1347 struct panvk_sysvals *sysvals = &desc_state->sysvals;
1348 sysvals->num_work_groups.u32[0] = x;
1349 sysvals->num_work_groups.u32[1] = y;
1350 sysvals->num_work_groups.u32[2] = z;
1351 sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x;
1352 sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y;
1353 sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z;
1354 desc_state->sysvals_ptr = 0;
1355
1356 panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
1357 dispatch.tsd = batch->tls.gpu;
1358
1359 panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
1360 dispatch.attributes = desc_state->non_vs_attribs;
1361 dispatch.attribute_bufs = desc_state->non_vs_attrib_bufs;
1362
1363 panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
1364 dispatch.ubos = desc_state->ubos;
1365
1366 panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
1367 dispatch.textures = desc_state->textures;
1368
1369 panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
1370 dispatch.samplers = desc_state->samplers;
1371
1372 panvk_per_arch(emit_compute_job)(pipeline, &dispatch, job.cpu);
1373 panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
1374 MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
1375 &job, false);
1376
1377 batch->tlsinfo.tls.size = pipeline->tls_size;
1378 batch->tlsinfo.wls.size = pipeline->wls_size;
1379 if (batch->tlsinfo.wls.size) {
1380 batch->wls_total_size =
1381 pan_wls_mem_size(pdev, &dispatch.wg_count, batch->tlsinfo.wls.size);
1382 }
1383
1384 panvk_per_arch(cmd_close_batch)(cmdbuf);
1385 desc_state->dirty = 0;
1386 }
1387