/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "panvk_cs.h"
#include "panvk_private.h"

#include "pan_blitter.h"
#include "pan_cs.h"
#include "pan_encoder.h"

#include "util/rounding.h"
#include "util/u_pack_color.h"
#include "vk_format.h"

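/* When the PANVK_DEBUG_DUMP flag is set, BOs must stay CPU-visible so their
 * contents can be dumped; otherwise the requested flags are returned
 * unchanged.
 */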
static uint32_t
panvk_debug_adjust_bo_flags(const struct panvk_device *device,
                            uint32_t bo_flags)
{
   uint32_t debug_flags =
      device->physical_device->instance->debug_flags;

   if (debug_flags & PANVK_DEBUG_DUMP)
      bo_flags &= ~PAN_BO_INVISIBLE;

   return bo_flags;
}

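/* Allocate the FRAGMENT job descriptor for the current batch and emit it
 * from the batch's framebuffer descriptor.
 */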
static void
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr job_ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB);

   GENX(pan_emit_fragment_job)(fbinfo, batch->fb.desc.gpu, job_ptr.cpu);
   batch->fragment_job = job_ptr.gpu;
   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
}

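/* Close the current batch: drop it if it is empty, emit a NULL job if it
 * only carries event operations, otherwise allocate TLS/WLS memory, emit
 * the preload and fragment jobs, and queue the batch for submission.
 */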
void
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (!batch)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   assert(batch);

   bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
   for (unsigned i = 0; i < fbinfo->rt_count; i++)
      clear |= fbinfo->rts[i].clear;

   if (!clear && !batch->scoreboard.first_job) {
      if (util_dynarray_num_elements(&batch->event_ops, struct panvk_event_op) == 0) {
         /* Content-less batch, let's drop it */
         vk_free(&cmdbuf->pool->vk.alloc, batch);
      } else {
         /* Batch has no jobs but is needed for synchronization, let's add a
          * NULL job so the SUBMIT ioctl doesn't choke on it.
          */
         struct panfrost_ptr ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base,
                                                       JOB_HEADER);
         util_dynarray_append(&batch->jobs, void *, ptr.cpu);
         panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                          MALI_JOB_TYPE_NULL, false, false, 0, 0,
                          &ptr, false);
         list_addtail(&batch->node, &cmdbuf->batches);
      }
      cmdbuf->state.batch = NULL;
      return;
   }

   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   list_addtail(&batch->node, &cmdbuf->batches);

   if (batch->scoreboard.first_tiler) {
      struct panfrost_ptr preload_jobs[2];
      unsigned num_preload_jobs =
         GENX(pan_preload_fb)(&cmdbuf->desc_pool.base, &batch->scoreboard,
                              &cmdbuf->state.fb.info, batch->tls.gpu,
                              batch->tiler.descs.gpu, preload_jobs);
      for (unsigned i = 0; i < num_preload_jobs; i++)
         util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
   }

   if (batch->tlsinfo.tls.size) {
      unsigned size = panfrost_get_total_stack_size(batch->tlsinfo.tls.size,
                                                    pdev->thread_tls_alloc,
                                                    pdev->core_id_range);
      batch->tlsinfo.tls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, size, 4096).gpu;
   }

   if (batch->tlsinfo.wls.size) {
      assert(batch->wls_total_size);
      batch->tlsinfo.wls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, batch->wls_total_size, 4096).gpu;
   }

   if (batch->tls.cpu)
      GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);

   if (batch->fb.desc.cpu) {
      batch->fb.desc.gpu |=
         GENX(pan_emit_fbd)(pdev, &cmdbuf->state.fb.info, &batch->tlsinfo,
                            &batch->tiler.ctx, batch->fb.desc.cpu);

      panvk_cmd_prepare_fragment_job(cmdbuf);
   }

   cmdbuf->state.batch = NULL;
}

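/* Moving to the next subpass closes the current batch and opens a new one
 * against the updated framebuffer state.
 */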
void
panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
                                const VkSubpassBeginInfo *pSubpassBeginInfo,
                                const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   cmdbuf->state.subpass++;
   panvk_cmd_fb_info_set_subpass(cmdbuf);
   panvk_cmd_open_batch(cmdbuf);
}

void
panvk_per_arch(CmdNextSubpass)(VkCommandBuffer cmd, VkSubpassContents contents)
{
   VkSubpassBeginInfo binfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
      .contents = contents
   };
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdNextSubpass2)(cmd, &binfo, &einfo);
}

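/* Allocate the framebuffer descriptor (MFBD) for the current batch, along
 * with the optional ZS/CRC extension and one render-target descriptor per
 * color attachment, and tag the GPU pointer accordingly.
 */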
void
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->fb.desc.gpu)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
   unsigned tags = MALI_FBD_TAG_IS_MFBD;

   batch->fb.info = cmdbuf->state.framebuffer;
   batch->fb.desc =
      pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                    PAN_DESC(FRAMEBUFFER),
                                    PAN_DESC_ARRAY(has_zs_ext ? 1 : 0, ZS_CRC_EXTENSION),
                                    PAN_DESC_ARRAY(MAX2(fbinfo->rt_count, 1), RENDER_TARGET));

   /* Tag the pointer */
   batch->fb.desc.gpu |= tags;

   memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
          sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
}

void
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   assert(batch);
   if (!batch->tls.gpu) {
      batch->tls =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   }
}

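/* Refresh the draw-related sysvals (first vertex, base vertex/instance,
 * blend constants, viewport) and invalidate the uploaded sysval buffer
 * whenever any of them changed.
 */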
static void
panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf,
                               struct panvk_cmd_bind_point_state *bind_point_state,
                               struct panvk_draw_info *draw)
{
   struct panvk_sysvals *sysvals = &bind_point_state->desc_state.sysvals;

   unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
   if (sysvals->first_vertex != draw->offset_start ||
       sysvals->base_vertex != base_vertex ||
       sysvals->base_instance != draw->first_instance) {
      sysvals->first_vertex = draw->offset_start;
      sysvals->base_vertex = base_vertex;
      sysvals->base_instance = draw->first_instance;
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) {
      memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants,
             sizeof(cmdbuf->state.blend.constants));
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) {
      panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport,
                                         &sysvals->viewport_scale);
      panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport,
                                          &sysvals->viewport_offset);
      bind_point_state->desc_state.sysvals_ptr = 0;
   }
}

static void
panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;

   if (desc_state->sysvals_ptr)
      return;

   struct panfrost_ptr sysvals =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             sizeof(desc_state->sysvals), 16);
   memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals));
   desc_state->sysvals_ptr = sysvals.gpu;
}

static void
panvk_cmd_prepare_push_constants(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->layout->push_constants.size || desc_state->push_constants)
      return;

   struct panfrost_ptr push_constants =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             ALIGN_POT(pipeline->layout->push_constants.size, 16),
                             16);

   memcpy(push_constants.cpu, cmdbuf->push_constants,
          pipeline->layout->push_constants.size);
   desc_state->push_constants = push_constants.gpu;
}

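/* Upload sysvals and push constants first, then emit one UNIFORM_BUFFER
 * descriptor per UBO declared by the pipeline.
 */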
static void
panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
                       struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->num_ubos || desc_state->ubos)
      return;

   panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);
   panvk_cmd_prepare_push_constants(cmdbuf, bind_point_state);

   struct panfrost_ptr ubos =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                pipeline->num_ubos,
                                UNIFORM_BUFFER);

   panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu);

   desc_state->ubos = ubos.gpu;
}

static void
panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_textures = pipeline->layout->num_textures;

   if (!num_textures || desc_state->textures)
      return;

   struct panfrost_ptr textures =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             num_textures * pan_size(TEXTURE),
                             pan_size(TEXTURE));

   void *texture = textures.cpu;

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i]) continue;

      memcpy(texture,
             desc_state->sets[i]->textures,
             desc_state->sets[i]->layout->num_textures *
             pan_size(TEXTURE));

      texture += desc_state->sets[i]->layout->num_textures *
                 pan_size(TEXTURE);
   }

   desc_state->textures = textures.gpu;
}

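/* Build the sampler table: a dummy sampler is emitted at index 0, followed
 * by the samplers of each bound descriptor set.
 */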
static void
panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_samplers = pipeline->layout->num_samplers;

   if (!num_samplers || desc_state->samplers)
      return;

   struct panfrost_ptr samplers =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                num_samplers,
                                SAMPLER);

   void *sampler = samplers.cpu;

   /* Prepare the dummy sampler */
   pan_pack(sampler, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.magnify_nearest = true;
      cfg.minify_nearest = true;
      cfg.normalized_coordinates = false;
   }

   sampler += pan_size(SAMPLER);

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i]) continue;

      memcpy(sampler,
             desc_state->sets[i]->samplers,
             desc_state->sets[i]->layout->num_samplers *
             pan_size(SAMPLER));

      sampler += desc_state->sets[i]->layout->num_samplers *
                 pan_size(SAMPLER);
   }

   desc_state->samplers = samplers.gpu;
}

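/* Pick the fragment renderer state descriptor for this draw: reuse the
 * pipeline's static RSD when possible, otherwise merge the dynamic state
 * and blend constants into the pipeline templates and upload a per-draw
 * copy, cached in cmdbuf->state.fs_rsd.
 */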
static void
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (!pipeline->fs.dynamic_rsd) {
      draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT];
      return;
   }

   if (!cmdbuf->state.fs_rsd) {
      struct panfrost_ptr rsd =
         pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                       PAN_DESC(RENDERER_STATE),
                                       PAN_DESC_ARRAY(pipeline->blend.state.rt_count,
                                                      BLEND));

      struct mali_renderer_state_packed rsd_dyn;
      struct mali_renderer_state_packed *rsd_templ =
         (struct mali_renderer_state_packed *)&pipeline->fs.rsd_template;

      STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >= sizeof(*rsd_templ));

      panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn);
      pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE);
      memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn));

      void *bd = rsd.cpu + pan_size(RENDERER_STATE);
      for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
         if (pipeline->blend.constant[i].index != (uint8_t)~0) {
            struct mali_blend_packed bd_dyn;
            struct mali_blend_packed *bd_templ =
               (struct mali_blend_packed *)&pipeline->blend.bd_template[i];

            STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >= sizeof(*bd_templ));
            panvk_per_arch(emit_blend_constant)(cmdbuf->device, pipeline, i,
                                                cmdbuf->state.blend.constants,
                                                &bd_dyn);
            pan_merge(bd_dyn, (*bd_templ), BLEND);
            memcpy(bd, &bd_dyn, sizeof(bd_dyn));
         }
         bd += pan_size(BLEND);
      }

      cmdbuf->state.fs_rsd = rsd.gpu;
   }

   draw->fs_rsd = cmdbuf->state.fs_rsd;
}

void
panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
                                      unsigned width, unsigned height)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->tiler.descs.cpu)
      return;

   batch->tiler.descs =
      pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                    PAN_DESC(TILER_CONTEXT),
                                    PAN_DESC(TILER_HEAP));
   STATIC_ASSERT(sizeof(batch->tiler.templ) >=
                 pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));

   struct panfrost_ptr desc = {
      .gpu = batch->tiler.descs.gpu,
      .cpu = batch->tiler.templ,
   };

   panvk_per_arch(emit_tiler_context)(cmdbuf->device, width, height, &desc);
   memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
          pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
   batch->tiler.ctx.bifrost = batch->tiler.descs.gpu;
}

void
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   panvk_per_arch(cmd_get_tiler_context)(cmdbuf,
                                         fbinfo->width,
                                         fbinfo->height);
}

static void
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
   draw->tiler_ctx = &batch->tiler.ctx;
}

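/* Allocate varying storage for this draw, emit the varying buffer
 * descriptors (plus a NULL terminator entry) and the per-stage varying
 * ATTRIBUTE descriptors, and record the position/point-size/line-width
 * information the tiler job needs.
 */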
static void
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;

   panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
                        draw->padded_vertex_count * draw->instance_count);

   unsigned buf_count = panvk_varyings_buf_count(varyings);
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                buf_count + 1,
                                ATTRIBUTE_BUFFER);

   panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu);

   /* We need an empty entry to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
      draw->position = varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
                       varyings->varying[VARYING_SLOT_POS].offset;
   }

   if (pipeline->ia.writes_point_size) {
      draw->psiz = varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
                   varyings->varying[VARYING_SLOT_PSIZ].offset;
   } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
      draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH ?
                         cmdbuf->state.rast.line_width : pipeline->rast.line_width;
   } else {
      draw->line_width = 1.0f;
   }
   draw->varying_bufs = bufs.gpu;

   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!varyings->stage[s].count) continue;

      struct panfrost_ptr attribs =
         pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                   varyings->stage[s].count,
                                   ATTRIBUTE);

      panvk_per_arch(emit_varyings)(cmdbuf->device, varyings, s, attribs.cpu);
      draw->stages[s].varyings = attribs.gpu;
   }
}

static void
panvk_fill_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state,
                          void *attrib_bufs, void *attribs,
                          unsigned first_buf)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned s = 0; s < pipeline->layout->num_sets; s++) {
      const struct panvk_descriptor_set *set = desc_state->sets[s];

      if (!set) continue;

      const struct panvk_descriptor_set_layout *layout = set->layout;
      unsigned img_idx = pipeline->layout->sets[s].img_offset;
      unsigned offset = img_idx * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned size = layout->num_imgs * pan_size(ATTRIBUTE_BUFFER) * 2;

      memcpy(attrib_bufs + offset, desc_state->sets[s]->img_attrib_bufs, size);

      offset = img_idx * pan_size(ATTRIBUTE);
      for (unsigned i = 0; i < layout->num_imgs; i++) {
         pan_pack(attribs + offset, ATTRIBUTE, cfg) {
            cfg.buffer_index = first_buf + (img_idx + i) * 2;
            cfg.format = desc_state->sets[s]->img_fmts[i];
         }
         offset += pan_size(ATTRIBUTE);
      }
   }
}

static void
panvk_prepare_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (desc_state->non_vs_attribs || !pipeline->img_access_mask)
      return;

   unsigned attrib_count = pipeline->layout->num_imgs;
   unsigned attrib_buf_count = (pipeline->layout->num_imgs * 2);
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                attrib_buf_count + 1,
                                ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
                                ATTRIBUTE);

   panvk_fill_non_vs_attribs(cmdbuf, bind_point_state, bufs.cpu, attribs.cpu, 0);

   desc_state->non_vs_attrib_bufs = bufs.gpu;
   desc_state->non_vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_imgs =
      pipeline->img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX) ?
      pipeline->layout->num_imgs : 0;
   unsigned attrib_count = pipeline->attribs.attrib_count + num_imgs;

   if (desc_state->vs_attribs || !attrib_count)
      return;

   if (!pipeline->attribs.buf_count) {
      panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
      desc_state->vs_attrib_bufs = desc_state->non_vs_attrib_bufs;
      desc_state->vs_attribs = desc_state->non_vs_attribs;
      return;
   }

   unsigned attrib_buf_count = pipeline->attribs.buf_count * 2;
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                attrib_buf_count + 1,
                                ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
                                ATTRIBUTE);

   panvk_per_arch(emit_attrib_bufs)(&pipeline->attribs,
                                    cmdbuf->state.vb.bufs,
                                    cmdbuf->state.vb.count,
                                    draw, bufs.cpu);
   panvk_per_arch(emit_attribs)(cmdbuf->device, draw, &pipeline->attribs,
                                cmdbuf->state.vb.bufs, cmdbuf->state.vb.count,
                                attribs.cpu);

   if (attrib_count > pipeline->attribs.buf_count) {
      unsigned bufs_offset = pipeline->attribs.buf_count * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned attribs_offset = pipeline->attribs.buf_count * pan_size(ATTRIBUTE);

      panvk_fill_non_vs_attribs(cmdbuf, bind_point_state,
                                bufs.cpu + bufs_offset, attribs.cpu + attribs_offset,
                                pipeline->attribs.buf_count * 2);
   }

   /* A NULL entry is needed to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   desc_state->vs_attrib_bufs = bufs.gpu;
   desc_state->vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned i = 0; i < ARRAY_SIZE(draw->stages); i++) {
      if (i == MESA_SHADER_VERTEX) {
         panvk_draw_prepare_vs_attribs(cmdbuf, draw);
         draw->stages[i].attributes = desc_state->vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->vs_attrib_bufs;
      } else if (pipeline->img_access_mask & BITFIELD_BIT(i)) {
         panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
         draw->stages[i].attributes = desc_state->non_vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->non_vs_attrib_bufs;
      }
   }
}

static void
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (pipeline->vpd) {
      draw->viewport = pipeline->vpd;
   } else if (cmdbuf->state.vpd) {
      draw->viewport = cmdbuf->state.vpd;
   } else {
      struct panfrost_ptr vp =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT);

      const VkViewport *viewport =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT ?
         &cmdbuf->state.viewport : &pipeline->viewport;
      const VkRect2D *scissor =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR ?
         &cmdbuf->state.scissor : &pipeline->scissor;

      panvk_per_arch(emit_viewport)(viewport, scissor, vp.cpu);
      draw->viewport = cmdbuf->state.vpd = vp.gpu;
   }
}

static void
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.vertex = ptr;
   panvk_per_arch(emit_vertex_job)(pipeline, draw, ptr.cpu);
}

static void
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.tiler = ptr;
   panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu);
}

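/* Common draw path: make sure the batch can hold the draw's jobs, prepare
 * all descriptors (FB, TLS, sysvals, UBOs, textures, samplers, RSD,
 * varyings, attributes, viewport, tiler context), then emit and chain the
 * vertex and tiler jobs.
 */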
static void
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf,
               struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   /* There are only 16 bits for the job ID in the job descriptor, so make
    * sure all jobs emitted for this draw (three, or two on Bifrost) land in
    * the same batch.
    */
   if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      batch = panvk_cmd_open_batch(cmdbuf);
   }

   if (pipeline->rast.enable)
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);

   panvk_cmd_prepare_draw_sysvals(cmdbuf, bind_point_state, draw);
   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);

   /* TODO: indexed draws */
   struct panvk_descriptor_state *desc_state =
      panvk_cmd_get_desc_state(cmdbuf, GRAPHICS);

   draw->tls = batch->tls.gpu;
   draw->fb = batch->fb.desc.gpu;
   draw->ubos = desc_state->ubos;
   draw->textures = desc_state->textures;
   draw->samplers = desc_state->samplers;

   STATIC_ASSERT(sizeof(draw->invocation) >= sizeof(struct mali_invocation_packed));
   panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw->invocation,
                                      1, draw->vertex_range, draw->instance_count,
                                      1, 1, 1, true, false);

   panvk_draw_prepare_fs_rsd(cmdbuf, draw);
   panvk_draw_prepare_varyings(cmdbuf, draw);
   panvk_draw_prepare_attributes(cmdbuf, draw);
   panvk_draw_prepare_viewport(cmdbuf, draw);
   panvk_draw_prepare_tiler_context(cmdbuf, draw);
   panvk_draw_prepare_vertex_job(cmdbuf, draw);
   panvk_draw_prepare_tiler_job(cmdbuf, draw);
   batch->tlsinfo.tls.size = MAX2(pipeline->tls_size, batch->tlsinfo.tls.size);
   assert(!pipeline->wls_size);

   unsigned vjob_id =
      panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                       MALI_JOB_TYPE_VERTEX, false, false, 0, 0,
                       &draw->jobs.vertex, false);

   if (pipeline->rast.enable) {
      panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                       MALI_JOB_TYPE_TILER, false, false, vjob_id, 0,
                       &draw->jobs.tiler, false);
   }

   /* Clear the dirty flags all at once */
   desc_state->dirty = cmdbuf->state.dirty = 0;
}

void
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
                        uint32_t vertexCount,
                        uint32_t instanceCount,
                        uint32_t firstVertex,
                        uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   if (instanceCount == 0 || vertexCount == 0)
      return;

   struct panvk_draw_info draw = {
      .first_vertex = firstVertex,
      .vertex_count = vertexCount,
      .vertex_range = vertexCount,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .padded_vertex_count = instanceCount > 1 ?
                             panfrost_padded_vertex_count(vertexCount) :
                             vertexCount,
      .offset_start = firstVertex,
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

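/* Scan the index buffer on the CPU to find the [min, max] vertex range
 * referenced by an indexed draw, skipping primitive-restart indices when
 * restart is enabled.
 */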
static void
panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf,
                          uint32_t start, uint32_t count,
                          bool restart,
                          uint32_t *min, uint32_t *max)
{
   assert(cmdbuf->state.ib.buffer);
   assert(cmdbuf->state.ib.buffer->bo);
   assert(cmdbuf->state.ib.buffer->bo->ptr.cpu);

   void *ptr = cmdbuf->state.ib.buffer->bo->ptr.cpu +
               cmdbuf->state.ib.buffer->bo_offset +
               cmdbuf->state.ib.offset;

   fprintf(stderr, "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n");

   *max = 0;

   /* TODO: Use panfrost_minmax_cache */
   /* TODO: Read a full cacheline of data at a time to mitigate the
    * slowness of the uncached mapping.
    */
   switch (cmdbuf->state.ib.index_size) {
#define MINMAX_SEARCH_CASE(sz) \
   case sz: { \
      uint ## sz ## _t *indices = ptr; \
      *min = UINT ## sz ## _MAX; \
      for (uint32_t i = 0; i < count; i++) { \
         if (restart && indices[i + start] == UINT ## sz ##_MAX) continue; \
         *min = MIN2(indices[i + start], *min); \
         *max = MAX2(indices[i + start], *max); \
      } \
      break; \
   }
   MINMAX_SEARCH_CASE(32)
   MINMAX_SEARCH_CASE(16)
   MINMAX_SEARCH_CASE(8)
#undef MINMAX_SEARCH_CASE
   default:
      unreachable("Invalid index size");
   }
}

void
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
                               uint32_t indexCount,
                               uint32_t instanceCount,
                               uint32_t firstIndex,
                               int32_t vertexOffset,
                               uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   uint32_t min_vertex, max_vertex;

   if (instanceCount == 0 || indexCount == 0)
      return;

   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   bool primitive_restart = pipeline->ia.primitive_restart;

   panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart,
                             &min_vertex, &max_vertex);

   unsigned vertex_range = max_vertex - min_vertex + 1;
   struct panvk_draw_info draw = {
      .index_size = cmdbuf->state.ib.index_size,
      .first_index = firstIndex,
      .index_count = indexCount,
      .vertex_offset = vertexOffset,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .vertex_range = vertex_range,
      .vertex_count = indexCount + abs(vertexOffset),
      .padded_vertex_count = instanceCount > 1 ?
                             panfrost_padded_vertex_count(vertex_range) :
                             vertex_range,
      .offset_start = min_vertex + vertexOffset,
      .indices = panvk_buffer_gpu_ptr(cmdbuf->state.ib.buffer,
                                      cmdbuf->state.ib.offset) +
                 (firstIndex * (cmdbuf->state.ib.index_size / 8)),
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

VkResult
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VkResult ret =
      cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ?
      cmdbuf->vk.cmd_queue.error : cmdbuf->record_result;

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   cmdbuf->status = ret == VK_SUCCESS ?
                    PANVK_CMD_BUFFER_STATUS_EXECUTABLE :
                    PANVK_CMD_BUFFER_STATUS_INVALID;
   return ret;
}

void
panvk_per_arch(CmdEndRenderPass2)(VkCommandBuffer commandBuffer,
                                  const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   vk_free(&cmdbuf->pool->vk.alloc, cmdbuf->state.clear);
   cmdbuf->state.batch = NULL;
   cmdbuf->state.pass = NULL;
   cmdbuf->state.subpass = NULL;
   cmdbuf->state.framebuffer = NULL;
   cmdbuf->state.clear = NULL;
}

void
panvk_per_arch(CmdEndRenderPass)(VkCommandBuffer cmd)
{
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdEndRenderPass2)(cmd, &einfo);
}


void
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
                                    const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* Caches are flushed/invalidated at batch boundaries for now, so there is
    * nothing to do for memory barriers as long as barriers are implemented by
    * starting a new batch.
    * FIXME: We can probably do better with a CacheFlush job that has the
    * barrier flag set to true.
    */
   if (cmdbuf->state.batch) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

static void
panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_event *event,
                              enum panvk_event_op_type type)
{
   struct panvk_event_op op = {
      .type = type,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one so this operation happens in
       * the right order.
       */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   } else {
      /* Let's close the current batch so the operation executes before any
       * future commands.
       */
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

static void
panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
                               struct panvk_event *event)
{
   struct panvk_event_op op = {
      .type = PANVK_EVENT_OP_WAIT,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one and have it wait for this event. */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
   } else {
      /* Let's close the current batch so any future commands wait on the
       * event signal operation.
       */
      if (cmdbuf->state.batch->fragment_job ||
          cmdbuf->state.batch->scoreboard.first_job) {
         panvk_per_arch(cmd_close_batch)(cmdbuf);
         panvk_cmd_preload_fb_after_batch_split(cmdbuf);
         panvk_cmd_open_batch(cmdbuf);
      }
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
   }
}

void
panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer,
                             VkEvent _event,
                             const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdSetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET);
}

void
panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer,
                               VkEvent _event,
                               VkPipelineStageFlags2 stageMask)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdResetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET);
}

void
panvk_per_arch(CmdWaitEvents2)(VkCommandBuffer commandBuffer,
                               uint32_t eventCount,
                               const VkEvent *pEvents,
                               const VkDependencyInfo *pDependencyInfos)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   assert(eventCount > 0);

   for (uint32_t i = 0; i < eventCount; i++) {
      VK_FROM_HANDLE(panvk_event, event, pEvents[i]);
      panvk_add_wait_event_operation(cmdbuf, event);
   }
}

static VkResult
panvk_reset_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
{
   vk_command_buffer_reset(&cmdbuf->vk);

   cmdbuf->record_result = VK_SUCCESS;

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->pool->vk.alloc, batch);
   }

   panvk_pool_reset(&cmdbuf->desc_pool);
   panvk_pool_reset(&cmdbuf->tls_pool);
   panvk_pool_reset(&cmdbuf->varying_pool);
   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;

   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
      memset(&cmdbuf->bind_points[i].desc_state.sets, 0, sizeof(cmdbuf->bind_points[0].desc_state.sets));

   return cmdbuf->record_result;
}

static void
panvk_destroy_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_device *device = cmdbuf->device;

   list_del(&cmdbuf->pool_link);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->pool->vk.alloc, batch);
   }

   panvk_pool_cleanup(&cmdbuf->desc_pool);
   panvk_pool_cleanup(&cmdbuf->tls_pool);
   panvk_pool_cleanup(&cmdbuf->varying_pool);
   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&device->vk.alloc, cmdbuf);
}

static VkResult
panvk_create_cmdbuf(struct panvk_device *device,
                    struct panvk_cmd_pool *pool,
                    VkCommandBufferLevel level,
                    struct panvk_cmd_buffer **cmdbuf_out)
{
   struct panvk_cmd_buffer *cmdbuf;

   cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf),
                      8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk, level);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, cmdbuf);
      return result;
   }

   cmdbuf->device = device;
   cmdbuf->pool = pool;

   if (pool) {
      list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);
      cmdbuf->queue_family_index = pool->vk.queue_family_index;
   } else {
      /* Init the pool_link so we can safely call list_del when we destroy
       * the command buffer
       */
      list_inithead(&cmdbuf->pool_link);
      cmdbuf->queue_family_index = PANVK_QUEUE_GENERAL;
   }

   panvk_pool_init(&cmdbuf->desc_pool, &device->physical_device->pdev,
                   pool ? &pool->desc_bo_pool : NULL, 0, 64 * 1024,
                   "Command buffer descriptor pool", true);
   panvk_pool_init(&cmdbuf->tls_pool, &device->physical_device->pdev,
                   pool ? &pool->tls_bo_pool : NULL,
                   panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE),
                   64 * 1024, "TLS pool", false);
   panvk_pool_init(&cmdbuf->varying_pool, &device->physical_device->pdev,
                   pool ? &pool->varying_bo_pool : NULL,
                   panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE),
                   64 * 1024, "Varyings pool", false);
   list_inithead(&cmdbuf->batches);
   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;
   *cmdbuf_out = cmdbuf;
   return VK_SUCCESS;
}

VkResult
panvk_per_arch(AllocateCommandBuffers)(VkDevice _device,
                                       const VkCommandBufferAllocateInfo *pAllocateInfo,
                                       VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, pAllocateInfo->commandPool);

   VkResult result = VK_SUCCESS;
   unsigned i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      struct panvk_cmd_buffer *cmdbuf = NULL;

      if (!list_is_empty(&pool->free_cmd_buffers)) {
         cmdbuf = list_first_entry(
            &pool->free_cmd_buffers, struct panvk_cmd_buffer, pool_link);

         list_del(&cmdbuf->pool_link);
         list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);

         vk_command_buffer_finish(&cmdbuf->vk);
         result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk, pAllocateInfo->level);
      } else {
         result = panvk_create_cmdbuf(device, pool, pAllocateInfo->level, &cmdbuf);
      }

      if (result != VK_SUCCESS)
         goto err_free_cmd_bufs;

      pCommandBuffers[i] = panvk_cmd_buffer_to_handle(cmdbuf);
   }

   return VK_SUCCESS;

err_free_cmd_bufs:
   panvk_per_arch(FreeCommandBuffers)(_device, pAllocateInfo->commandPool, i,
                                      pCommandBuffers);
   for (unsigned j = 0; j < i; j++)
      pCommandBuffers[j] = VK_NULL_HANDLE;

   return result;
}

void
panvk_per_arch(FreeCommandBuffers)(VkDevice device,
                                   VkCommandPool commandPool,
                                   uint32_t commandBufferCount,
                                   const VkCommandBuffer *pCommandBuffers)
{
   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, pCommandBuffers[i]);

      if (cmdbuf) {
         if (cmdbuf->pool) {
            list_del(&cmdbuf->pool_link);
            panvk_reset_cmdbuf(cmdbuf);
            list_addtail(&cmdbuf->pool_link,
                         &cmdbuf->pool->free_cmd_buffers);
         } else
            panvk_destroy_cmdbuf(cmdbuf);
      }
   }
}

VkResult
panvk_per_arch(ResetCommandBuffer)(VkCommandBuffer commandBuffer,
                                   VkCommandBufferResetFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   return panvk_reset_cmdbuf(cmdbuf);
}

VkResult
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
                                   const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VkResult result = VK_SUCCESS;

   if (cmdbuf->status != PANVK_CMD_BUFFER_STATUS_INITIAL) {
      /* If the command buffer has already been reset with
       * vkResetCommandBuffer, no need to do it again.
       */
      result = panvk_reset_cmdbuf(cmdbuf);
      if (result != VK_SUCCESS)
         return result;
   }

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));

   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}

void
panvk_per_arch(DestroyCommandPool)(VkDevice _device,
                                   VkCommandPool commandPool,
                                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->active_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->free_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);

   panvk_bo_pool_cleanup(&pool->desc_bo_pool);
   panvk_bo_pool_cleanup(&pool->varying_bo_pool);
   panvk_bo_pool_cleanup(&pool->tls_bo_pool);

   vk_command_pool_finish(&pool->vk);
   vk_free2(&device->vk.alloc, pAllocator, pool);
}

VkResult
panvk_per_arch(ResetCommandPool)(VkDevice device,
                                 VkCommandPool commandPool,
                                 VkCommandPoolResetFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);
   VkResult result;

   list_for_each_entry(struct panvk_cmd_buffer, cmdbuf, &pool->active_cmd_buffers,
                       pool_link)
   {
      result = panvk_reset_cmdbuf(cmdbuf);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

void
panvk_per_arch(TrimCommandPool)(VkDevice device,
                                VkCommandPool commandPool,
                                VkCommandPoolTrimFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   if (!pool)
      return;

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->free_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);
}

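/* Compute dispatches get their own batch: update the compute sysvals,
 * allocate TLS/WLS memory, prepare the descriptor tables, emit the COMPUTE
 * job and close the batch right away.
 */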
void
panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer,
                            uint32_t x,
                            uint32_t y,
                            uint32_t z)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   const struct panfrost_device *pdev =
      &cmdbuf->device->physical_device->pdev;
   struct panvk_dispatch_info dispatch = {
      .wg_count = { x, y, z },
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, COMPUTE);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   struct panfrost_ptr job =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   struct panvk_sysvals *sysvals = &desc_state->sysvals;
   sysvals->num_work_groups.u32[0] = x;
   sysvals->num_work_groups.u32[1] = y;
   sysvals->num_work_groups.u32[2] = z;
   sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x;
   sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y;
   sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z;
   desc_state->sysvals_ptr = 0;

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
   dispatch.tsd = batch->tls.gpu;

   panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
   dispatch.attributes = desc_state->non_vs_attribs;
   dispatch.attribute_bufs = desc_state->non_vs_attrib_bufs;

   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   dispatch.ubos = desc_state->ubos;

   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   dispatch.textures = desc_state->textures;

   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
   dispatch.samplers = desc_state->samplers;

   panvk_per_arch(emit_compute_job)(pipeline, &dispatch, job.cpu);
   panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                    MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
                    &job, false);

   batch->tlsinfo.tls.size = pipeline->tls_size;
   batch->tlsinfo.wls.size = pipeline->wls_size;
   if (batch->tlsinfo.wls.size) {
      batch->wls_total_size =
         pan_wls_mem_size(pdev, &dispatch.wg_count, batch->tlsinfo.wls.size);
   }

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   desc_state->dirty = 0;
}