/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "panvk_cs.h"
#include "panvk_private.h"

#include "pan_blitter.h"
#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_props.h"
#include "pan_samples.h"

#include "util/rounding.h"
#include "util/u_pack_color.h"
#include "vk_format.h"

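/* PANVK_DEBUG=dump requires BOs to stay CPU-mappable so their contents can
 * be inspected, so drop PAN_KMOD_BO_FLAG_NO_MMAP from the requested flags
 * in that case. */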
static uint32_t
panvk_debug_adjust_bo_flags(const struct panvk_device *device,
                            uint32_t bo_flags)
{
   uint32_t debug_flags = device->physical_device->instance->debug_flags;

   if (debug_flags & PANVK_DEBUG_DUMP)
      bo_flags &= ~PAN_KMOD_BO_FLAG_NO_MMAP;

   return bo_flags;
}

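/* Emit the FRAGMENT job for the current batch. This is the job that runs
 * the per-tile rendering; it consumes the framebuffer descriptor emitted by
 * cmd_close_batch() right before this helper is called. */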
static void
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr job_ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB);

   GENX(pan_emit_fragment_job)(fbinfo, batch->fb.desc.gpu, job_ptr.cpu);
   batch->fragment_job = job_ptr.gpu;
   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
}

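/* Close the currently open batch. Empty batches are dropped (or given a
 * NULL job when they carry event operations, so the SUBMIT ioctl still has
 * something to execute); otherwise the FB preload jobs, TLS/WLS memory,
 * framebuffer descriptor and fragment job are finalized before the batch is
 * queued on the command buffer's batch list. */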
void
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (!batch)
      return;

   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
   for (unsigned i = 0; i < fbinfo->rt_count; i++)
      clear |= fbinfo->rts[i].clear;

   if (!clear && !batch->jc.first_job) {
      if (util_dynarray_num_elements(&batch->event_ops,
                                     struct panvk_event_op) == 0) {
         /* Content-less batch, let's drop it */
         vk_free(&cmdbuf->vk.pool->alloc, batch);
      } else {
         /* Batch has no jobs but is needed for synchronization, let's add a
          * NULL job so the SUBMIT ioctl doesn't choke on it.
          */
         struct panfrost_ptr ptr =
            pan_pool_alloc_desc(&cmdbuf->desc_pool.base, JOB_HEADER);
         util_dynarray_append(&batch->jobs, void *, ptr.cpu);
         pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_NULL,
                        false, false, 0, 0, &ptr, false);
         list_addtail(&batch->node, &cmdbuf->batches);
      }
      cmdbuf->state.batch = NULL;
      return;
   }

   struct panvk_device *dev = cmdbuf->device;

   list_addtail(&batch->node, &cmdbuf->batches);

   if (batch->jc.first_tiler) {
      struct panfrost_ptr preload_jobs[2];
      unsigned num_preload_jobs = GENX(pan_preload_fb)(
         &dev->meta.blitter.cache, &cmdbuf->desc_pool.base, &batch->jc,
         &cmdbuf->state.fb.info, batch->tls.gpu, batch->tiler.descs.gpu,
         preload_jobs);
      for (unsigned i = 0; i < num_preload_jobs; i++)
         util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
   }

   if (batch->tlsinfo.tls.size) {
      unsigned thread_tls_alloc =
         panfrost_query_thread_tls_alloc(&dev->physical_device->kmod.props);
      unsigned core_id_range;

      panfrost_query_core_count(&dev->physical_device->kmod.props,
                                &core_id_range);

      unsigned size = panfrost_get_total_stack_size(
         batch->tlsinfo.tls.size, thread_tls_alloc, core_id_range);
      batch->tlsinfo.tls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, size, 4096).gpu;
   }

   if (batch->tlsinfo.wls.size) {
      assert(batch->wls_total_size);
      batch->tlsinfo.wls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, batch->wls_total_size,
                                4096)
            .gpu;
   }

   if (batch->tls.cpu)
      GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);

   if (batch->fb.desc.cpu) {
      fbinfo->sample_positions = cmdbuf->device->sample_positions->addr.dev +
                                 panfrost_sample_positions_offset(
                                    pan_sample_pattern(fbinfo->nr_samples));

      batch->fb.desc.gpu |=
         GENX(pan_emit_fbd)(&cmdbuf->state.fb.info, &batch->tlsinfo,
                            &batch->tiler.ctx, batch->fb.desc.cpu);

      panvk_cmd_prepare_fragment_job(cmdbuf);
   }

   cmdbuf->state.batch = NULL;
}

void
panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
                                const VkSubpassBeginInfo *pSubpassBeginInfo,
                                const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   cmdbuf->state.subpass++;
   panvk_cmd_fb_info_set_subpass(cmdbuf);
   panvk_cmd_open_batch(cmdbuf);
}

void
panvk_per_arch(CmdNextSubpass)(VkCommandBuffer cmd, VkSubpassContents contents)
{
   VkSubpassBeginInfo binfo = {.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
                               .contents = contents};
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdNextSubpass2)(cmd, &binfo, &einfo);
}

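/* Lazily allocate the framebuffer descriptor for the current batch: the
 * base FRAMEBUFFER descriptor, an optional ZS/CRC extension, and one
 * RENDER_TARGET descriptor per color attachment (at least one, even when
 * rt_count is zero). */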
void
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->fb.desc.gpu)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;

   batch->fb.info = cmdbuf->state.framebuffer;
   batch->fb.desc = pan_pool_alloc_desc_aggregate(
      &cmdbuf->desc_pool.base, PAN_DESC(FRAMEBUFFER),
      PAN_DESC_ARRAY(has_zs_ext ? 1 : 0, ZS_CRC_EXTENSION),
      PAN_DESC_ARRAY(MAX2(fbinfo->rt_count, 1), RENDER_TARGET));

   memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
          sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
}

void
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   assert(batch);
   if (!batch->tls.gpu) {
      batch->tls = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   }
}

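/* Refresh the draw-related sysvals (first vertex, base vertex/instance,
 * blend constants, viewport transform) and clear sysvals_ptr whenever one
 * of them changed, forcing panvk_cmd_prepare_sysvals() to re-upload the
 * sysval block on the next draw. */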
static void
panvk_cmd_prepare_draw_sysvals(
   struct panvk_cmd_buffer *cmdbuf,
   struct panvk_cmd_bind_point_state *bind_point_state,
   struct panvk_draw_info *draw)
{
   struct panvk_sysvals *sysvals = &bind_point_state->desc_state.sysvals;

   unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
   if (sysvals->first_vertex != draw->offset_start ||
       sysvals->base_vertex != base_vertex ||
       sysvals->base_instance != draw->first_instance) {
      sysvals->first_vertex = draw->offset_start;
      sysvals->base_vertex = base_vertex;
      sysvals->base_instance = draw->first_instance;
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) {
      memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants,
             sizeof(cmdbuf->state.blend.constants));
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) {
      panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport,
                                         &sysvals->viewport_scale);
      panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport,
                                          &sysvals->viewport_offset);
      bind_point_state->desc_state.sysvals_ptr = 0;
   }
}

static void
panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;

   if (desc_state->sysvals_ptr)
      return;

   struct panfrost_ptr sysvals = pan_pool_alloc_aligned(
      &cmdbuf->desc_pool.base, sizeof(desc_state->sysvals), 16);
   memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals));
   desc_state->sysvals_ptr = sysvals.gpu;
}

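/* Copy the CPU-side push-constant data into a GPU-visible allocation. The
 * resulting pointer is consumed when the UBO table is emitted, so push
 * constants reach the shaders through a regular uniform buffer slot. */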
static void
panvk_cmd_prepare_push_constants(
   struct panvk_cmd_buffer *cmdbuf,
   struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->layout->push_constants.size || desc_state->push_constants)
      return;

   struct panfrost_ptr push_constants = pan_pool_alloc_aligned(
      &cmdbuf->desc_pool.base,
      ALIGN_POT(pipeline->layout->push_constants.size, 16), 16);

   memcpy(push_constants.cpu, cmdbuf->push_constants,
          pipeline->layout->push_constants.size);
   desc_state->push_constants = push_constants.gpu;
}

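/* Emit the UNIFORM_BUFFER descriptor table for the bound pipeline. Sysvals
 * and push constants are uploaded first since entries in the table point at
 * them. */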
static void
panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
                       struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->num_ubos || desc_state->ubos)
      return;

   panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);
   panvk_cmd_prepare_push_constants(cmdbuf, bind_point_state);

   struct panfrost_ptr ubos = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, pipeline->num_ubos, UNIFORM_BUFFER);

   panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu);

   desc_state->ubos = ubos.gpu;
}

static void
panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_textures = pipeline->layout->num_textures;

   if (!num_textures || desc_state->textures)
      return;

   struct panfrost_ptr textures = pan_pool_alloc_aligned(
      &cmdbuf->desc_pool.base, num_textures * pan_size(TEXTURE),
      pan_size(TEXTURE));

   void *texture = textures.cpu;

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i])
         continue;

      memcpy(texture, desc_state->sets[i]->textures,
             desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE));

      texture += desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE);
   }

   desc_state->textures = textures.gpu;
}

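/* Build the sampler table. Entry 0 is a dummy nearest/unnormalized sampler
 * (presumably for texture accesses that don't go through an application
 * sampler); the samplers of the bound descriptor sets follow it. */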
static void
panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_samplers = pipeline->layout->num_samplers;

   if (!num_samplers || desc_state->samplers)
      return;

   struct panfrost_ptr samplers =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, num_samplers, SAMPLER);

   void *sampler = samplers.cpu;

   /* Prepare the dummy sampler */
   pan_pack(sampler, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.magnify_nearest = true;
      cfg.minify_nearest = true;
      cfg.normalized_coordinates = false;
   }

   sampler += pan_size(SAMPLER);

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i])
         continue;

      memcpy(sampler, desc_state->sets[i]->samplers,
             desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER));

      sampler += desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER);
   }

   desc_state->samplers = samplers.gpu;
}

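/* Select the fragment RENDERER_STATE descriptor for this draw. Fully static
 * pipelines use the RSD baked at pipeline-creation time; pipelines with
 * dynamic state get a fresh descriptor built by merging the pipeline's
 * RSD/blend templates with the current dynamic state, cached in
 * cmdbuf->state.fs_rsd until that state changes. */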
static void
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (!pipeline->fs.dynamic_rsd) {
      draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT];
      return;
   }

   if (!cmdbuf->state.fs_rsd) {
      struct panfrost_ptr rsd = pan_pool_alloc_desc_aggregate(
         &cmdbuf->desc_pool.base, PAN_DESC(RENDERER_STATE),
         PAN_DESC_ARRAY(pipeline->blend.state.rt_count, BLEND));

      struct mali_renderer_state_packed rsd_dyn;
      struct mali_renderer_state_packed *rsd_templ =
         (struct mali_renderer_state_packed *)&pipeline->fs.rsd_template;

      STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >= sizeof(*rsd_templ));

      panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn);
      pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE);
      memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn));

      void *bd = rsd.cpu + pan_size(RENDERER_STATE);
      for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
         if (pipeline->blend.constant[i].index != (uint8_t)~0) {
            struct mali_blend_packed bd_dyn;
            struct mali_blend_packed *bd_templ =
               (struct mali_blend_packed *)&pipeline->blend.bd_template[i];

            STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >=
                          sizeof(*bd_templ));
            panvk_per_arch(emit_blend_constant)(cmdbuf->device, pipeline, i,
                                                cmdbuf->state.blend.constants,
                                                &bd_dyn);
            pan_merge(bd_dyn, (*bd_templ), BLEND);
            memcpy(bd, &bd_dyn, sizeof(bd_dyn));
         }
         bd += pan_size(BLEND);
      }

      cmdbuf->state.fs_rsd = rsd.gpu;
   }

   draw->fs_rsd = cmdbuf->state.fs_rsd;
}

void
panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
                                      unsigned width, unsigned height)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->tiler.descs.cpu)
      return;

   batch->tiler.descs = pan_pool_alloc_desc_aggregate(
      &cmdbuf->desc_pool.base, PAN_DESC(TILER_CONTEXT), PAN_DESC(TILER_HEAP));
   STATIC_ASSERT(sizeof(batch->tiler.templ) >=
                 pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));

   struct panfrost_ptr desc = {
      .gpu = batch->tiler.descs.gpu,
      .cpu = batch->tiler.templ,
   };

   panvk_per_arch(emit_tiler_context)(cmdbuf->device, width, height, &desc);
   memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
          pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
   batch->tiler.ctx.bifrost = batch->tiler.descs.gpu;
}

void
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   panvk_per_arch(cmd_get_tiler_context)(cmdbuf, fbinfo->width, fbinfo->height);
}

static void
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
   draw->tiler_ctx = &batch->tiler.ctx;
}

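/* Allocate varying memory for this draw and emit the ATTRIBUTE_BUFFER /
 * ATTRIBUTE descriptors the shader stages use to address it. This is also
 * where the position/point-size buffer addresses and the line width
 * consumed by the tiler job are extracted. */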
static void
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;

   panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
                        draw->padded_vertex_count * draw->instance_count);

   unsigned buf_count = panvk_varyings_buf_count(varyings);
   struct panfrost_ptr bufs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, buf_count + 1, ATTRIBUTE_BUFFER);

   panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu);

   /* We need an empty entry to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
      draw->position =
         varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
         varyings->varying[VARYING_SLOT_POS].offset;
   }

   if (pipeline->ia.writes_point_size) {
      draw->psiz =
         varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
         varyings->varying[VARYING_SLOT_PSIZ].offset;
   } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
      draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH
                            ? cmdbuf->state.rast.line_width
                            : pipeline->rast.line_width;
   } else {
      draw->line_width = 1.0f;
   }
   draw->varying_bufs = bufs.gpu;

   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!varyings->stage[s].count)
         continue;

      struct panfrost_ptr attribs = pan_pool_alloc_desc_array(
         &cmdbuf->desc_pool.base, varyings->stage[s].count, ATTRIBUTE);

      panvk_per_arch(emit_varyings)(cmdbuf->device, varyings, s, attribs.cpu);
      draw->stages[s].varyings = attribs.gpu;
   }
}

static void
panvk_fill_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state,
                          void *attrib_bufs, void *attribs, unsigned first_buf)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
      const struct panvk_descriptor_set *set = desc_state->sets[s];

      if (!set)
         continue;

      const struct panvk_descriptor_set_layout *layout = set->layout;
      unsigned img_idx = pipeline->layout->sets[s].img_offset;
      unsigned offset = img_idx * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned size = layout->num_imgs * pan_size(ATTRIBUTE_BUFFER) * 2;

      memcpy(attrib_bufs + offset, desc_state->sets[s]->img_attrib_bufs, size);

      offset = img_idx * pan_size(ATTRIBUTE);
      for (unsigned i = 0; i < layout->num_imgs; i++) {
         pan_pack(attribs + offset, ATTRIBUTE, cfg) {
            cfg.buffer_index = first_buf + (img_idx + i) * 2;
            cfg.format = desc_state->sets[s]->img_fmts[i];
         }
         offset += pan_size(ATTRIBUTE);
      }
   }
}

static void
panvk_prepare_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (desc_state->non_vs_attribs || !pipeline->img_access_mask)
      return;

   unsigned attrib_count = pipeline->layout->num_imgs;
   unsigned attrib_buf_count = pipeline->layout->num_imgs * 2;
   struct panfrost_ptr bufs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, attrib_buf_count + 1, ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, attrib_count, ATTRIBUTE);

   panvk_fill_non_vs_attribs(cmdbuf, bind_point_state, bufs.cpu, attribs.cpu,
                             0);

   desc_state->non_vs_attrib_bufs = bufs.gpu;
   desc_state->non_vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_imgs =
      pipeline->img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX)
         ? pipeline->layout->num_imgs
         : 0;
   unsigned attrib_count = pipeline->attribs.attrib_count + num_imgs;

   if (desc_state->vs_attribs || !attrib_count)
      return;

   if (!pipeline->attribs.buf_count) {
      panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
      desc_state->vs_attrib_bufs = desc_state->non_vs_attrib_bufs;
      desc_state->vs_attribs = desc_state->non_vs_attribs;
      return;
   }

   unsigned attrib_buf_count = pipeline->attribs.buf_count * 2;
   struct panfrost_ptr bufs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, attrib_buf_count + 1, ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, attrib_count, ATTRIBUTE);

   panvk_per_arch(emit_attrib_bufs)(&pipeline->attribs, cmdbuf->state.vb.bufs,
                                    cmdbuf->state.vb.count, draw, bufs.cpu);
   panvk_per_arch(emit_attribs)(cmdbuf->device, draw, &pipeline->attribs,
                                cmdbuf->state.vb.bufs, cmdbuf->state.vb.count,
                                attribs.cpu);

   if (attrib_count > pipeline->attribs.buf_count) {
      unsigned bufs_offset =
         pipeline->attribs.buf_count * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned attribs_offset =
         pipeline->attribs.buf_count * pan_size(ATTRIBUTE);

      panvk_fill_non_vs_attribs(
         cmdbuf, bind_point_state, bufs.cpu + bufs_offset,
         attribs.cpu + attribs_offset, pipeline->attribs.buf_count * 2);
   }

   /* A NULL entry is needed to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   desc_state->vs_attrib_bufs = bufs.gpu;
   desc_state->vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned i = 0; i < ARRAY_SIZE(draw->stages); i++) {
      if (i == MESA_SHADER_VERTEX) {
         panvk_draw_prepare_vs_attribs(cmdbuf, draw);
         draw->stages[i].attributes = desc_state->vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->vs_attrib_bufs;
      } else if (pipeline->img_access_mask & BITFIELD_BIT(i)) {
         panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
         draw->stages[i].attributes = desc_state->non_vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->non_vs_attrib_bufs;
      }
   }
}

static void
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (pipeline->vpd) {
      draw->viewport = pipeline->vpd;
   } else if (cmdbuf->state.vpd) {
      draw->viewport = cmdbuf->state.vpd;
   } else {
      struct panfrost_ptr vp =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT);

      const VkViewport *viewport =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT
            ? &cmdbuf->state.viewport
            : &pipeline->viewport;
      const VkRect2D *scissor =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR
            ? &cmdbuf->state.scissor
            : &pipeline->scissor;

      panvk_per_arch(emit_viewport)(viewport, scissor, vp.cpu);
      draw->viewport = cmdbuf->state.vpd = vp.gpu;
   }
}

static void
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.vertex = ptr;
   panvk_per_arch(emit_vertex_job)(pipeline, draw, ptr.cpu);
}

static void
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.tiler = ptr;
   panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu);
}

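/* Common draw path shared by vkCmdDraw and vkCmdDrawIndexed: split the
 * batch if the job ID space is about to overflow, allocate the FB/TLS
 * descriptors, upload sysvals/UBOs/textures/samplers, prepare the per-draw
 * descriptors, then queue the vertex job and, if rasterization is enabled,
 * the tiler job. */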
static void
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   /* The job descriptor only has 16 bits for the job ID, so make sure all
    * three jobs (two on Bifrost) issued by this draw land in the same batch.
    */
   if (batch->jc.job_index >= (UINT16_MAX - 3)) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      batch = panvk_cmd_open_batch(cmdbuf);
   }

   if (pipeline->rast.enable)
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);

   panvk_cmd_prepare_draw_sysvals(cmdbuf, bind_point_state, draw);
   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);

   /* TODO: indexed draws */
   struct panvk_descriptor_state *desc_state =
      panvk_cmd_get_desc_state(cmdbuf, GRAPHICS);

   draw->tls = batch->tls.gpu;
   draw->fb = batch->fb.desc.gpu;
   draw->ubos = desc_state->ubos;
   draw->textures = desc_state->textures;
   draw->samplers = desc_state->samplers;

   STATIC_ASSERT(sizeof(draw->invocation) >=
                 sizeof(struct mali_invocation_packed));
   panfrost_pack_work_groups_compute(
      (struct mali_invocation_packed *)&draw->invocation, 1, draw->vertex_range,
      draw->instance_count, 1, 1, 1, true, false);

   panvk_draw_prepare_fs_rsd(cmdbuf, draw);
   panvk_draw_prepare_varyings(cmdbuf, draw);
   panvk_draw_prepare_attributes(cmdbuf, draw);
   panvk_draw_prepare_viewport(cmdbuf, draw);
   panvk_draw_prepare_tiler_context(cmdbuf, draw);
   panvk_draw_prepare_vertex_job(cmdbuf, draw);
   panvk_draw_prepare_tiler_job(cmdbuf, draw);
   batch->tlsinfo.tls.size = MAX2(pipeline->tls_size, batch->tlsinfo.tls.size);
   assert(!pipeline->wls_size);

   unsigned vjob_id =
      pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_VERTEX,
                     false, false, 0, 0, &draw->jobs.vertex, false);

   if (pipeline->rast.enable) {
      pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_TILER,
                     false, false, vjob_id, 0, &draw->jobs.tiler, false);
   }

   /* Clear the dirty flags all at once */
   desc_state->dirty = cmdbuf->state.dirty = 0;
}

void
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount,
                        uint32_t instanceCount, uint32_t firstVertex,
                        uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   if (instanceCount == 0 || vertexCount == 0)
      return;

   struct panvk_draw_info draw = {
      .first_vertex = firstVertex,
      .vertex_count = vertexCount,
      .vertex_range = vertexCount,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .padded_vertex_count = instanceCount > 1
                                ? panfrost_padded_vertex_count(vertexCount)
                                : vertexCount,
      .offset_start = firstVertex,
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

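/* Compute the [min, max] vertex range covered by an indexed draw by walking
 * the index buffer on the CPU (skipping restart indices when primitive
 * restart is enabled). The range is needed to size the vertex jobs, but
 * crawling an index buffer from the CPU relies on host-visible index
 * buffers, which Vulkan does not guarantee -- hence the warning below. */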
static void
panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf, uint32_t start,
                          uint32_t count, bool restart, uint32_t *min,
                          uint32_t *max)
{
   assert(cmdbuf->state.ib.buffer);
   assert(cmdbuf->state.ib.buffer->bo);
   assert(cmdbuf->state.ib.buffer->host_ptr);

   void *ptr = cmdbuf->state.ib.buffer->host_ptr + cmdbuf->state.ib.offset;

   uint32_t debug_flags =
      cmdbuf->device->physical_device->instance->debug_flags;

   if (!(debug_flags & PANVK_DEBUG_NO_KNOWN_WARN)) {
      fprintf(
         stderr,
         "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n");
   }

   *max = 0;

   /* TODO: Use panfrost_minmax_cache */
   /* TODO: Read a full cacheline of data at a time to mitigate the slowness
    * of the uncached mapping.
    */
   switch (cmdbuf->state.ib.index_size) {
#define MINMAX_SEARCH_CASE(sz)                                                 \
   case sz: {                                                                  \
      uint##sz##_t *indices = ptr;                                             \
      *min = UINT##sz##_MAX;                                                   \
      for (uint32_t i = 0; i < count; i++) {                                   \
         if (restart && indices[i + start] == UINT##sz##_MAX)                  \
            continue;                                                          \
         *min = MIN2(indices[i + start], *min);                                \
         *max = MAX2(indices[i + start], *max);                                \
      }                                                                        \
      break;                                                                   \
   }
      MINMAX_SEARCH_CASE(32)
      MINMAX_SEARCH_CASE(16)
      MINMAX_SEARCH_CASE(8)
#undef MINMAX_SEARCH_CASE
   default:
      unreachable("Invalid index size");
   }
}

void
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
                               uint32_t indexCount, uint32_t instanceCount,
                               uint32_t firstIndex, int32_t vertexOffset,
                               uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   uint32_t min_vertex, max_vertex;

   if (instanceCount == 0 || indexCount == 0)
      return;

   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   bool primitive_restart = pipeline->ia.primitive_restart;

   panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart,
                             &min_vertex, &max_vertex);

   unsigned vertex_range = max_vertex - min_vertex + 1;
   struct panvk_draw_info draw = {
      .index_size = cmdbuf->state.ib.index_size,
      .first_index = firstIndex,
      .index_count = indexCount,
      .vertex_offset = vertexOffset,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .vertex_range = vertex_range,
      .vertex_count = indexCount + abs(vertexOffset),
      .padded_vertex_count = instanceCount > 1
                                ? panfrost_padded_vertex_count(vertex_range)
                                : vertex_range,
      .offset_start = min_vertex + vertexOffset,
      .indices = panvk_buffer_gpu_ptr(cmdbuf->state.ib.buffer,
                                      cmdbuf->state.ib.offset) +
                 (firstIndex * (cmdbuf->state.ib.index_size / 8)),
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

VkResult
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   return vk_command_buffer_end(&cmdbuf->vk);
}

void
panvk_per_arch(CmdEndRenderPass2)(VkCommandBuffer commandBuffer,
                                  const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   vk_free(&cmdbuf->vk.pool->alloc, cmdbuf->state.clear);
   cmdbuf->state.batch = NULL;
   cmdbuf->state.pass = NULL;
   cmdbuf->state.subpass = NULL;
   cmdbuf->state.framebuffer = NULL;
   cmdbuf->state.clear = NULL;
}

void
panvk_per_arch(CmdEndRenderPass)(VkCommandBuffer cmd)
{
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdEndRenderPass2)(cmd, &einfo);
}

void
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
                                    const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* Caches are flushed/invalidated at batch boundaries for now, so there is
    * nothing to do for memory barriers as long as barriers are implemented
    * by starting a new batch.
    * FIXME: We could probably do better with a CacheFlush job that has the
    * barrier flag set to true.
    */
   if (cmdbuf->state.batch) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

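/* Record a set/reset operation on an event. Event operations are attached
 * to a batch (and presumably processed when that batch is submitted), so a
 * batch boundary is forced here to keep the operation correctly ordered
 * against surrounding commands. */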
static void
panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_event *event,
                              enum panvk_event_op_type type)
{
   struct panvk_event_op op = {
      .type = type,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one so this operation happens in
       * the right order.
       */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   } else {
      /* Let's close the current batch so the operation executes before any
       * future commands.
       */
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

static void
panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
                               struct panvk_event *event)
{
   struct panvk_event_op op = {
      .type = PANVK_EVENT_OP_WAIT,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one and have it wait for this
       * event. */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
   } else {
      /* Let's close the current batch so any future commands wait on the
       * event signal operation.
       */
      if (cmdbuf->state.batch->fragment_job ||
          cmdbuf->state.batch->jc.first_job) {
         panvk_per_arch(cmd_close_batch)(cmdbuf);
         panvk_cmd_preload_fb_after_batch_split(cmdbuf);
         panvk_cmd_open_batch(cmdbuf);
      }
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
   }
}

void
panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                             const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdSetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET);
}

void
panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                               VkPipelineStageFlags2 stageMask)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdResetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET);
}

void
panvk_per_arch(CmdWaitEvents2)(VkCommandBuffer commandBuffer,
                               uint32_t eventCount, const VkEvent *pEvents,
                               const VkDependencyInfo *pDependencyInfos)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   assert(eventCount > 0);

   for (uint32_t i = 0; i < eventCount; i++) {
      VK_FROM_HANDLE(panvk_event, event, pEvents[i]);
      panvk_add_wait_event_operation(cmdbuf, event);
   }
}

static void
panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
                   VkCommandBufferResetFlags flags)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);

   vk_command_buffer_reset(&cmdbuf->vk);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->vk.pool->alloc, batch);
   }

   panvk_pool_reset(&cmdbuf->desc_pool);
   panvk_pool_reset(&cmdbuf->tls_pool);
   panvk_pool_reset(&cmdbuf->varying_pool);

   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
      memset(&cmdbuf->bind_points[i].desc_state.sets, 0,
             sizeof(cmdbuf->bind_points[0].desc_state.sets));
}

static void
panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
   struct panvk_device *device = cmdbuf->device;

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->vk.pool->alloc, batch);
   }

   panvk_pool_cleanup(&cmdbuf->desc_pool);
   panvk_pool_cleanup(&cmdbuf->tls_pool);
   panvk_pool_cleanup(&cmdbuf->varying_pool);
   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&device->vk.alloc, cmdbuf);
}

static VkResult
panvk_create_cmdbuf(struct vk_command_pool *vk_pool,
                    struct vk_command_buffer **cmdbuf_out)
{
   struct panvk_device *device =
      container_of(vk_pool->base.device, struct panvk_device, vk);
   struct panvk_cmd_pool *pool =
      container_of(vk_pool, struct panvk_cmd_pool, vk);
   struct panvk_cmd_buffer *cmdbuf;

   cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = vk_command_buffer_init(&pool->vk, &cmdbuf->vk,
                                            &panvk_per_arch(cmd_buffer_ops), 0);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, cmdbuf);
      return result;
   }

   cmdbuf->device = device;

   panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool, 0,
                   64 * 1024, "Command buffer descriptor pool", true);
   panvk_pool_init(
      &cmdbuf->tls_pool, device, &pool->tls_bo_pool,
      panvk_debug_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP), 64 * 1024,
      "TLS pool", false);
   panvk_pool_init(
      &cmdbuf->varying_pool, device, &pool->varying_bo_pool,
      panvk_debug_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP), 64 * 1024,
      "Varyings pool", false);
   list_inithead(&cmdbuf->batches);
   *cmdbuf_out = &cmdbuf->vk;
   return VK_SUCCESS;
}

const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops) = {
   .create = panvk_create_cmdbuf,
   .reset = panvk_reset_cmdbuf,
   .destroy = panvk_destroy_cmdbuf,
};

VkResult
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
                                   const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));

   return VK_SUCCESS;
}

void
panvk_per_arch(DestroyCommandPool)(VkDevice _device, VkCommandPool commandPool,
                                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   vk_command_pool_finish(&pool->vk);

   panvk_bo_pool_cleanup(&pool->desc_bo_pool);
   panvk_bo_pool_cleanup(&pool->varying_bo_pool);
   panvk_bo_pool_cleanup(&pool->tls_bo_pool);

   vk_free2(&device->vk.alloc, pAllocator, pool);
}

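/* Direct dispatch: executed in a batch of its own. Workgroup-count and
 * local-size sysvals are refreshed, the per-dispatch descriptor tables are
 * prepared, a single COMPUTE job is queued, and the WLS allocation is sized
 * from the instance count and core count before the batch is closed. */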
void
panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
                            uint32_t y, uint32_t z)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   struct panvk_dispatch_info dispatch = {
      .wg_count = {x, y, z},
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
   struct panvk_device *dev = cmdbuf->device;

   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, COMPUTE);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   struct panfrost_ptr job =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   struct panvk_sysvals *sysvals = &desc_state->sysvals;
   sysvals->num_work_groups.u32[0] = x;
   sysvals->num_work_groups.u32[1] = y;
   sysvals->num_work_groups.u32[2] = z;
   sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x;
   sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y;
   sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z;
   desc_state->sysvals_ptr = 0;

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
   dispatch.tsd = batch->tls.gpu;

   panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
   dispatch.attributes = desc_state->non_vs_attribs;
   dispatch.attribute_bufs = desc_state->non_vs_attrib_bufs;

   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   dispatch.ubos = desc_state->ubos;

   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   dispatch.textures = desc_state->textures;

   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
   dispatch.samplers = desc_state->samplers;

   panvk_per_arch(emit_compute_job)(pipeline, &dispatch, job.cpu);
   pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_COMPUTE,
                  false, false, 0, 0, &job, false);

   batch->tlsinfo.tls.size = pipeline->tls_size;
   batch->tlsinfo.wls.size = pipeline->wls_size;
   if (batch->tlsinfo.wls.size) {
      unsigned core_id_range;

      panfrost_query_core_count(&dev->physical_device->kmod.props,
                                &core_id_range);
      batch->wls_total_size = pan_wls_adjust_size(batch->tlsinfo.wls.size) *
                              pan_wls_instances(&dispatch.wg_count) *
                              core_id_range;
   }

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   desc_state->dirty = 0;
}