/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "panvk_cs.h"
#include "panvk_private.h"

#include "pan_blitter.h"
#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_props.h"
#include "pan_samples.h"

#include "util/rounding.h"
#include "util/u_pack_color.h"
#include "vk_format.h"

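/* When dump debugging is enabled, command-stream BOs must stay CPU-mappable
 * so their contents can be inspected, so drop the NO_MMAP flag in that case.
 */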
static uint32_t
panvk_debug_adjust_bo_flags(const struct panvk_device *device,
                            uint32_t bo_flags)
{
   uint32_t debug_flags = device->physical_device->instance->debug_flags;

   if (debug_flags & PANVK_DEBUG_DUMP)
      bo_flags &= ~PAN_KMOD_BO_FLAG_NO_MMAP;

   return bo_flags;
}

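/* Allocate and emit the FRAGMENT job descriptor for the current batch's
 * framebuffer, and record it in the batch's job list.
 */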
static void
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr job_ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB);

   GENX(pan_emit_fragment_job)(fbinfo, batch->fb.desc.gpu, job_ptr.cpu);
   batch->fragment_job = job_ptr.gpu;
   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
}

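/* Finish the current batch: empty batches are either dropped or given a NULL
 * job if they carry event operations; otherwise FB preload jobs are emitted,
 * TLS/WLS storage is allocated, and the framebuffer and fragment job
 * descriptors are emitted before the batch is queued for submission.
 */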
void
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (!batch)
      return;

   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   assert(batch);

   bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
   for (unsigned i = 0; i < fbinfo->rt_count; i++)
      clear |= fbinfo->rts[i].clear;

   if (!clear && !batch->jc.first_job) {
      if (util_dynarray_num_elements(&batch->event_ops,
                                     struct panvk_event_op) == 0) {
         /* Content-less batch, let's drop it */
         vk_free(&cmdbuf->vk.pool->alloc, batch);
      } else {
         /* Batch has no jobs but is needed for synchronization, let's add a
          * NULL job so the SUBMIT ioctl doesn't choke on it.
          */
         struct panfrost_ptr ptr =
            pan_pool_alloc_desc(&cmdbuf->desc_pool.base, JOB_HEADER);
         util_dynarray_append(&batch->jobs, void *, ptr.cpu);
         pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_NULL,
                        false, false, 0, 0, &ptr, false);
         list_addtail(&batch->node, &cmdbuf->batches);
      }
      cmdbuf->state.batch = NULL;
      return;
   }

   struct panvk_device *dev = cmdbuf->device;

   list_addtail(&batch->node, &cmdbuf->batches);

   if (batch->jc.first_tiler) {
      struct panfrost_ptr preload_jobs[2];
      unsigned num_preload_jobs = GENX(pan_preload_fb)(
         &dev->meta.blitter.cache, &cmdbuf->desc_pool.base, &batch->jc,
         &cmdbuf->state.fb.info, batch->tls.gpu, batch->tiler.descs.gpu,
         preload_jobs);
      for (unsigned i = 0; i < num_preload_jobs; i++)
         util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
   }

   if (batch->tlsinfo.tls.size) {
      unsigned thread_tls_alloc =
         panfrost_query_thread_tls_alloc(&dev->physical_device->kmod.props);
      unsigned core_id_range;

      panfrost_query_core_count(&dev->physical_device->kmod.props,
                                &core_id_range);

      unsigned size = panfrost_get_total_stack_size(
         batch->tlsinfo.tls.size, thread_tls_alloc, core_id_range);
      batch->tlsinfo.tls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, size, 4096).gpu;
   }

   if (batch->tlsinfo.wls.size) {
      assert(batch->wls_total_size);
      batch->tlsinfo.wls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, batch->wls_total_size,
                                4096)
            .gpu;
   }

   if (batch->tls.cpu)
      GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);

   if (batch->fb.desc.cpu) {
      fbinfo->sample_positions = cmdbuf->device->sample_positions->addr.dev +
                                 panfrost_sample_positions_offset(
                                    pan_sample_pattern(fbinfo->nr_samples));

      batch->fb.desc.gpu |=
         GENX(pan_emit_fbd)(&cmdbuf->state.fb.info, &batch->tlsinfo,
                            &batch->tiler.ctx, batch->fb.desc.cpu);

      panvk_cmd_prepare_fragment_job(cmdbuf);
   }

   cmdbuf->state.batch = NULL;
}

void
panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
                                const VkSubpassBeginInfo *pSubpassBeginInfo,
                                const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   cmdbuf->state.subpass++;
   panvk_cmd_fb_info_set_subpass(cmdbuf);
   panvk_cmd_open_batch(cmdbuf);
}

void
panvk_per_arch(CmdNextSubpass)(VkCommandBuffer cmd, VkSubpassContents contents)
{
   VkSubpassBeginInfo binfo = {.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
                               .contents = contents};
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdNextSubpass2)(cmd, &binfo, &einfo);
}

void
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->fb.desc.gpu)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;

   batch->fb.info = cmdbuf->state.framebuffer;
   batch->fb.desc = pan_pool_alloc_desc_aggregate(
      &cmdbuf->desc_pool.base, PAN_DESC(FRAMEBUFFER),
      PAN_DESC_ARRAY(has_zs_ext ? 1 : 0, ZS_CRC_EXTENSION),
      PAN_DESC_ARRAY(MAX2(fbinfo->rt_count, 1), RENDER_TARGET));

   memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
          sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
}

void
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   assert(batch);
   if (!batch->tls.gpu) {
      batch->tls = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   }
}

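/* Refresh the draw-related sysvals (first vertex, base vertex/instance, blend
 * constants, viewport transform) and invalidate the uploaded sysval block
 * whenever one of them changed.
 */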
static void
panvk_cmd_prepare_draw_sysvals(
   struct panvk_cmd_buffer *cmdbuf,
   struct panvk_cmd_bind_point_state *bind_point_state,
   struct panvk_draw_info *draw)
{
   struct panvk_sysvals *sysvals = &bind_point_state->desc_state.sysvals;

   unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
   if (sysvals->first_vertex != draw->offset_start ||
       sysvals->base_vertex != base_vertex ||
       sysvals->base_instance != draw->first_instance) {
      sysvals->first_vertex = draw->offset_start;
      sysvals->base_vertex = base_vertex;
      sysvals->base_instance = draw->first_instance;
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) {
      memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants,
             sizeof(cmdbuf->state.blend.constants));
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) {
      panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport,
                                         &sysvals->viewport_scale);
      panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport,
                                          &sysvals->viewport_offset);
      bind_point_state->desc_state.sysvals_ptr = 0;
   }
}

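/* Upload the CPU-side sysval block to GPU memory if it has been invalidated
 * since the last upload.
 */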
static void
panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;

   if (desc_state->sysvals_ptr)
      return;

   struct panfrost_ptr sysvals = pan_pool_alloc_aligned(
      &cmdbuf->desc_pool.base, sizeof(desc_state->sysvals), 16);
   memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals));
   desc_state->sysvals_ptr = sysvals.gpu;
}

static void
panvk_cmd_prepare_push_constants(
   struct panvk_cmd_buffer *cmdbuf,
   struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->layout->push_constants.size || desc_state->push_constants)
      return;

   struct panfrost_ptr push_constants = pan_pool_alloc_aligned(
      &cmdbuf->desc_pool.base,
      ALIGN_POT(pipeline->layout->push_constants.size, 16), 16);

   memcpy(push_constants.cpu, cmdbuf->push_constants,
          pipeline->layout->push_constants.size);
   desc_state->push_constants = push_constants.gpu;
}

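/* Emit the UBO descriptor table for the pipeline. Sysvals and push constants
 * are consumed through UBOs as well, so make sure they are uploaded first.
 */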
static void
panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
                       struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->num_ubos || desc_state->ubos)
      return;

   panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);
   panvk_cmd_prepare_push_constants(cmdbuf, bind_point_state);

   struct panfrost_ptr ubos = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, pipeline->num_ubos, UNIFORM_BUFFER);

   panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu);

   desc_state->ubos = ubos.gpu;
}

static void
panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_textures = pipeline->layout->num_textures;

   if (!num_textures || desc_state->textures)
      return;

   struct panfrost_ptr textures = pan_pool_alloc_aligned(
      &cmdbuf->desc_pool.base, num_textures * pan_size(TEXTURE),
      pan_size(TEXTURE));

   void *texture = textures.cpu;

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i])
         continue;

      memcpy(texture, desc_state->sets[i]->textures,
             desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE));

      texture += desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE);
   }

   desc_state->textures = textures.gpu;
}

static void
panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_samplers = pipeline->layout->num_samplers;

   if (!num_samplers || desc_state->samplers)
      return;

   struct panfrost_ptr samplers =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, num_samplers, SAMPLER);

   void *sampler = samplers.cpu;

   /* Prepare the dummy sampler */
   pan_pack(sampler, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.magnify_nearest = true;
      cfg.minify_nearest = true;
      cfg.normalized_coordinates = false;
   }

   sampler += pan_size(SAMPLER);

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i])
         continue;

      memcpy(sampler, desc_state->sets[i]->samplers,
             desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER));

      sampler += desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER);
   }

   desc_state->samplers = samplers.gpu;
}

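/* Pick the fragment renderer state descriptor for this draw. Pipelines with
 * only static state use the precompiled RSD; otherwise a fresh RSD (plus
 * blend descriptors) is built by merging the dynamic bits into the pipeline
 * templates, and cached until the dynamic state changes again.
 */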
static void
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (!pipeline->fs.dynamic_rsd) {
      draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT];
      return;
   }

   if (!cmdbuf->state.fs_rsd) {
      struct panfrost_ptr rsd = pan_pool_alloc_desc_aggregate(
         &cmdbuf->desc_pool.base, PAN_DESC(RENDERER_STATE),
         PAN_DESC_ARRAY(pipeline->blend.state.rt_count, BLEND));

      struct mali_renderer_state_packed rsd_dyn;
      struct mali_renderer_state_packed *rsd_templ =
         (struct mali_renderer_state_packed *)&pipeline->fs.rsd_template;

      STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >= sizeof(*rsd_templ));

      panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn);
      pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE);
      memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn));

      void *bd = rsd.cpu + pan_size(RENDERER_STATE);
      for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
         if (pipeline->blend.constant[i].index != (uint8_t)~0) {
            struct mali_blend_packed bd_dyn;
            struct mali_blend_packed *bd_templ =
               (struct mali_blend_packed *)&pipeline->blend.bd_template[i];

            STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >=
                          sizeof(*bd_templ));
            panvk_per_arch(emit_blend_constant)(cmdbuf->device, pipeline, i,
                                                cmdbuf->state.blend.constants,
                                                &bd_dyn);
            pan_merge(bd_dyn, (*bd_templ), BLEND);
            memcpy(bd, &bd_dyn, sizeof(bd_dyn));
         }
         bd += pan_size(BLEND);
      }

      cmdbuf->state.fs_rsd = rsd.gpu;
   }

   draw->fs_rsd = cmdbuf->state.fs_rsd;
}

void
panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
                                      unsigned width, unsigned height)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->tiler.descs.cpu)
      return;

   batch->tiler.descs = pan_pool_alloc_desc_aggregate(
      &cmdbuf->desc_pool.base, PAN_DESC(TILER_CONTEXT), PAN_DESC(TILER_HEAP));
   STATIC_ASSERT(sizeof(batch->tiler.templ) >=
                 pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));

   struct panfrost_ptr desc = {
      .gpu = batch->tiler.descs.gpu,
      .cpu = batch->tiler.templ,
   };

   panvk_per_arch(emit_tiler_context)(cmdbuf->device, width, height, &desc);
   memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
          pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
   batch->tiler.ctx.bifrost = batch->tiler.descs.gpu;
}

void
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   panvk_per_arch(cmd_get_tiler_context)(cmdbuf, fbinfo->width, fbinfo->height);
}

static void
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
   draw->tiler_ctx = &batch->tiler.ctx;
}

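/* Allocate varying buffers sized for this draw's padded vertex count, emit
 * the per-stage varying attribute descriptors, and record the position/point
 * size buffer addresses needed by the tiler job.
 */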
static void
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;

   panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
                        draw->padded_vertex_count * draw->instance_count);

   unsigned buf_count = panvk_varyings_buf_count(varyings);
   struct panfrost_ptr bufs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, buf_count + 1, ATTRIBUTE_BUFFER);

   panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu);

   /* We need an empty entry to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
      draw->position =
         varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
         varyings->varying[VARYING_SLOT_POS].offset;
   }

   if (pipeline->ia.writes_point_size) {
      draw->psiz =
         varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
         varyings->varying[VARYING_SLOT_PSIZ].offset;
   } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
      draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH
                            ? cmdbuf->state.rast.line_width
                            : pipeline->rast.line_width;
   } else {
      draw->line_width = 1.0f;
   }
   draw->varying_bufs = bufs.gpu;

   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!varyings->stage[s].count)
         continue;

      struct panfrost_ptr attribs = pan_pool_alloc_desc_array(
         &cmdbuf->desc_pool.base, varyings->stage[s].count, ATTRIBUTE);

      panvk_per_arch(emit_varyings)(cmdbuf->device, varyings, s, attribs.cpu);
      draw->stages[s].varyings = attribs.gpu;
   }
}

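/* Fill the attribute buffer/attribute descriptors used to expose descriptor
 * set images to shaders (images go through the attribute path on this
 * architecture).
 */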
static void
panvk_fill_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state,
                          void *attrib_bufs, void *attribs, unsigned first_buf)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
      const struct panvk_descriptor_set *set = desc_state->sets[s];

      if (!set)
         continue;

      const struct panvk_descriptor_set_layout *layout = set->layout;
      unsigned img_idx = pipeline->layout->sets[s].img_offset;
      unsigned offset = img_idx * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned size = layout->num_imgs * pan_size(ATTRIBUTE_BUFFER) * 2;

      memcpy(attrib_bufs + offset, desc_state->sets[s]->img_attrib_bufs, size);

      offset = img_idx * pan_size(ATTRIBUTE);
      for (unsigned i = 0; i < layout->num_imgs; i++) {
         pan_pack(attribs + offset, ATTRIBUTE, cfg) {
            cfg.buffer_index = first_buf + (img_idx + i) * 2;
            cfg.format = desc_state->sets[s]->img_fmts[i];
         }
         offset += pan_size(ATTRIBUTE);
      }
   }
}

static void
panvk_prepare_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (desc_state->non_vs_attribs || !pipeline->img_access_mask)
      return;

   unsigned attrib_count = pipeline->layout->num_imgs;
   unsigned attrib_buf_count = (pipeline->layout->num_imgs * 2);
   struct panfrost_ptr bufs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, attrib_buf_count + 1, ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, attrib_count, ATTRIBUTE);

   panvk_fill_non_vs_attribs(cmdbuf, bind_point_state, bufs.cpu, attribs.cpu,
                             0);

   desc_state->non_vs_attrib_bufs = bufs.gpu;
   desc_state->non_vs_attribs = attribs.gpu;
}

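/* Build the vertex shader attribute and attribute buffer tables: vertex
 * buffer attributes first, followed by image attributes when the vertex
 * shader accesses images.
 */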
static void
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_imgs =
      pipeline->img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX)
         ? pipeline->layout->num_imgs
         : 0;
   unsigned attrib_count = pipeline->attribs.attrib_count + num_imgs;

   if (desc_state->vs_attribs || !attrib_count)
      return;

   if (!pipeline->attribs.buf_count) {
      panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
      desc_state->vs_attrib_bufs = desc_state->non_vs_attrib_bufs;
      desc_state->vs_attribs = desc_state->non_vs_attribs;
      return;
   }

   unsigned attrib_buf_count = pipeline->attribs.buf_count * 2;
   struct panfrost_ptr bufs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, attrib_buf_count + 1, ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs = pan_pool_alloc_desc_array(
      &cmdbuf->desc_pool.base, attrib_count, ATTRIBUTE);

   panvk_per_arch(emit_attrib_bufs)(&pipeline->attribs, cmdbuf->state.vb.bufs,
                                    cmdbuf->state.vb.count, draw, bufs.cpu);
   panvk_per_arch(emit_attribs)(cmdbuf->device, draw, &pipeline->attribs,
                                cmdbuf->state.vb.bufs, cmdbuf->state.vb.count,
                                attribs.cpu);

   if (attrib_count > pipeline->attribs.buf_count) {
      unsigned bufs_offset =
         pipeline->attribs.buf_count * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned attribs_offset =
         pipeline->attribs.buf_count * pan_size(ATTRIBUTE);

      panvk_fill_non_vs_attribs(
         cmdbuf, bind_point_state, bufs.cpu + bufs_offset,
         attribs.cpu + attribs_offset, pipeline->attribs.buf_count * 2);
   }

   /* A NULL entry is needed to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   desc_state->vs_attrib_bufs = bufs.gpu;
   desc_state->vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned i = 0; i < ARRAY_SIZE(draw->stages); i++) {
      if (i == MESA_SHADER_VERTEX) {
         panvk_draw_prepare_vs_attribs(cmdbuf, draw);
         draw->stages[i].attributes = desc_state->vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->vs_attrib_bufs;
      } else if (pipeline->img_access_mask & BITFIELD_BIT(i)) {
         panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
         draw->stages[i].attributes = desc_state->non_vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->non_vs_attrib_bufs;
      }
   }
}

static void
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (pipeline->vpd) {
      draw->viewport = pipeline->vpd;
   } else if (cmdbuf->state.vpd) {
      draw->viewport = cmdbuf->state.vpd;
   } else {
      struct panfrost_ptr vp =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT);

      const VkViewport *viewport =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT
            ? &cmdbuf->state.viewport
            : &pipeline->viewport;
      const VkRect2D *scissor =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR
            ? &cmdbuf->state.scissor
            : &pipeline->scissor;

      panvk_per_arch(emit_viewport)(viewport, scissor, vp.cpu);
      draw->viewport = cmdbuf->state.vpd = vp.gpu;
   }
}

static void
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.vertex = ptr;
   panvk_per_arch(emit_vertex_job)(pipeline, draw, ptr.cpu);
}

static void
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.tiler = ptr;
   panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu);
}

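/* Common draw path: make sure the batch has room for the new jobs, prepare
 * all descriptors consumed by the draw (sysvals, UBOs, textures, samplers,
 * RSD, varyings, attributes, viewport, tiler context), then chain a vertex
 * job and, if rasterization is enabled, a tiler job into the batch.
 */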
static void
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   /* There are only 16 bits in the descriptor for the job ID, make sure all
    * the 3 (2 in Bifrost) jobs in this draw are in the same batch.
    */
   if (batch->jc.job_index >= (UINT16_MAX - 3)) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      batch = panvk_cmd_open_batch(cmdbuf);
   }

   if (pipeline->rast.enable)
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);

   panvk_cmd_prepare_draw_sysvals(cmdbuf, bind_point_state, draw);
   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);

   /* TODO: indexed draws */
   struct panvk_descriptor_state *desc_state =
      panvk_cmd_get_desc_state(cmdbuf, GRAPHICS);

   draw->tls = batch->tls.gpu;
   draw->fb = batch->fb.desc.gpu;
   draw->ubos = desc_state->ubos;
   draw->textures = desc_state->textures;
   draw->samplers = desc_state->samplers;

   STATIC_ASSERT(sizeof(draw->invocation) >=
                 sizeof(struct mali_invocation_packed));
   panfrost_pack_work_groups_compute(
      (struct mali_invocation_packed *)&draw->invocation, 1, draw->vertex_range,
      draw->instance_count, 1, 1, 1, true, false);

   panvk_draw_prepare_fs_rsd(cmdbuf, draw);
   panvk_draw_prepare_varyings(cmdbuf, draw);
   panvk_draw_prepare_attributes(cmdbuf, draw);
   panvk_draw_prepare_viewport(cmdbuf, draw);
   panvk_draw_prepare_tiler_context(cmdbuf, draw);
   panvk_draw_prepare_vertex_job(cmdbuf, draw);
   panvk_draw_prepare_tiler_job(cmdbuf, draw);
   batch->tlsinfo.tls.size = MAX2(pipeline->tls_size, batch->tlsinfo.tls.size);
   assert(!pipeline->wls_size);

   unsigned vjob_id =
      pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_VERTEX,
                     false, false, 0, 0, &draw->jobs.vertex, false);

   if (pipeline->rast.enable) {
      pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_TILER,
                     false, false, vjob_id, 0, &draw->jobs.tiler, false);
   }

   /* Clear the dirty flags all at once */
   desc_state->dirty = cmdbuf->state.dirty = 0;
}

void
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount,
                        uint32_t instanceCount, uint32_t firstVertex,
                        uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   if (instanceCount == 0 || vertexCount == 0)
      return;

   struct panvk_draw_info draw = {
      .first_vertex = firstVertex,
      .vertex_count = vertexCount,
      .vertex_range = vertexCount,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .padded_vertex_count = instanceCount > 1
                                ? panfrost_padded_vertex_count(vertexCount)
                                : vertexCount,
      .offset_start = firstVertex,
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

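/* Scan the index buffer on the CPU to find the min/max vertex referenced by
 * this draw, which is needed to size the vertex shader invocation and the
 * varying allocations. Primitive-restart indices are skipped.
 */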
static void
panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf, uint32_t start,
                          uint32_t count, bool restart, uint32_t *min,
                          uint32_t *max)
{
   assert(cmdbuf->state.ib.buffer);
   assert(cmdbuf->state.ib.buffer->bo);
   assert(cmdbuf->state.ib.buffer->host_ptr);

   void *ptr = cmdbuf->state.ib.buffer->host_ptr + cmdbuf->state.ib.offset;

   uint32_t debug_flags =
      cmdbuf->device->physical_device->instance->debug_flags;

   if (!(debug_flags & PANVK_DEBUG_NO_KNOWN_WARN)) {
      fprintf(
         stderr,
         "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n");
   }

   *max = 0;

   /* TODO: Use panfrost_minmax_cache */
   /* TODO: Read full cacheline of data to mitigate the uncached
    * mapping slowness.
    */
   switch (cmdbuf->state.ib.index_size) {
#define MINMAX_SEARCH_CASE(sz)                                                 \
   case sz: {                                                                  \
      uint##sz##_t *indices = ptr;                                             \
      *min = UINT##sz##_MAX;                                                   \
      for (uint32_t i = 0; i < count; i++) {                                   \
         if (restart && indices[i + start] == UINT##sz##_MAX)                  \
            continue;                                                          \
         *min = MIN2(indices[i + start], *min);                                \
         *max = MAX2(indices[i + start], *max);                                \
      }                                                                        \
      break;                                                                   \
   }
      MINMAX_SEARCH_CASE(32)
      MINMAX_SEARCH_CASE(16)
      MINMAX_SEARCH_CASE(8)
#undef MINMAX_SEARCH_CASE
   default:
      unreachable("Invalid index size");
   }
}

void
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
                               uint32_t indexCount, uint32_t instanceCount,
                               uint32_t firstIndex, int32_t vertexOffset,
                               uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   uint32_t min_vertex, max_vertex;

   if (instanceCount == 0 || indexCount == 0)
      return;

   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   bool primitive_restart = pipeline->ia.primitive_restart;

   panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart,
                             &min_vertex, &max_vertex);

   unsigned vertex_range = max_vertex - min_vertex + 1;
   struct panvk_draw_info draw = {
      .index_size = cmdbuf->state.ib.index_size,
      .first_index = firstIndex,
      .index_count = indexCount,
      .vertex_offset = vertexOffset,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .vertex_range = vertex_range,
      .vertex_count = indexCount + abs(vertexOffset),
      .padded_vertex_count = instanceCount > 1
                                ? panfrost_padded_vertex_count(vertex_range)
                                : vertex_range,
      .offset_start = min_vertex + vertexOffset,
      .indices = panvk_buffer_gpu_ptr(cmdbuf->state.ib.buffer,
                                      cmdbuf->state.ib.offset) +
                 (firstIndex * (cmdbuf->state.ib.index_size / 8)),
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

VkResult
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   return vk_command_buffer_end(&cmdbuf->vk);
}

void
panvk_per_arch(CmdEndRenderPass2)(VkCommandBuffer commandBuffer,
                                  const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   vk_free(&cmdbuf->vk.pool->alloc, cmdbuf->state.clear);
   cmdbuf->state.batch = NULL;
   cmdbuf->state.pass = NULL;
   cmdbuf->state.subpass = NULL;
   cmdbuf->state.framebuffer = NULL;
   cmdbuf->state.clear = NULL;
}

void
panvk_per_arch(CmdEndRenderPass)(VkCommandBuffer cmd)
{
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdEndRenderPass2)(cmd, &einfo);
}

void
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
                                    const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* Caches are flushed/invalidated at batch boundaries for now, nothing to do
    * for memory barriers assuming we implement barriers with the creation of a
    * new batch.
    * FIXME: We can probably do better with a CacheFlush job that has the
    * barrier flag set to true.
    */
   if (cmdbuf->state.batch) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

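/* Record a set/reset operation for an event. The operation is attached to the
 * current batch (or a short-lived one if none is open), and the batch is then
 * closed so the operation executes before any commands recorded after it.
 */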
static void
panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_event *event,
                              enum panvk_event_op_type type)
{
   struct panvk_event_op op = {
      .type = type,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one so this operation happens in
       * the right order.
       */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   } else {
      /* Let's close the current batch so the operation executes before any
       * future commands.
       */
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

static void
panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
                               struct panvk_event *event)
{
   struct panvk_event_op op = {
      .type = PANVK_EVENT_OP_WAIT,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one and have it wait for this event. */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
   } else {
      /* Let's close the current batch so any future commands wait on the
       * event signal operation.
       */
      if (cmdbuf->state.batch->fragment_job ||
          cmdbuf->state.batch->jc.first_job) {
         panvk_per_arch(cmd_close_batch)(cmdbuf);
         panvk_cmd_preload_fb_after_batch_split(cmdbuf);
         panvk_cmd_open_batch(cmdbuf);
      }
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op, op);
   }
}

void
panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                             const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdSetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET);
}

void
panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
                               VkPipelineStageFlags2 stageMask)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdResetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET);
}

void
panvk_per_arch(CmdWaitEvents2)(VkCommandBuffer commandBuffer,
                               uint32_t eventCount, const VkEvent *pEvents,
                               const VkDependencyInfo *pDependencyInfos)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   assert(eventCount > 0);

   for (uint32_t i = 0; i < eventCount; i++) {
      VK_FROM_HANDLE(panvk_event, event, pEvents[i]);
      panvk_add_wait_event_operation(cmdbuf, event);
   }
}

static void
panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
                   VkCommandBufferResetFlags flags)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);

   vk_command_buffer_reset(&cmdbuf->vk);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->vk.pool->alloc, batch);
   }

   panvk_pool_reset(&cmdbuf->desc_pool);
   panvk_pool_reset(&cmdbuf->tls_pool);
   panvk_pool_reset(&cmdbuf->varying_pool);

   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
      memset(&cmdbuf->bind_points[i].desc_state.sets, 0,
             sizeof(cmdbuf->bind_points[0].desc_state.sets));
}

static void
panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
   struct panvk_device *device = cmdbuf->device;

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->vk.pool->alloc, batch);
   }

   panvk_pool_cleanup(&cmdbuf->desc_pool);
   panvk_pool_cleanup(&cmdbuf->tls_pool);
   panvk_pool_cleanup(&cmdbuf->varying_pool);
   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&device->vk.alloc, cmdbuf);
}

static VkResult
panvk_create_cmdbuf(struct vk_command_pool *vk_pool,
                    struct vk_command_buffer **cmdbuf_out)
{
   struct panvk_device *device =
      container_of(vk_pool->base.device, struct panvk_device, vk);
   struct panvk_cmd_pool *pool =
      container_of(vk_pool, struct panvk_cmd_pool, vk);
   struct panvk_cmd_buffer *cmdbuf;

   cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = vk_command_buffer_init(&pool->vk, &cmdbuf->vk,
                                            &panvk_per_arch(cmd_buffer_ops), 0);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, cmdbuf);
      return result;
   }

   cmdbuf->device = device;

   panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool, 0,
                   64 * 1024, "Command buffer descriptor pool", true);
   panvk_pool_init(
      &cmdbuf->tls_pool, device, &pool->tls_bo_pool,
      panvk_debug_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP), 64 * 1024,
      "TLS pool", false);
   panvk_pool_init(
      &cmdbuf->varying_pool, device, &pool->varying_bo_pool,
      panvk_debug_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP), 64 * 1024,
      "Varyings pool", false);
   list_inithead(&cmdbuf->batches);
   *cmdbuf_out = &cmdbuf->vk;
   return VK_SUCCESS;
}

const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops) = {
   .create = panvk_create_cmdbuf,
   .reset = panvk_reset_cmdbuf,
   .destroy = panvk_destroy_cmdbuf,
};

VkResult
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
                                   const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));

   return VK_SUCCESS;
}

void
panvk_per_arch(DestroyCommandPool)(VkDevice _device, VkCommandPool commandPool,
                                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   vk_command_pool_finish(&pool->vk);

   panvk_bo_pool_cleanup(&pool->desc_bo_pool);
   panvk_bo_pool_cleanup(&pool->varying_bo_pool);
   panvk_bo_pool_cleanup(&pool->tls_bo_pool);

   vk_free2(&device->vk.alloc, pAllocator, pool);
}

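/* vkCmdDispatch: each dispatch gets its own batch. Compute sysvals, TLS/WLS
 * storage and all descriptor tables are prepared here before the COMPUTE job
 * is added and the batch is closed.
 */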
void
panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
                            uint32_t y, uint32_t z)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   struct panvk_dispatch_info dispatch = {
      .wg_count = {x, y, z},
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
   struct panvk_device *dev = cmdbuf->device;

   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, COMPUTE);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   struct panfrost_ptr job =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   struct panvk_sysvals *sysvals = &desc_state->sysvals;
   sysvals->num_work_groups.u32[0] = x;
   sysvals->num_work_groups.u32[1] = y;
   sysvals->num_work_groups.u32[2] = z;
   sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x;
   sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y;
   sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z;
   desc_state->sysvals_ptr = 0;

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
   dispatch.tsd = batch->tls.gpu;

   panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
   dispatch.attributes = desc_state->non_vs_attribs;
   dispatch.attribute_bufs = desc_state->non_vs_attrib_bufs;

   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   dispatch.ubos = desc_state->ubos;

   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   dispatch.textures = desc_state->textures;

   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
   dispatch.samplers = desc_state->samplers;

   panvk_per_arch(emit_compute_job)(pipeline, &dispatch, job.cpu);
   pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_COMPUTE,
                  false, false, 0, 0, &job, false);

   batch->tlsinfo.tls.size = pipeline->tls_size;
   batch->tlsinfo.wls.size = pipeline->wls_size;
   if (batch->tlsinfo.wls.size) {
      unsigned core_id_range;

      panfrost_query_core_count(&dev->physical_device->kmod.props,
                                &core_id_range);
      batch->wls_total_size = pan_wls_adjust_size(batch->tlsinfo.wls.size) *
                              pan_wls_instances(&dispatch.wg_count) *
                              core_id_range;
   }

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   desc_state->dirty = 0;
}