/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_cmd_desc_state.h"
#include "panvk_cmd_draw.h"
#include "panvk_cmd_fb_preload.h"
#include "panvk_cmd_pool.h"
#include "panvk_cmd_push_constant.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_instance.h"
#include "panvk_physical_device.h"
#include "panvk_priv_bo.h"

#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_props.h"
#include "pan_samples.h"

#include "vk_descriptor_update_template.h"
#include "vk_format.h"

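/* Allocate a FRAGMENT job pointing at the framebuffer descriptor passed in
 * fbd, and queue it on the batch's fragment job chain.
 */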
static VkResult
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf, uint64_t fbd)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   struct panvk_batch *batch = cmdbuf->cur_batch;
   struct panfrost_ptr job_ptr = panvk_cmd_alloc_desc(cmdbuf, FRAGMENT_JOB);

   if (!job_ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   GENX(pan_emit_fragment_job_payload)(fbinfo, fbd, job_ptr.cpu);

   pan_section_pack(job_ptr.cpu, FRAGMENT_JOB, HEADER, header) {
      header.type = MALI_JOB_TYPE_FRAGMENT;
      header.index = 1;
   }

   pan_jc_add_job(&batch->frag_jc, MALI_JOB_TYPE_FRAGMENT, false, false, 0, 0,
                  &job_ptr, false);
   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
   return VK_SUCCESS;
}

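/* Close the batch currently being recorded. Empty batches are dropped (or get
 * a NULL job if they only carry event operations); otherwise TLS/WLS memory
 * is allocated, one framebuffer descriptor and one fragment job are emitted
 * per layer, and the batch is added to the command buffer's batch list.
 */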
void
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;

   if (!batch)
      return;

   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;

   assert(batch);

   if (!batch->fb.desc.gpu && !batch->vtc_jc.first_job) {
      if (util_dynarray_num_elements(&batch->event_ops,
                                     struct panvk_cmd_event_op) == 0) {
         /* Content-less batch, let's drop it */
         vk_free(&cmdbuf->vk.pool->alloc, batch);
      } else {
         /* Batch has no jobs but is needed for synchronization, let's add a
          * NULL job so the SUBMIT ioctl doesn't choke on it.
          */
         struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, JOB_HEADER);

         if (ptr.gpu) {
            util_dynarray_append(&batch->jobs, void *, ptr.cpu);
            pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_NULL, false, false, 0,
                           0, &ptr, false);
         }

         list_addtail(&batch->node, &cmdbuf->batches);
      }
      cmdbuf->cur_batch = NULL;
      return;
   }

   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);

   list_addtail(&batch->node, &cmdbuf->batches);

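   /* Allocate backing memory for thread-local and workgroup-local storage,
    * now that the final TLS/WLS sizes for the batch are known.
    */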
   if (batch->tlsinfo.tls.size) {
      unsigned thread_tls_alloc =
         panfrost_query_thread_tls_alloc(&phys_dev->kmod.props);
      unsigned core_id_range;

      panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);

      unsigned size = panfrost_get_total_stack_size(
         batch->tlsinfo.tls.size, thread_tls_alloc, core_id_range);
      batch->tlsinfo.tls.ptr =
         panvk_cmd_alloc_dev_mem(cmdbuf, tls, size, 4096).gpu;
   }

   if (batch->tlsinfo.wls.size) {
      assert(batch->wls_total_size);
      batch->tlsinfo.wls.ptr =
         panvk_cmd_alloc_dev_mem(cmdbuf, tls, batch->wls_total_size, 4096).gpu;
   }

   if (batch->tls.cpu)
      GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);

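   /* If the batch has a framebuffer, emit one framebuffer descriptor and one
    * fragment job per layer, preloading the FB content first when there is
    * tiler work.
    */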
   if (batch->fb.desc.cpu) {
      fbinfo->sample_positions = dev->sample_positions->addr.dev +
                                 panfrost_sample_positions_offset(
                                    pan_sample_pattern(fbinfo->nr_samples));

      if (batch->vtc_jc.first_tiler) {
         VkResult result = panvk_per_arch(cmd_fb_preload)(cmdbuf, fbinfo);
         if (result != VK_SUCCESS)
            return;
      }

      for (uint32_t i = 0; i < batch->fb.layer_count; i++) {
         VkResult result;

         uint64_t fbd = batch->fb.desc.gpu + (batch->fb.desc_stride * i);

         result = panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, i);
         if (result != VK_SUCCESS)
            break;

         fbd |= GENX(pan_emit_fbd)(
            &cmdbuf->state.gfx.render.fb.info, i, &batch->tlsinfo,
            &batch->tiler.ctx,
            batch->fb.desc.cpu + (batch->fb.desc_stride * i));

         result = panvk_cmd_prepare_fragment_job(cmdbuf, fbd);
         if (result != VK_SUCCESS)
            break;
      }
   }

   cmdbuf->cur_batch = NULL;
}

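/* Allocate the per-layer framebuffer descriptors for the current batch,
 * including room for the optional ZS/CRC extension and the render-target
 * descriptors. Does nothing if they have already been allocated.
 */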
VkResult
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;

   if (batch->fb.desc.gpu)
      return VK_SUCCESS;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
   batch->fb.layer_count = cmdbuf->state.gfx.render.layer_count;
   unsigned fbd_size = pan_size(FRAMEBUFFER);

   if (has_zs_ext)
      fbd_size = ALIGN_POT(fbd_size, pan_alignment(ZS_CRC_EXTENSION)) +
                 pan_size(ZS_CRC_EXTENSION);

   fbd_size = ALIGN_POT(fbd_size, pan_alignment(RENDER_TARGET)) +
              (MAX2(fbinfo->rt_count, 1) * pan_size(RENDER_TARGET));

   batch->fb.bo_count = cmdbuf->state.gfx.render.fb.bo_count;
   memcpy(batch->fb.bos, cmdbuf->state.gfx.render.fb.bos,
          batch->fb.bo_count * sizeof(batch->fb.bos[0]));

   batch->fb.desc =
      panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbd_size * batch->fb.layer_count,
                              pan_alignment(FRAMEBUFFER));
   batch->fb.desc_stride = fbd_size;

   memset(&cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds, 0,
          sizeof(cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds));

   return batch->fb.desc.gpu ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
}

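/* Lazily allocate the LOCAL_STORAGE (TLS) descriptor for the current batch.
 * The descriptor contents are emitted at batch close time, once the final
 * TLS/WLS sizes are known.
 */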
VkResult
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;

   assert(batch);
   if (!batch->tls.gpu) {
      batch->tls = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
      if (!batch->tls.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   return VK_SUCCESS;
}

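/* Prepare the tiler context for the given layer: on first use, allocate the
 * tiler heap descriptor and one tiler context descriptor per layer, then
 * point the batch's tiler context at the descriptor for layer_idx.
 */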
VkResult
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
                                          uint32_t layer_idx)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(cmdbuf->vk.base.device->physical);
   struct panvk_batch *batch = cmdbuf->cur_batch;
   uint64_t tiler_desc;

   if (batch->tiler.ctx_descs.gpu) {
      tiler_desc =
         batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);
      goto out_set_layer_ctx;
   }

   const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
   batch->tiler.heap_desc = panvk_cmd_alloc_desc(cmdbuf, TILER_HEAP);
   batch->tiler.ctx_descs =
      panvk_cmd_alloc_desc_array(cmdbuf, layer_count, TILER_CONTEXT);
   if (!batch->tiler.heap_desc.gpu || !batch->tiler.ctx_descs.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   tiler_desc =
      batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);

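   /* Fill the heap and tiler context templates; the context template is
    * copied into each per-layer descriptor below.
    */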
   pan_pack(&batch->tiler.heap_templ, TILER_HEAP, cfg) {
      cfg.size = pan_kmod_bo_size(dev->tiler_heap->bo);
      cfg.base = dev->tiler_heap->addr.dev;
      cfg.bottom = dev->tiler_heap->addr.dev;
      cfg.top = cfg.base + cfg.size;
   }

   pan_pack(&batch->tiler.ctx_templ, TILER_CONTEXT, cfg) {
      cfg.hierarchy_mask =
         panvk_select_tiler_hierarchy_mask(phys_dev, &cmdbuf->state.gfx);
      cfg.fb_width = fbinfo->width;
      cfg.fb_height = fbinfo->height;
      cfg.heap = batch->tiler.heap_desc.gpu;
      cfg.sample_pattern = pan_sample_pattern(fbinfo->nr_samples);
   }

   memcpy(batch->tiler.heap_desc.cpu, &batch->tiler.heap_templ,
          sizeof(batch->tiler.heap_templ));

   struct mali_tiler_context_packed *ctxs = batch->tiler.ctx_descs.cpu;

   assert(layer_count > 0);
   for (uint32_t i = 0; i < layer_count; i++) {
      STATIC_ASSERT(
         !(pan_size(TILER_CONTEXT) & (pan_alignment(TILER_CONTEXT) - 1)));

      memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs));
   }

out_set_layer_ctx:
   if (PAN_ARCH >= 9)
      batch->tiler.ctx.valhall.desc = tiler_desc;
   else
      batch->tiler.ctx.bifrost.desc = tiler_desc;

   return VK_SUCCESS;
}

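/* Allocate a fresh batch and make it the command buffer's current batch.
 * There must be no batch currently open.
 */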
struct panvk_batch *
panvk_per_arch(cmd_open_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   assert(!cmdbuf->cur_batch);
   cmdbuf->cur_batch =
      vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*cmdbuf->cur_batch), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   util_dynarray_init(&cmdbuf->cur_batch->jobs, NULL);
   util_dynarray_init(&cmdbuf->cur_batch->event_ops, NULL);
   assert(cmdbuf->cur_batch);
   return cmdbuf->cur_batch;
}

VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   return vk_command_buffer_end(&cmdbuf->vk);
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
                                    const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* Caches are flushed/invalidated at batch boundaries for now, nothing to do
    * for memory barriers assuming we implement barriers with the creation of a
    * new batch.
    * FIXME: We can probably do better with a CacheFlush job that has the
    * barrier flag set to true.
    */
   if (cmdbuf->cur_batch) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
      panvk_per_arch(cmd_open_batch)(cmdbuf);
   }
}

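/* Reset callback for vk_command_buffer_ops: free all recorded batches, reset
 * the descriptor/TLS/varying pools and the push-set list, and clear the
 * recorded state.
 */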
static void
panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
                   VkCommandBufferResetFlags flags)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);

   vk_command_buffer_reset(&cmdbuf->vk);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->vk.pool->alloc, batch);
   }

   panvk_pool_reset(&cmdbuf->desc_pool);
   panvk_pool_reset(&cmdbuf->tls_pool);
   panvk_pool_reset(&cmdbuf->varying_pool);
   panvk_cmd_buffer_obj_list_reset(cmdbuf, push_sets);

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
}

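/* Destroy callback for vk_command_buffer_ops: same batch teardown as reset,
 * but the pools and the command buffer object itself are freed as well.
 */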
static void
panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->vk.pool->alloc, batch);
   }

   panvk_pool_cleanup(&cmdbuf->desc_pool);
   panvk_pool_cleanup(&cmdbuf->tls_pool);
   panvk_pool_cleanup(&cmdbuf->varying_pool);
   panvk_cmd_buffer_obj_list_cleanup(cmdbuf, push_sets);
   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&dev->vk.alloc, cmdbuf);
}

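/* Create callback for vk_command_buffer_ops: allocate the command buffer and
 * set up the memory pools it suballocates descriptors, TLS and varyings from.
 */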
static VkResult
panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
                    struct vk_command_buffer **cmdbuf_out)
{
   struct panvk_device *device =
      container_of(vk_pool->base.device, struct panvk_device, vk);
   struct panvk_cmd_pool *pool =
      container_of(vk_pool, struct panvk_cmd_pool, vk);
   struct panvk_cmd_buffer *cmdbuf;

   cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = vk_command_buffer_init(
      &pool->vk, &cmdbuf->vk, &panvk_per_arch(cmd_buffer_ops), level);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, cmdbuf);
      return result;
   }

   panvk_cmd_buffer_obj_list_init(cmdbuf, push_sets);
   cmdbuf->vk.dynamic_graphics_state.vi = &cmdbuf->state.gfx.dynamic.vi;
   cmdbuf->vk.dynamic_graphics_state.ms.sample_locations =
      &cmdbuf->state.gfx.dynamic.sl;

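   /* Each command buffer suballocates from three pools backed by the command
    * pool's BO pools. The descriptor pool is CPU-mapped; the TLS and varying
    * pools are requested with PAN_KMOD_BO_FLAG_NO_MMAP (subject to
    * panvk_device_adjust_bo_flags) since the CPU never touches their contents.
    */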
   struct panvk_pool_properties desc_pool_props = {
      .create_flags = 0,
      .slab_size = 64 * 1024,
      .label = "Command buffer descriptor pool",
      .prealloc = true,
      .owns_bos = true,
      .needs_locking = false,
   };
   panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool,
                   &desc_pool_props);

   struct panvk_pool_properties tls_pool_props = {
      .create_flags =
         panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
      .slab_size = 64 * 1024,
      .label = "TLS pool",
      .prealloc = false,
      .owns_bos = true,
      .needs_locking = false,
   };
   panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool,
                   &tls_pool_props);

   struct panvk_pool_properties var_pool_props = {
      .create_flags =
         panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
      .slab_size = 64 * 1024,
      .label = "Varying pool",
      .prealloc = false,
      .owns_bos = true,
      .needs_locking = false,
   };
   panvk_pool_init(&cmdbuf->varying_pool, device, &pool->varying_bo_pool,
                   &var_pool_props);

   list_inithead(&cmdbuf->batches);
   *cmdbuf_out = &cmdbuf->vk;
   return VK_SUCCESS;
}

const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops) = {
   .create = panvk_create_cmdbuf,
   .reset = panvk_reset_cmdbuf,
   .destroy = panvk_destroy_cmdbuf,
};

VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
                                   const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);

   return VK_SUCCESS;
}
461