• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2024 Collabora Ltd.
3  *
4  * Derived from tu_cmd_buffer.c which is:
5  * Copyright © 2016 Red Hat.
6  * Copyright © 2016 Bas Nieuwenhuizen
7  * Copyright © 2015 Intel Corporation
8  *
9  * SPDX-License-Identifier: MIT
10  */
11 
12 #include "genxml/gen_macros.h"
13 
14 #include "panvk_cmd_alloc.h"
15 #include "panvk_cmd_buffer.h"
16 #include "panvk_cmd_desc_state.h"
17 #include "panvk_device.h"
18 #include "panvk_entrypoints.h"
19 #include "panvk_meta.h"
20 #include "panvk_physical_device.h"
21 
22 #include "pan_desc.h"
23 #include "pan_encoder.h"
24 #include "pan_jc.h"
25 #include "pan_props.h"
26 
27 #include <vulkan/vulkan_core.h>
28 
29 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDispatchBase)30 panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
31                                 uint32_t baseGroupX, uint32_t baseGroupY,
32                                 uint32_t baseGroupZ, uint32_t groupCountX,
33                                 uint32_t groupCountY, uint32_t groupCountZ)
34 {
35    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
36    const struct panvk_shader *shader = cmdbuf->state.compute.shader;
37    VkResult result;
38 
39    if (groupCountX == 0 || groupCountY == 0 || groupCountZ == 0)
40       return;
41 
42    /* If there's no compute shader, we can skip the dispatch. */
43    if (!panvk_priv_mem_dev_addr(shader->rsd))
44       return;
45 
46    struct panvk_dispatch_info info = {
47       .wg_base = {baseGroupX, baseGroupY, baseGroupZ},
48       .direct.wg_count = {groupCountX, groupCountY, groupCountZ},
49    };
50    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
51    struct panvk_physical_device *phys_dev =
52       to_panvk_physical_device(dev->vk.physical);
53    struct pan_compute_dim wg_count = {groupCountX, groupCountY, groupCountZ};
54 
55    panvk_per_arch(cmd_close_batch)(cmdbuf);
56    struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
57 
58    struct panvk_descriptor_state *desc_state =
59       &cmdbuf->state.compute.desc_state;
60    struct panvk_shader_desc_state *cs_desc_state =
61       &cmdbuf->state.compute.cs.desc;
62 
63    panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
64    uint64_t tsd = batch->tls.gpu;
65 
66    result = panvk_per_arch(cmd_prepare_push_descs)(
67       cmdbuf, desc_state, shader->desc_info.used_set_mask);
68    if (result != VK_SUCCESS)
69       return;
70 
71    if (compute_state_dirty(cmdbuf, CS) ||
72        compute_state_dirty(cmdbuf, DESC_STATE)) {
73       result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, shader,
74                                                      cs_desc_state);
75       if (result != VK_SUCCESS)
76          return;
77    }
78 
79    panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, &info);
80 
81    result = panvk_per_arch(cmd_prepare_push_uniforms)(
82       cmdbuf, cmdbuf->state.compute.shader);
83    if (result != VK_SUCCESS)
84       return;
85 
86    struct panfrost_ptr copy_desc_job = {0};
87 
88    if (compute_state_dirty(cmdbuf, CS) ||
89        compute_state_dirty(cmdbuf, DESC_STATE)) {
90       result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
91          cmdbuf, desc_state, shader, cs_desc_state);
92 
93       result = panvk_per_arch(meta_get_copy_desc_job)(
94          cmdbuf, shader, &cmdbuf->state.compute.desc_state, cs_desc_state, 0,
95          &copy_desc_job);
96       if (result != VK_SUCCESS)
97          return;
98 
99       if (copy_desc_job.cpu)
100          util_dynarray_append(&batch->jobs, void *, copy_desc_job.cpu);
101    }
102 
103    struct panfrost_ptr job = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB);
104    if (!job.gpu)
105       return;
106 
107    util_dynarray_append(&batch->jobs, void *, job.cpu);
108 
109    panfrost_pack_work_groups_compute(
110       pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x, wg_count.y,
111       wg_count.z, shader->local_size.x, shader->local_size.y,
112       shader->local_size.z, false, false);
113 
114    pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
115       cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) +
116                            util_logbase2_ceil(shader->local_size.y + 1) +
117                            util_logbase2_ceil(shader->local_size.z + 1);
118    }
119 
120    pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
121       cfg.state = panvk_priv_mem_dev_addr(shader->rsd);
122       cfg.attributes = cs_desc_state->img_attrib_table;
123       cfg.attribute_buffers =
124          cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
125       cfg.thread_storage = tsd;
126       cfg.uniform_buffers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
127       cfg.push_uniforms = cmdbuf->state.compute.push_uniforms;
128       cfg.textures = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
129       cfg.samplers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
130    }
131 
132    unsigned copy_desc_dep =
133       copy_desc_job.gpu
134          ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
135                           0, 0, &copy_desc_job, false)
136          : 0;
137 
138    pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
139                   copy_desc_dep, &job, false);
140 
141    batch->tlsinfo.tls.size = shader->info.tls_size;
142    batch->tlsinfo.wls.size = shader->info.wls_size;
143    if (batch->tlsinfo.wls.size) {
144       unsigned core_id_range;
145 
146       panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);
147       batch->tlsinfo.wls.instances = pan_wls_instances(&wg_count);
148       batch->wls_total_size = pan_wls_adjust_size(batch->tlsinfo.wls.size) *
149                               batch->tlsinfo.wls.instances * core_id_range;
150    }
151 
152    panvk_per_arch(cmd_close_batch)(cmdbuf);
153    clear_dirty_after_dispatch(cmdbuf);
154 }
155 
156 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDispatchIndirect)157 panvk_per_arch(CmdDispatchIndirect)(VkCommandBuffer commandBuffer,
158                                     VkBuffer _buffer, VkDeviceSize offset)
159 {
160    panvk_stub();
161 }
162