/*
 * Copyright © 2024 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */
12 #include "genxml/gen_macros.h"
13
14 #include "panvk_cmd_alloc.h"
15 #include "panvk_cmd_buffer.h"
16 #include "panvk_cmd_desc_state.h"
17 #include "panvk_device.h"
18 #include "panvk_entrypoints.h"
19 #include "panvk_meta.h"
20 #include "panvk_physical_device.h"
21
22 #include "pan_desc.h"
23 #include "pan_encoder.h"
24 #include "pan_jc.h"
25 #include "pan_props.h"
26
27 #include <vulkan/vulkan_core.h>
28
29 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDispatchBase)30 panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
31 uint32_t baseGroupX, uint32_t baseGroupY,
32 uint32_t baseGroupZ, uint32_t groupCountX,
33 uint32_t groupCountY, uint32_t groupCountZ)
34 {
35 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
36 const struct panvk_shader *shader = cmdbuf->state.compute.shader;
37 VkResult result;
38
39 if (groupCountX == 0 || groupCountY == 0 || groupCountZ == 0)
40 return;
41
42 /* If there's no compute shader, we can skip the dispatch. */
43 if (!panvk_priv_mem_dev_addr(shader->rsd))
44 return;
45
46 struct panvk_dispatch_info info = {
47 .wg_base = {baseGroupX, baseGroupY, baseGroupZ},
48 .direct.wg_count = {groupCountX, groupCountY, groupCountZ},
49 };
50 struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
51 struct panvk_physical_device *phys_dev =
52 to_panvk_physical_device(dev->vk.physical);
53 struct pan_compute_dim wg_count = {groupCountX, groupCountY, groupCountZ};
54
55 panvk_per_arch(cmd_close_batch)(cmdbuf);
56 struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
57
58 struct panvk_descriptor_state *desc_state =
59 &cmdbuf->state.compute.desc_state;
60 struct panvk_shader_desc_state *cs_desc_state =
61 &cmdbuf->state.compute.cs.desc;
62
63 panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
64 uint64_t tsd = batch->tls.gpu;
65
66 result = panvk_per_arch(cmd_prepare_push_descs)(
67 cmdbuf, desc_state, shader->desc_info.used_set_mask);
68 if (result != VK_SUCCESS)
69 return;
70
71 if (compute_state_dirty(cmdbuf, CS) ||
72 compute_state_dirty(cmdbuf, DESC_STATE)) {
73 result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, shader,
74 cs_desc_state);
75 if (result != VK_SUCCESS)
76 return;
77 }
78
79 panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, &info);
80
81 result = panvk_per_arch(cmd_prepare_push_uniforms)(
82 cmdbuf, cmdbuf->state.compute.shader);
83 if (result != VK_SUCCESS)
84 return;
85
86 struct panfrost_ptr copy_desc_job = {0};
87
88 if (compute_state_dirty(cmdbuf, CS) ||
89 compute_state_dirty(cmdbuf, DESC_STATE)) {
90 result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
91 cmdbuf, desc_state, shader, cs_desc_state);
92
93 result = panvk_per_arch(meta_get_copy_desc_job)(
94 cmdbuf, shader, &cmdbuf->state.compute.desc_state, cs_desc_state, 0,
95 ©_desc_job);
96 if (result != VK_SUCCESS)
97 return;
98
99 if (copy_desc_job.cpu)
100 util_dynarray_append(&batch->jobs, void *, copy_desc_job.cpu);
101 }
102
103 struct panfrost_ptr job = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB);
104 if (!job.gpu)
105 return;
106
107 util_dynarray_append(&batch->jobs, void *, job.cpu);
108
109 panfrost_pack_work_groups_compute(
110 pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x, wg_count.y,
111 wg_count.z, shader->local_size.x, shader->local_size.y,
112 shader->local_size.z, false, false);
113
114 pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
115 cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) +
116 util_logbase2_ceil(shader->local_size.y + 1) +
117 util_logbase2_ceil(shader->local_size.z + 1);
118 }
119
120 pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
121 cfg.state = panvk_priv_mem_dev_addr(shader->rsd);
122 cfg.attributes = cs_desc_state->img_attrib_table;
123 cfg.attribute_buffers =
124 cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
125 cfg.thread_storage = tsd;
126 cfg.uniform_buffers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
127 cfg.push_uniforms = cmdbuf->state.compute.push_uniforms;
128 cfg.textures = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
129 cfg.samplers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
130 }
131
132 unsigned copy_desc_dep =
133 copy_desc_job.gpu
134 ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
135 0, 0, ©_desc_job, false)
136 : 0;
137
138 pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
139 copy_desc_dep, &job, false);
140
141 batch->tlsinfo.tls.size = shader->info.tls_size;
142 batch->tlsinfo.wls.size = shader->info.wls_size;
143 if (batch->tlsinfo.wls.size) {
144 unsigned core_id_range;
145
146 panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);
147 batch->tlsinfo.wls.instances = pan_wls_instances(&wg_count);
148 batch->wls_total_size = pan_wls_adjust_size(batch->tlsinfo.wls.size) *
149 batch->tlsinfo.wls.instances * core_id_range;
150 }
151
152 panvk_per_arch(cmd_close_batch)(cmdbuf);
153 clear_dirty_after_dispatch(cmdbuf);
154 }
155
156 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDispatchIndirect)157 panvk_per_arch(CmdDispatchIndirect)(VkCommandBuffer commandBuffer,
158 VkBuffer _buffer, VkDeviceSize offset)
159 {
160 panvk_stub();
161 }
162