/*
 * Copyright © 2024 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "genxml/gen_macros.h"

#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_cmd_desc_state.h"
#include "panvk_entrypoints.h"

#include "pan_pool.h"

#include "util/rounding.h"

#include "vk_alloc.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"

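/*
 * Bind descriptor sets and record the dynamic buffer offsets they come with.
 * Offsets are consumed in binding order, matching the pDynamicOffsets
 * ordering defined by Vulkan.
 */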
static void
cmd_desc_state_bind_sets(struct panvk_descriptor_state *desc_state,
                         const VkBindDescriptorSetsInfoKHR *info)
{
   unsigned dynoffset_idx = 0;
   for (unsigned i = 0; i < info->descriptorSetCount; ++i) {
      unsigned set_idx = i + info->firstSet;
      VK_FROM_HANDLE(panvk_descriptor_set, set, info->pDescriptorSets[i]);

      /* Binding a regular set invalidates any push set that was shadowing
       * this slot. */
      if (desc_state->sets[set_idx] &&
          desc_state->sets[set_idx] == desc_state->push_sets[set_idx])
         desc_state->push_sets[set_idx]->descs.dev = 0;

      desc_state->sets[set_idx] = set;

      if (!set || !set->layout->dyn_buf_count)
         continue;

      for (unsigned b = 0; b < set->layout->binding_count; b++) {
         VkDescriptorType type = set->layout->bindings[b].type;

         if (!vk_descriptor_type_is_dynamic(type))
            continue;

         unsigned dyn_buf_idx = set->layout->bindings[b].desc_idx;
         for (unsigned e = 0; e < set->layout->bindings[b].desc_count; e++) {
            desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx++] =
               info->pDynamicOffsets[dynoffset_idx++];
         }
      }
   }

   assert(dynoffset_idx == info->dynamicOffsetCount);
}

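/*
 * Get the push descriptor set backing a given set slot, allocating one if
 * needed. We first re-use the push set already attached to the slot, then
 * try to recycle one from the command pool's free list, and finally fall
 * back to a fresh host allocation owned by the command buffer. Returns NULL
 * and flags the command buffer with VK_ERROR_OUT_OF_HOST_MEMORY on
 * allocation failure.
 */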
static struct panvk_descriptor_set *
cmd_get_push_desc_set(struct vk_command_buffer *vk_cmdbuf,
                      struct panvk_descriptor_state *desc_state,
                      uint32_t set_idx)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
   struct panvk_cmd_pool *pool =
      container_of(cmdbuf->vk.pool, struct panvk_cmd_pool, vk);
   struct panvk_push_set *push_set;

   assert(set_idx < MAX_SETS);

   if (likely(desc_state->push_sets[set_idx])) {
      push_set = container_of(desc_state->push_sets[set_idx],
                              struct panvk_push_set, set);
   } else if (!list_is_empty(&pool->push_sets)) {
      push_set =
         list_first_entry(&pool->push_sets, struct panvk_push_set, base.node);
      list_del(&push_set->base.node);
      list_addtail(&push_set->base.node, &cmdbuf->push_sets);
      memset(push_set->descs, 0, sizeof(push_set->descs));
   } else {
      push_set = vk_zalloc(&pool->vk.alloc, sizeof(*push_set), 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (push_set)
         list_addtail(&push_set->base.node, &cmdbuf->push_sets);
   }

   if (unlikely(!push_set)) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return NULL;
   }

   if (desc_state->push_sets[set_idx] == NULL) {
      desc_state->push_sets[set_idx] = &push_set->set;
      push_set->set.descs.host = push_set->descs;
   }

   struct panvk_descriptor_set *set = desc_state->push_sets[set_idx];

   /* Pushing descriptors replaces whatever set was previously bound to this
    * slot. */
   desc_state->sets[set_idx] = set;

   BITSET_SET(desc_state->dirty_push_sets, set_idx);
   return set;
}

#if PAN_ARCH <= 7
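/*
 * Resolve the dynamic SSBOs referenced by a shader into a GPU-visible table
 * of {base_addr, size} pairs, folding in the dynamic offsets recorded at
 * bind time. The copy-descriptor handles in the shader map encode both the
 * source set and the index into that set's dynamic buffer array.
 */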
VkResult
panvk_per_arch(cmd_prepare_dyn_ssbos)(
   struct panvk_cmd_buffer *cmdbuf,
   const struct panvk_descriptor_state *desc_state,
   const struct panvk_shader *shader,
   struct panvk_shader_desc_state *shader_desc_state)
{
   shader_desc_state->dyn_ssbos = 0;

   if (!shader || !shader->desc_info.dyn_ssbos.count)
      return VK_SUCCESS;

   struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem(
      cmdbuf, desc, shader->desc_info.dyn_ssbos.count * PANVK_DESCRIPTOR_SIZE,
      PANVK_DESCRIPTOR_SIZE);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct panvk_ssbo_addr *ssbos = ptr.cpu;
   for (uint32_t i = 0; i < shader->desc_info.dyn_ssbos.count; i++) {
      uint32_t src_handle = shader->desc_info.dyn_ssbos.map[i];
      uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
      uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
      const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
      const uint32_t dyn_buf_offset =
         desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];

      assert(set_idx < MAX_SETS);
      assert(set);

      ssbos[i] = (struct panvk_ssbo_addr){
         .base_addr = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset,
         .size = set->dyn_bufs[dyn_buf_idx].size,
      };
   }

   shader_desc_state->dyn_ssbos = ptr.gpu;
   return VK_SUCCESS;
}

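/*
 * Pack UNIFORM_BUFFER descriptors for the dynamic UBOs referenced by the
 * shader. They are appended after the regular UBOs, hence the
 * others.count[PANVK_BIFROST_DESC_TABLE_UBO] offset. UBO entries are counted
 * in 16-byte units.
 */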
static void
panvk_cmd_fill_dyn_ubos(const struct panvk_descriptor_state *desc_state,
                        const struct panvk_shader *shader,
                        struct mali_uniform_buffer_packed *ubos,
                        uint32_t ubo_count)
{
   for (uint32_t i = 0; i < shader->desc_info.dyn_ubos.count; i++) {
      uint32_t src_handle = shader->desc_info.dyn_ubos.map[i];
      uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
      uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
      uint32_t ubo_idx =
         i + shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_UBO];
      const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
      const uint32_t dyn_buf_offset =
         desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];

      assert(set_idx < MAX_SETS);
      assert(set);
      assert(ubo_idx < ubo_count);

      pan_pack(&ubos[ubo_idx], UNIFORM_BUFFER, cfg) {
         cfg.pointer = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset;
         cfg.entries = DIV_ROUND_UP(set->dyn_bufs[dyn_buf_idx].size, 16);
      }
   }
}

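/*
 * Allocate the per-stage descriptor tables (UBOs, textures, samplers,
 * images) consumed by Bifrost shaders. Dynamic UBOs are packed inline here;
 * image descriptors additionally get an attribute table, since images are
 * accessed through the attribute pipeline on these GPUs.
 */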
VkResult
panvk_per_arch(cmd_prepare_shader_desc_tables)(
   struct panvk_cmd_buffer *cmdbuf,
   const struct panvk_descriptor_state *desc_state,
   const struct panvk_shader *shader,
   struct panvk_shader_desc_state *shader_desc_state)
{
   memset(shader_desc_state->tables, 0, sizeof(shader_desc_state->tables));
   shader_desc_state->img_attrib_table = 0;

   if (!shader)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others.count); i++) {
      uint32_t desc_count =
         shader->desc_info.others.count[i] +
         (i == PANVK_BIFROST_DESC_TABLE_UBO ? shader->desc_info.dyn_ubos.count
                                            : 0);
      uint32_t desc_size =
         i == PANVK_BIFROST_DESC_TABLE_UBO ? 8 : PANVK_DESCRIPTOR_SIZE;

      if (!desc_count)
         continue;

      struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem(
         cmdbuf, desc, desc_count * desc_size, PANVK_DESCRIPTOR_SIZE);
      if (!ptr.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      shader_desc_state->tables[i] = ptr.gpu;

      if (i == PANVK_BIFROST_DESC_TABLE_UBO)
         panvk_cmd_fill_dyn_ubos(desc_state, shader, ptr.cpu, desc_count);

      /* Since the image table is actually the attribute table, it is
       * handled separately for vertex shaders. */
      if (i == PANVK_BIFROST_DESC_TABLE_IMG &&
          shader->info.stage != MESA_SHADER_VERTEX) {
         ptr = panvk_cmd_alloc_desc_array(cmdbuf, desc_count, ATTRIBUTE);
         if (!ptr.gpu)
            return VK_ERROR_OUT_OF_DEVICE_MEMORY;

         shader_desc_state->img_attrib_table = ptr.gpu;
      }
   }

   uint32_t tex_count =
      shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_TEXTURE];
   uint32_t sampler_count =
      shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_SAMPLER];

   if (tex_count && !sampler_count) {
      struct panfrost_ptr sampler = panvk_cmd_alloc_desc(cmdbuf, SAMPLER);
      if (!sampler.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      /* Texture accesses still go through the sampler table, so emit a
       * dummy sampler when the shader uses textures but no samplers. */
      pan_cast_and_pack(sampler.cpu, SAMPLER, cfg) {
         cfg.clamp_integer_array_indices = false;
      }

      shader_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER] = sampler.gpu;
   }

   return VK_SUCCESS;
}
#else
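/*
 * Equivalent of the dynamic SSBO/UBO handling on older GPUs: pack a BUFFER
 * descriptor for each dynamic buffer referenced by the shader, with the
 * dynamic offset folded into the address.
 */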
void
panvk_per_arch(cmd_fill_dyn_bufs)(
   const struct panvk_descriptor_state *desc_state,
   const struct panvk_shader *shader, struct mali_buffer_packed *buffers)
{
   if (!shader)
      return;

   for (uint32_t i = 0; i < shader->desc_info.dyn_bufs.count; i++) {
      uint32_t src_handle = shader->desc_info.dyn_bufs.map[i];
      uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
      uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
      const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
      const uint32_t dyn_buf_offset =
         desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];

      assert(set_idx < MAX_SETS);
      assert(set);

      pan_pack(&buffers[i], BUFFER, cfg) {
         cfg.size = set->dyn_bufs[dyn_buf_idx].size;
         cfg.address = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset;
      }
   }
}

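/*
 * Build the resource table for a shader: entry 0 points to the
 * driver-internal set, and the remaining entries mirror the application's
 * descriptor sets up to the last set actually used by the shader.
 */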
VkResult
panvk_per_arch(cmd_prepare_shader_res_table)(
   struct panvk_cmd_buffer *cmdbuf,
   const struct panvk_descriptor_state *desc_state,
   const struct panvk_shader *shader,
   struct panvk_shader_desc_state *shader_desc_state)
{
   if (!shader) {
      shader_desc_state->res_table = 0;
      return VK_SUCCESS;
   }

   uint32_t first_unused_set = util_last_bit(shader->desc_info.used_set_mask);
   uint32_t res_count = 1 + first_unused_set;
   struct panfrost_ptr ptr =
      panvk_cmd_alloc_desc_array(cmdbuf, res_count, RESOURCE);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct mali_resource_packed *res_table = ptr.cpu;

   /* First entry is the driver set table, where we store the vertex
    * attributes, the dummy sampler, the dynamic buffers and the vertex
    * buffers. */
   pan_pack(&res_table[0], RESOURCE, cfg) {
      cfg.address = shader_desc_state->driver_set.dev_addr;
      cfg.size = shader_desc_state->driver_set.size;
      cfg.contains_descriptors = cfg.size > 0;
   }

   for (uint32_t i = 0; i < first_unused_set; i++) {
      const struct panvk_descriptor_set *set = desc_state->sets[i];

      pan_pack(&res_table[i + 1], RESOURCE, cfg) {
         if (shader->desc_info.used_set_mask & BITFIELD_BIT(i)) {
            cfg.address = set->descs.dev;
            cfg.contains_descriptors = true;
            cfg.size = set->desc_count * PANVK_DESCRIPTOR_SIZE;
         } else {
            cfg.address = 0;
            cfg.contains_descriptors = false;
            cfg.size = 0;
         }
      }
   }

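   /* The table entry count rides in the low bits of the table address,
    * which the descriptor-sized alignment of the allocation leaves free. */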
   shader_desc_state->res_table = ptr.gpu | res_count;
   return VK_SUCCESS;
}
#endif

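/*
 * Upload the CPU-side copy of any dirty push descriptor set that is bound
 * and used by the current shaders, caching the resulting device address
 * until the set is pushed to again.
 */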
VkResult
panvk_per_arch(cmd_prepare_push_descs)(struct panvk_cmd_buffer *cmdbuf,
                                       struct panvk_descriptor_state *desc_state,
                                       uint32_t used_set_mask)
{
   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->push_sets); i++) {
      struct panvk_descriptor_set *push_set = desc_state->push_sets[i];

      if (!(used_set_mask & BITFIELD_BIT(i)) || !push_set ||
          desc_state->sets[i] != push_set || push_set->descs.dev ||
          !BITSET_TEST(desc_state->dirty_push_sets, i))
         continue;

      struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem(
         cmdbuf, desc, push_set->desc_count * PANVK_DESCRIPTOR_SIZE,
         PANVK_DESCRIPTOR_SIZE);
      if (!ptr.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      memcpy(ptr.cpu, push_set->descs.host,
             push_set->desc_count * PANVK_DESCRIPTOR_SIZE);
      push_set->descs.dev = ptr.gpu;

      BITSET_CLEAR(desc_state->dirty_push_sets, i);
   }

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindDescriptorSets2KHR)(
   VkCommandBuffer commandBuffer,
   const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* TODO: Invalidate only if the shader tables are disturbed */
   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
      cmd_desc_state_bind_sets(&cmdbuf->state.gfx.desc_state,
                               pBindDescriptorSetsInfo);

      gfx_state_set_dirty(cmdbuf, DESC_STATE);
   }

   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      cmd_desc_state_bind_sets(&cmdbuf->state.compute.desc_state,
                               pBindDescriptorSetsInfo);

      compute_state_set_dirty(cmdbuf, DESC_STATE);
   }
}

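/*
 * Write pushed descriptors to the CPU-side shadow set. The GPU copy is
 * invalidated so it gets re-uploaded by cmd_prepare_push_descs(), and the
 * layout pointer is cleared again because the set only borrows it for the
 * duration of the write.
 */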
static void
push_desc_set_write(struct panvk_cmd_buffer *cmd,
                    struct panvk_descriptor_state *desc,
                    const VkPushDescriptorSetInfoKHR *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, playout, info->layout);

   const struct panvk_descriptor_set_layout *set_layout =
      to_panvk_descriptor_set_layout(playout->set_layouts[info->set]);

   struct panvk_descriptor_set *push_set =
      cmd_get_push_desc_set(&cmd->vk, desc, info->set);
   if (!push_set)
      return;

   push_set->layout = set_layout;
   push_set->desc_count = set_layout->desc_count;

   for (uint32_t i = 0; i < info->descriptorWriteCount; i++)
      panvk_per_arch(descriptor_set_write)(push_set,
                                           &info->pDescriptorWrites[i], true);

   push_set->descs.dev = 0;
   push_set->layout = NULL;
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPushDescriptorSet2KHR)(
   VkCommandBuffer commandBuffer,
   const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
      push_desc_set_write(cmdbuf, &cmdbuf->state.gfx.desc_state,
                          pPushDescriptorSetInfo);

      gfx_state_set_dirty(cmdbuf, DESC_STATE);
   }

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      push_desc_set_write(cmdbuf, &cmdbuf->state.compute.desc_state,
                          pPushDescriptorSetInfo);

      compute_state_set_dirty(cmdbuf, DESC_STATE);
   }
}

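/*
 * Template variant of CmdPushDescriptorSet2KHR: the descriptor state is
 * picked based on the template's bind point rather than a stage mask.
 */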
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPushDescriptorSetWithTemplate2KHR)(
   VkCommandBuffer commandBuffer, const VkPushDescriptorSetWithTemplateInfoKHR
                                     *pPushDescriptorSetWithTemplateInfo)
{
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(vk_pipeline_layout, playout,
                  pPushDescriptorSetWithTemplateInfo->layout);
   const uint32_t set = pPushDescriptorSetWithTemplateInfo->set;
   const struct panvk_descriptor_set_layout *set_layout =
      to_panvk_descriptor_set_layout(playout->set_layouts[set]);
   struct panvk_descriptor_state *desc_state =
      panvk_cmd_get_desc_state(cmdbuf, template->bind_point);
   struct panvk_descriptor_set *push_set =
      cmd_get_push_desc_set(&cmdbuf->vk, desc_state, set);
   if (!push_set)
      return;

   push_set->layout = set_layout;
   push_set->desc_count = set_layout->desc_count;

   panvk_per_arch(descriptor_set_write_template)(
      push_set, template, pPushDescriptorSetWithTemplateInfo->pData, true);

   push_set->descs.dev = 0;
   push_set->layout = NULL;

   if (template->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS)
      gfx_state_set_dirty(cmdbuf, DESC_STATE);
   else
      compute_state_set_dirty(cmdbuf, DESC_STATE);
}