1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 * SPDX-License-Identifier: MIT
5 */
6
7 /**
8 * @file
9 *
10 * We use the bindless descriptor model, which maps fairly closely to how
11 * Vulkan descriptor sets work. The two exceptions are input attachments and
12 * dynamic descriptors, which have to be patched when recording command
13 * buffers. We reserve an extra descriptor set for these. This descriptor set
14 * contains all the input attachments in the pipeline, in order, and then all
15 * the dynamic descriptors. The dynamic descriptors are stored in the CPU-side
16 * data structure for each tu_descriptor_set, and then combined into one big
17 * descriptor set at CmdBindDescriptors time/draw time.
18 */
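
/* A rough sketch (illustrative only, not normative) of the extra set
 * described above; slot counts and ordering are placeholders:
 *
 *   reserved set: [ input attachment 0 .. N-1 ][ dynamic descriptor 0 .. M-1 ]
 *
 * Input attachments are patched while recording the command buffer; the
 * dynamic descriptors live in each tu_descriptor_set's CPU-side storage and
 * are copied into this combined set when descriptor sets are bound or at
 * draw time.
 */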
19
20 #include "tu_descriptor_set.h"
21
22 #include <fcntl.h>
23
24 #include "vulkan/vulkan_android.h"
25
26 #include "util/mesa-sha1.h"
27 #include "vk_descriptors.h"
28 #include "vk_util.h"
29 #include "vk_acceleration_structure.h"
30
31 #include "tu_buffer.h"
32 #include "tu_buffer_view.h"
33 #include "tu_device.h"
34 #include "tu_image.h"
35 #include "tu_formats.h"
36 #include "tu_rmv.h"
37 #include "bvh/tu_build_interface.h"
38
39 static inline uint8_t *
40 pool_base(struct tu_descriptor_pool *pool)
41 {
42 return pool->host_bo ?: (uint8_t *) pool->bo->map;
43 }
44
45 static uint32_t
46 descriptor_size(struct tu_device *dev,
47 const VkDescriptorSetLayoutBinding *binding,
48 VkDescriptorType type)
49 {
50 switch (type) {
51 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
52 /* We make offsets and sizes all 16 dwords, to match how the hardware
53 * interprets indices passed to sample/load/store instructions in
54 * multiples of 16 dwords. This means that "normal" descriptors are all
55 * of size 16, with padding for smaller descriptors like uniform storage
56 * descriptors which are less than 16 dwords. However combined images
57 * and samplers are actually two descriptors, so they have size 2.
58 */
59 return A6XX_TEX_CONST_DWORDS * 4 * 2;
60 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
61 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
62 /* isam.v allows using a single 16-bit descriptor for both 16-bit and
63 * 32-bit loads. If not available but 16-bit storage is still supported,
64 * two separate descriptors are required.
65 */
66 return A6XX_TEX_CONST_DWORDS * 4 * (1 +
67 COND(dev->physical_device->info->a6xx.storage_16bit &&
68 !dev->physical_device->info->a6xx.has_isam_v, 1) +
69 COND(dev->physical_device->info->a7xx.storage_8bit, 1));
70 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
71 return binding->descriptorCount;
72 case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
73 default:
74 return A6XX_TEX_CONST_DWORDS * 4;
75 }
76 }
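
/* Worked example of the sizing rules above (illustrative; assumes a device
 * with 16-bit storage, no isam.v and no 8-bit storage, and
 * A6XX_TEX_CONST_DWORDS == 16):
 *
 *   descriptor_size(dev, NULL, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE)
 *      -> 16 * 4           =  64 bytes (one slot)
 *   descriptor_size(dev, NULL, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
 *      -> 16 * 4 * 2       = 128 bytes (texture + sampler)
 *   descriptor_size(dev, NULL, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
 *      -> 16 * 4 * (1 + 1) = 128 bytes (16-bit view + 32-bit view)
 */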
77
78 static uint32_t
79 mutable_descriptor_size(struct tu_device *dev,
80 const VkMutableDescriptorTypeListEXT *list)
81 {
82 uint32_t max_size = 0;
83
84 for (uint32_t i = 0; i < list->descriptorTypeCount; i++) {
85 uint32_t size = descriptor_size(dev, NULL, list->pDescriptorTypes[i]);
86 max_size = MAX2(max_size, size);
87 }
88
89 return max_size;
90 }
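
/* For example (illustrative), a VK_DESCRIPTOR_TYPE_MUTABLE_EXT binding whose
 * type list is { SAMPLED_IMAGE, STORAGE_BUFFER } is sized for the larger of
 * the two, so on the device assumed above it occupies 128 bytes even while it
 * currently holds an image descriptor.
 */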
91
92 static void
93 tu_descriptor_set_layout_destroy(struct vk_device *vk_dev,
94 struct vk_descriptor_set_layout *vk_layout)
95 {
96 struct tu_device *dev = container_of(vk_dev, struct tu_device, vk);
97 struct tu_descriptor_set_layout *layout =
98 container_of(vk_layout, struct tu_descriptor_set_layout, vk);
99
100 if (layout->embedded_samplers)
101 tu_bo_finish(dev, layout->embedded_samplers);
102 vk_descriptor_set_layout_destroy(vk_dev, vk_layout);
103 }
104
105 VKAPI_ATTR VkResult VKAPI_CALL
106 tu_CreateDescriptorSetLayout(
107 VkDevice _device,
108 const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
109 const VkAllocationCallbacks *pAllocator,
110 VkDescriptorSetLayout *pSetLayout)
111 {
112 VK_FROM_HANDLE(tu_device, device, _device);
113 struct tu_descriptor_set_layout *set_layout;
114
115 assert(pCreateInfo->sType ==
116 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
117 const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
118 vk_find_struct_const(
119 pCreateInfo->pNext,
120 DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
121 const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
122 vk_find_struct_const(
123 pCreateInfo->pNext,
124 MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
125
126 uint32_t num_bindings = 0;
127 uint32_t immutable_sampler_count = 0;
128 uint32_t ycbcr_sampler_count = 0;
129 for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
130 num_bindings = MAX2(num_bindings, pCreateInfo->pBindings[j].binding + 1);
131 if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
132 pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
133 pCreateInfo->pBindings[j].pImmutableSamplers) {
134 immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
135
136 bool has_ycbcr_sampler = false;
137 for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
138 if (tu_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])->vk.ycbcr_conversion)
139 has_ycbcr_sampler = true;
140 }
141
142 if (has_ycbcr_sampler)
143 ycbcr_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
144 }
145 }
146
147 uint32_t samplers_offset =
148 offsetof_arr(struct tu_descriptor_set_layout, binding, num_bindings);
149
150 /* note: only need to store TEX_SAMP_DWORDS for immutable samplers,
151 * but using struct tu_sampler makes things simpler */
152 uint32_t size = samplers_offset +
153 immutable_sampler_count * sizeof(struct tu_sampler) +
154 ycbcr_sampler_count * sizeof(struct vk_ycbcr_conversion);
155
156 set_layout =
157 (struct tu_descriptor_set_layout *) vk_descriptor_set_layout_zalloc(
158 &device->vk, size);
159 if (!set_layout)
160 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
161
162 set_layout->flags = pCreateInfo->flags;
163 set_layout->vk.destroy = tu_descriptor_set_layout_destroy;
164
165 /* We just allocate all the immutable samplers at the end of the struct */
166 struct tu_sampler *samplers =
167 (struct tu_sampler *) &set_layout->binding[num_bindings];
168 struct vk_ycbcr_conversion_state *ycbcr_samplers =
169 (struct vk_ycbcr_conversion_state *) &samplers[immutable_sampler_count];
170
171 VkDescriptorSetLayoutBinding *bindings = NULL;
172 VkResult result = vk_create_sorted_bindings(
173 pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
174 if (result != VK_SUCCESS) {
175 vk_object_free(&device->vk, pAllocator, set_layout);
176 return vk_error(device, result);
177 }
178
179 set_layout->binding_count = num_bindings;
180 set_layout->shader_stages = 0;
181 set_layout->has_immutable_samplers = false;
182 set_layout->has_inline_uniforms = false;
183 set_layout->size = 0;
184
185 uint32_t dynamic_offset_size = 0;
186
187 for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
188 const VkDescriptorSetLayoutBinding *binding = bindings + j;
189 uint32_t b = binding->binding;
190
191 set_layout->binding[b].type = binding->descriptorType;
192 set_layout->binding[b].array_size =
193 binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ?
194 1 : binding->descriptorCount;
195 set_layout->binding[b].offset = set_layout->size;
196 set_layout->binding[b].dynamic_offset_offset = dynamic_offset_size;
197 set_layout->binding[b].shader_stages = binding->stageFlags;
198
199 if (binding->descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
200 /* For mutable descriptor types we must allocate a size that fits the
201 * largest descriptor type that the binding can mutate to.
202 */
203 set_layout->binding[b].size =
204 mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[j]);
205 } else {
206 set_layout->binding[b].size =
207 descriptor_size(device, binding, binding->descriptorType);
208 }
209
210 if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
211 set_layout->has_inline_uniforms = true;
212
213 if (variable_flags && binding->binding < variable_flags->bindingCount &&
214 (variable_flags->pBindingFlags[binding->binding] &
215 VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
216 assert(!binding->pImmutableSamplers); /* Terribly ill defined how
217 many samplers are valid */
218 assert(binding->binding == num_bindings - 1);
219
220 set_layout->has_variable_descriptors = true;
221 }
222
223 if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
224 binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
225 binding->pImmutableSamplers) {
226 set_layout->binding[b].immutable_samplers_offset = samplers_offset;
227 set_layout->has_immutable_samplers = true;
228
229 for (uint32_t i = 0; i < binding->descriptorCount; i++)
230 samplers[i] = *tu_sampler_from_handle(binding->pImmutableSamplers[i]);
231
232 samplers += binding->descriptorCount;
233 samplers_offset += sizeof(struct tu_sampler) * binding->descriptorCount;
234
235 bool has_ycbcr_sampler = false;
236 for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
237 if (tu_sampler_from_handle(binding->pImmutableSamplers[i])->vk.ycbcr_conversion)
238 has_ycbcr_sampler = true;
239 }
240
241 if (has_ycbcr_sampler) {
242 set_layout->binding[b].ycbcr_samplers_offset =
243 (const char*)ycbcr_samplers - (const char*)set_layout;
244 for (uint32_t i = 0; i < binding->descriptorCount; i++) {
245 struct tu_sampler *sampler = tu_sampler_from_handle(binding->pImmutableSamplers[i]);
246 if (sampler->vk.ycbcr_conversion)
247 ycbcr_samplers[i] = sampler->vk.ycbcr_conversion->state;
248 else
249 ycbcr_samplers[i].ycbcr_model = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY;
250 }
251 ycbcr_samplers += binding->descriptorCount;
252 } else {
253 set_layout->binding[b].ycbcr_samplers_offset = 0;
254 }
255 }
256
257 uint32_t size =
258 ALIGN_POT(set_layout->binding[b].array_size * set_layout->binding[b].size, 4 * A6XX_TEX_CONST_DWORDS);
259 if (vk_descriptor_type_is_dynamic(binding->descriptorType)) {
260 dynamic_offset_size += size;
261 } else {
262 set_layout->size += size;
263 }
264
265 set_layout->shader_stages |= binding->stageFlags;
266 }
267
268 free(bindings);
269
270 set_layout->dynamic_offset_size = dynamic_offset_size;
271
272 if (pCreateInfo->flags &
273 VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT) {
274 result = tu_bo_init_new(device, &set_layout->vk.base,
275 &set_layout->embedded_samplers, set_layout->size,
276 (enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP |
277 TU_BO_ALLOC_INTERNAL_RESOURCE),
278 "embedded samplers");
279 if (result != VK_SUCCESS) {
280 vk_object_free(&device->vk, pAllocator, set_layout);
281 return vk_error(device, result);
282 }
283
284 result = tu_bo_map(device, set_layout->embedded_samplers, NULL);
285 if (result != VK_SUCCESS) {
286 tu_bo_finish(device, set_layout->embedded_samplers);
287 vk_object_free(&device->vk, pAllocator, set_layout);
288 return vk_error(device, result);
289 }
290
291 char *map = (char *) set_layout->embedded_samplers->map;
292 for (unsigned i = 0; i < set_layout->binding_count; i++) {
293 if (!set_layout->binding[i].immutable_samplers_offset)
294 continue;
295
296 unsigned offset = set_layout->binding[i].offset;
297 const struct tu_sampler *sampler =
298 (const struct tu_sampler *)((const char *)set_layout +
299 set_layout->binding[i].immutable_samplers_offset);
300 assert(set_layout->binding[i].array_size == 1);
301 memcpy(map + offset, sampler->descriptor,
302 sizeof(sampler->descriptor));
303 }
304 }
305
306 *pSetLayout = tu_descriptor_set_layout_to_handle(set_layout);
307
308 return VK_SUCCESS;
309 }
310
311 VKAPI_ATTR void VKAPI_CALL
312 tu_GetDescriptorSetLayoutSupport(
313 VkDevice _device,
314 const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
315 VkDescriptorSetLayoutSupport *pSupport)
316 {
317 VK_FROM_HANDLE(tu_device, device, _device);
318
319 VkDescriptorSetLayoutBinding *bindings = NULL;
320 VkResult result = vk_create_sorted_bindings(
321 pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
322 if (result != VK_SUCCESS) {
323 pSupport->supported = false;
324 return;
325 }
326
327 const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
328 vk_find_struct_const(
329 pCreateInfo->pNext,
330 DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
331 VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count =
332 vk_find_struct(
333 pSupport->pNext,
334 DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT);
335 const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
336 vk_find_struct_const(
337 pCreateInfo->pNext,
338 MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
339
340 if (variable_count) {
341 variable_count->maxVariableDescriptorCount = 0;
342 }
343
344 bool supported = true;
345 uint64_t size = 0;
346 for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
347 const VkDescriptorSetLayoutBinding *binding = bindings + i;
348
349 uint64_t descriptor_sz;
350
351 if (vk_descriptor_type_is_dynamic(binding->descriptorType)) {
352 descriptor_sz = 0;
353 } else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
354 const VkMutableDescriptorTypeListEXT *list =
355 &mutable_info->pMutableDescriptorTypeLists[i];
356
357 for (uint32_t j = 0; j < list->descriptorTypeCount; j++) {
358 /* Don't support the combined image sampler and inline uniform block
359 * types for mutable descriptors */
360 if (list->pDescriptorTypes[j] == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
361 list->pDescriptorTypes[j] == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
362 supported = false;
363 goto out;
364 }
365 }
366
367 descriptor_sz =
368 mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[i]);
369 } else {
370 descriptor_sz = descriptor_size(device, binding, binding->descriptorType);
371 }
372 uint64_t descriptor_alignment = 4 * A6XX_TEX_CONST_DWORDS;
373
374 if (size && !ALIGN_POT(size, descriptor_alignment)) {
375 supported = false;
376 }
377 size = ALIGN_POT(size, descriptor_alignment);
378
379 uint64_t max_count = MAX_SET_SIZE;
380 unsigned descriptor_count = binding->descriptorCount;
381 if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
382 max_count = MAX_SET_SIZE - size;
383 descriptor_count = descriptor_sz;
384 descriptor_sz = 1;
385 } else if (descriptor_sz) {
386 max_count = (MAX_SET_SIZE - size) / descriptor_sz;
387 }
388
389 if (max_count < descriptor_count) {
390 supported = false;
391 }
392
393 if (variable_flags && binding->binding < variable_flags->bindingCount &&
394 variable_count &&
395 (variable_flags->pBindingFlags[binding->binding] &
396 VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
397 variable_count->maxVariableDescriptorCount =
398 MIN2(UINT32_MAX, max_count);
399 }
400 size += descriptor_count * descriptor_sz;
401 }
402
403 out:
404 free(bindings);
405
406 pSupport->supported = supported;
407 }
408
409 VKAPI_ATTR void VKAPI_CALL
410 tu_GetDescriptorSetLayoutSizeEXT(
411 VkDevice _device,
412 VkDescriptorSetLayout _layout,
413 VkDeviceSize *pLayoutSizeInBytes)
414 {
415 VK_FROM_HANDLE(tu_descriptor_set_layout, layout, _layout);
416
417 *pLayoutSizeInBytes = layout->size;
418 }
419
420 VKAPI_ATTR void VKAPI_CALL
421 tu_GetDescriptorSetLayoutBindingOffsetEXT(
422 VkDevice _device,
423 VkDescriptorSetLayout _layout,
424 uint32_t binding,
425 VkDeviceSize *pOffset)
426 {
427 VK_FROM_HANDLE(tu_descriptor_set_layout, layout, _layout);
428
429 assert(binding < layout->binding_count);
430 *pOffset = layout->binding[binding].offset;
431 }
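
/* Sketch of how an application pairs the two queries above with
 * vkGetDescriptorEXT() under VK_EXT_descriptor_buffer; "layout", "db_map",
 * "addr_info" and binding 0 are illustrative placeholders:
 *
 *   VkDeviceSize set_size, binding_offset;
 *   vkGetDescriptorSetLayoutSizeEXT(dev, layout, &set_size);
 *   vkGetDescriptorSetLayoutBindingOffsetEXT(dev, layout, 0, &binding_offset);
 *
 *   VkDescriptorGetInfoEXT info = {
 *      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
 *      .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 *      .data.pUniformBuffer = &addr_info,
 *   };
 *   vkGetDescriptorEXT(dev, &info, ubo_desc_size,
 *                      (char *)db_map + binding_offset);
 *
 * where ubo_desc_size comes from
 * VkPhysicalDeviceDescriptorBufferPropertiesEXT::uniformBufferDescriptorSize.
 */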
432
433 /* Note: we must hash any values used in tu_lower_io(). */
434
435 #define SHA1_UPDATE_VALUE(ctx, x) _mesa_sha1_update(ctx, &(x), sizeof(x));
436
437 static void
438 sha1_update_ycbcr_sampler(struct mesa_sha1 *ctx,
439 const struct vk_ycbcr_conversion_state *sampler)
440 {
441 SHA1_UPDATE_VALUE(ctx, sampler->ycbcr_model);
442 SHA1_UPDATE_VALUE(ctx, sampler->ycbcr_range);
443 SHA1_UPDATE_VALUE(ctx, sampler->format);
444 }
445
446 static void
447 sha1_update_descriptor_set_binding_layout(struct mesa_sha1 *ctx,
448 const struct tu_descriptor_set_binding_layout *layout,
449 const struct tu_descriptor_set_layout *set_layout)
450 {
451 SHA1_UPDATE_VALUE(ctx, layout->type);
452 SHA1_UPDATE_VALUE(ctx, layout->offset);
453 SHA1_UPDATE_VALUE(ctx, layout->size);
454 SHA1_UPDATE_VALUE(ctx, layout->array_size);
455 SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_offset);
456 SHA1_UPDATE_VALUE(ctx, layout->immutable_samplers_offset);
457
458 const struct vk_ycbcr_conversion_state *ycbcr_samplers =
459 tu_immutable_ycbcr_samplers(set_layout, layout);
460
461 if (ycbcr_samplers) {
462 for (unsigned i = 0; i < layout->array_size; i++)
463 sha1_update_ycbcr_sampler(ctx, ycbcr_samplers + i);
464 }
465 }
466
467
468 static void
469 sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx,
470 const struct tu_descriptor_set_layout *layout)
471 {
472 SHA1_UPDATE_VALUE(ctx, layout->has_variable_descriptors);
473
474 for (uint16_t i = 0; i < layout->binding_count; i++)
475 sha1_update_descriptor_set_binding_layout(ctx, &layout->binding[i],
476 layout);
477 }
478
479 /*
480 * Pipeline layouts. These have nothing to do with the pipeline. They are
481 * just multiple descriptor set layouts pasted together.
482 */
483
484 void
485 tu_pipeline_layout_init(struct tu_pipeline_layout *layout)
486 {
487 struct mesa_sha1 ctx;
488 _mesa_sha1_init(&ctx);
489 for (unsigned s = 0; s < layout->num_sets; s++) {
490 if (layout->set[s].layout)
491 sha1_update_descriptor_set_layout(&ctx, layout->set[s].layout);
492 }
493 _mesa_sha1_update(&ctx, &layout->num_sets, sizeof(layout->num_sets));
494 _mesa_sha1_update(&ctx, &layout->push_constant_size,
495 sizeof(layout->push_constant_size));
496 _mesa_sha1_final(&ctx, layout->sha1);
497 }
498
499 VKAPI_ATTR VkResult VKAPI_CALL
500 tu_CreatePipelineLayout(VkDevice _device,
501 const VkPipelineLayoutCreateInfo *pCreateInfo,
502 const VkAllocationCallbacks *pAllocator,
503 VkPipelineLayout *pPipelineLayout)
504 {
505 VK_FROM_HANDLE(tu_device, device, _device);
506 struct tu_pipeline_layout *layout;
507
508 assert(pCreateInfo->sType ==
509 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
510
511 layout = (struct tu_pipeline_layout *) vk_object_alloc(
512 &device->vk, pAllocator, sizeof(*layout),
513 VK_OBJECT_TYPE_PIPELINE_LAYOUT);
514 if (layout == NULL)
515 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
516
517 layout->num_sets = pCreateInfo->setLayoutCount;
518 for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
519 VK_FROM_HANDLE(tu_descriptor_set_layout, set_layout,
520 pCreateInfo->pSetLayouts[set]);
521
522 assert(set < device->physical_device->usable_sets);
523 layout->set[set].layout = set_layout;
524 if (set_layout)
525 vk_descriptor_set_layout_ref(&set_layout->vk);
526 }
527
528 layout->push_constant_size = 0;
529
530 for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
531 const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i;
532 layout->push_constant_size =
533 MAX2(layout->push_constant_size, range->offset + range->size);
534 }
535
536 layout->push_constant_size = align(layout->push_constant_size, 16);
537
538 tu_pipeline_layout_init(layout);
539
540 *pPipelineLayout = tu_pipeline_layout_to_handle(layout);
541
542 return VK_SUCCESS;
543 }
544
545 VKAPI_ATTR void VKAPI_CALL
546 tu_DestroyPipelineLayout(VkDevice _device,
547 VkPipelineLayout _pipelineLayout,
548 const VkAllocationCallbacks *pAllocator)
549 {
550 VK_FROM_HANDLE(tu_device, device, _device);
551 VK_FROM_HANDLE(tu_pipeline_layout, pipeline_layout, _pipelineLayout);
552
553 if (!pipeline_layout)
554 return;
555
556 for (uint32_t i = 0; i < pipeline_layout->num_sets; i++) {
557 if (pipeline_layout->set[i].layout)
558 vk_descriptor_set_layout_unref(&device->vk, &pipeline_layout->set[i].layout->vk);
559 }
560
561 vk_object_free(&device->vk, pAllocator, pipeline_layout);
562 }
563
564 #define EMPTY 1
565
566 static VkResult
567 tu_descriptor_set_create(struct tu_device *device,
568 struct tu_descriptor_pool *pool,
569 struct tu_descriptor_set_layout *layout,
570 uint32_t variable_count,
571 struct tu_descriptor_set **out_set)
572 {
573 struct tu_descriptor_set *set;
574 unsigned dynamic_offset = sizeof(struct tu_descriptor_set);
575 unsigned mem_size = dynamic_offset + layout->dynamic_offset_size;
576
577 if (pool->host_memory_base) {
578 if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
579 return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY);
580
581 set = (struct tu_descriptor_set*)pool->host_memory_ptr;
582 pool->host_memory_ptr += mem_size;
583 } else {
584 set = (struct tu_descriptor_set *) vk_alloc2(
585 &device->vk.alloc, NULL, mem_size, 8,
586 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
587
588 if (!set)
589 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
590 }
591
592 memset(set, 0, mem_size);
593 vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
594
595 if (layout->dynamic_offset_size) {
596 set->dynamic_descriptors = (uint32_t *)((uint8_t*)set + dynamic_offset);
597 }
598
599 set->layout = layout;
600 set->pool = pool;
601 uint32_t layout_size = layout->size;
602 if (layout->has_variable_descriptors) {
603 struct tu_descriptor_set_binding_layout *binding =
604 &layout->binding[layout->binding_count - 1];
605 if (binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
606 layout_size = binding->offset +
607 ALIGN(variable_count, 4 * A6XX_TEX_CONST_DWORDS);
608 } else {
609 uint32_t stride = binding->size;
610 layout_size = binding->offset + variable_count * stride;
611 }
612 }
613
614 if (layout_size) {
615 set->size = layout_size;
616
617 if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {
618 vk_object_free(&device->vk, NULL, set);
619 return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY);
620 }
621
622 /* try to allocate linearly first, so that we don't spend
623 * time looking for gaps if the app only allocates &
624 * resets via the pool. */
625 if (pool->current_offset + layout_size <= pool->size) {
626 set->mapped_ptr = (uint32_t*)(pool_base(pool) + pool->current_offset);
627 set->va = pool->host_bo ? 0 : pool->bo->iova + pool->current_offset;
628
629 if (!pool->host_memory_base) {
630 pool->entries[pool->entry_count].offset = pool->current_offset;
631 pool->entries[pool->entry_count].size = layout_size;
632 pool->entries[pool->entry_count].set = set;
633 pool->entry_count++;
634 }
635 pool->current_offset += layout_size;
636 } else if (!pool->host_memory_base) {
637 uint64_t offset = 0;
638 int index;
639
640 for (index = 0; index < pool->entry_count; ++index) {
641 if (pool->entries[index].size == 0)
642 continue;
643
644 if (pool->entries[index].offset - offset >= layout_size)
645 break;
646 offset = pool->entries[index].offset + pool->entries[index].size;
647 }
648
649 if (pool->size - offset < layout_size) {
650 vk_object_free(&device->vk, NULL, set);
651 return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY);
652 }
653
654 set->mapped_ptr = (uint32_t*)(pool_base(pool) + offset);
655 set->va = pool->host_bo ? 0 : pool->bo->iova + offset;
656
657 memmove(&pool->entries[index + 1], &pool->entries[index],
658 sizeof(pool->entries[0]) * (pool->entry_count - index));
659 pool->entries[index].offset = offset;
660 pool->entries[index].size = layout_size;
661 pool->entries[index].set = set;
662 pool->entry_count++;
663 } else
664 return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY);
665 } else if (!pool->host_memory_base) {
666 /* Also keep track of zero sized descriptor sets, such as descriptor
667 * sets with just dynamic descriptors, so that we can free the sets on
668 * vkDestroyDescriptorPool().
669 */
670 pool->entries[pool->entry_count].offset = ~0;
671 pool->entries[pool->entry_count].size = 0;
672 pool->entries[pool->entry_count].set = set;
673 pool->entry_count++;
674 }
675
676 if (layout->has_immutable_samplers) {
677 for (unsigned i = 0; i < layout->binding_count; ++i) {
678 if (!layout->binding[i].immutable_samplers_offset)
679 continue;
680
681 unsigned offset = layout->binding[i].offset / 4;
682 if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
683 offset += A6XX_TEX_CONST_DWORDS;
684
685 const struct tu_sampler *samplers =
686 (const struct tu_sampler *)((const char *)layout +
687 layout->binding[i].immutable_samplers_offset);
688 for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
689 memcpy(set->mapped_ptr + offset, samplers[j].descriptor,
690 sizeof(samplers[j].descriptor));
691 offset += layout->binding[i].size / 4;
692 }
693 }
694 }
695
696 vk_descriptor_set_layout_ref(&layout->vk);
697 list_addtail(&set->pool_link, &pool->desc_sets);
698
699 *out_set = set;
700 return VK_SUCCESS;
701 }
702
703 static void
704 tu_descriptor_set_destroy(struct tu_device *device,
705 struct tu_descriptor_pool *pool,
706 struct tu_descriptor_set *set,
707 bool free_bo)
708 {
709 assert(!pool->host_memory_base);
710
711 if (free_bo && !pool->host_memory_base) {
712 for (int i = 0; i < pool->entry_count; ++i) {
713 if (pool->entries[i].set == set) {
714 if (set->size) {
715 ASSERTED uint32_t offset =
716 (uint8_t *) set->mapped_ptr - pool_base(pool);
717 assert(pool->entries[i].offset == offset);
718 } else {
719 assert(pool->entries[i].size == 0);
720 }
721
722 memmove(&pool->entries[i], &pool->entries[i+1],
723 sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
724 --pool->entry_count;
725 break;
726 }
727 }
728 }
729
730 vk_object_free(&device->vk, NULL, set);
731 }
732
733 VKAPI_ATTR VkResult VKAPI_CALL
734 tu_CreateDescriptorPool(VkDevice _device,
735 const VkDescriptorPoolCreateInfo *pCreateInfo,
736 const VkAllocationCallbacks *pAllocator,
737 VkDescriptorPool *pDescriptorPool)
738 {
739 VK_FROM_HANDLE(tu_device, device, _device);
740 struct tu_descriptor_pool *pool;
741 uint64_t size = sizeof(struct tu_descriptor_pool);
742 uint64_t bo_size = 0, dynamic_size = 0;
743 VkResult ret;
744
745 const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
746 vk_find_struct_const( pCreateInfo->pNext,
747 MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
748
749 const VkDescriptorPoolInlineUniformBlockCreateInfo *inline_info =
750 vk_find_struct_const(pCreateInfo->pNext,
751 DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO);
752
753 if (inline_info) {
754 /* We have to factor in the padding for each binding. The sizes are 4
755 * aligned but we have to align to 4 * A6XX_TEX_CONST_DWORDS bytes, and in
756 * the worst case each inline binding has a size of 4 bytes and we have
757 * to pad each one out.
758 */
759 bo_size += (4 * A6XX_TEX_CONST_DWORDS - 4) *
760 inline_info->maxInlineUniformBlockBindings;
761 }
762
763 for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
764 const VkDescriptorPoolSize *pool_size = &pCreateInfo->pPoolSizes[i];
765
766 switch (pool_size->type) {
767 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
768 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
769 dynamic_size += descriptor_size(device, NULL, pool_size->type) *
770 pool_size->descriptorCount;
771 break;
772 case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
773 if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount &&
774 mutable_info->pMutableDescriptorTypeLists[i].descriptorTypeCount > 0) {
775 bo_size +=
776 mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[i]) *
777 pool_size->descriptorCount;
778 } else {
779 /* Allocate the maximum size possible. */
780 bo_size += 2 * A6XX_TEX_CONST_DWORDS * 4 *
781 pool_size->descriptorCount;
782 }
783 break;
784 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
785 bo_size += pool_size->descriptorCount;
786 break;
787 default:
788 bo_size += descriptor_size(device, NULL, pool_size->type) *
789 pool_size->descriptorCount;
790 break;
791 }
792 }
793
794 if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
795 uint64_t host_size = pCreateInfo->maxSets * sizeof(struct tu_descriptor_set);
796 host_size += dynamic_size;
797 size += host_size;
798 } else {
799 size += sizeof(struct tu_descriptor_pool_entry) * pCreateInfo->maxSets;
800 }
801
802 pool = (struct tu_descriptor_pool *) vk_object_zalloc(
803 &device->vk, pAllocator, size, VK_OBJECT_TYPE_DESCRIPTOR_POOL);
804 if (!pool)
805 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
806
807 if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
808 pool->host_memory_base = (uint8_t*)pool + sizeof(struct tu_descriptor_pool);
809 pool->host_memory_ptr = pool->host_memory_base;
810 pool->host_memory_end = (uint8_t*)pool + size;
811 }
812
813 if (bo_size) {
814 if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT)) {
815 ret = tu_bo_init_new(device, &pool->base, &pool->bo, bo_size,
816 TU_BO_ALLOC_ALLOW_DUMP, "descriptor pool");
817 if (ret)
818 goto fail_alloc;
819
820 ret = tu_bo_map(device, pool->bo, NULL);
821 if (ret)
822 goto fail_map;
823 } else {
824 pool->host_bo =
825 (uint8_t *) vk_alloc2(&device->vk.alloc, pAllocator, bo_size, 8,
826 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
827 if (!pool->host_bo) {
828 ret = VK_ERROR_OUT_OF_HOST_MEMORY;
829 goto fail_alloc;
830 }
831 }
832 }
833 pool->size = bo_size;
834 pool->max_entry_count = pCreateInfo->maxSets;
835
836 list_inithead(&pool->desc_sets);
837
838 TU_RMV(descriptor_pool_create, device, pCreateInfo, pool);
839
840 *pDescriptorPool = tu_descriptor_pool_to_handle(pool);
841 return VK_SUCCESS;
842
843 fail_map:
844 tu_bo_finish(device, pool->bo);
845 fail_alloc:
846 vk_object_free(&device->vk, pAllocator, pool);
847 return ret;
848 }
849
850 VKAPI_ATTR void VKAPI_CALL
851 tu_DestroyDescriptorPool(VkDevice _device,
852 VkDescriptorPool _pool,
853 const VkAllocationCallbacks *pAllocator)
854 {
855 VK_FROM_HANDLE(tu_device, device, _device);
856 VK_FROM_HANDLE(tu_descriptor_pool, pool, _pool);
857
858 if (!pool)
859 return;
860
861 TU_RMV(resource_destroy, device, pool);
862
863 list_for_each_entry_safe(struct tu_descriptor_set, set,
864 &pool->desc_sets, pool_link) {
865 vk_descriptor_set_layout_unref(&device->vk, &set->layout->vk);
866 }
867
868 if (!pool->host_memory_base) {
869 for(int i = 0; i < pool->entry_count; ++i) {
870 tu_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
871 }
872 }
873
874 if (pool->size) {
875 if (pool->host_bo)
876 vk_free2(&device->vk.alloc, pAllocator, pool->host_bo);
877 else
878 tu_bo_finish(device, pool->bo);
879 }
880
881 vk_object_free(&device->vk, pAllocator, pool);
882 }
883
884 VKAPI_ATTR VkResult VKAPI_CALL
885 tu_ResetDescriptorPool(VkDevice _device,
886 VkDescriptorPool descriptorPool,
887 VkDescriptorPoolResetFlags flags)
888 {
889 VK_FROM_HANDLE(tu_device, device, _device);
890 VK_FROM_HANDLE(tu_descriptor_pool, pool, descriptorPool);
891
892 list_for_each_entry_safe(struct tu_descriptor_set, set,
893 &pool->desc_sets, pool_link) {
894 vk_descriptor_set_layout_unref(&device->vk, &set->layout->vk);
895 }
896 list_inithead(&pool->desc_sets);
897
898 if (!pool->host_memory_base) {
899 for(int i = 0; i < pool->entry_count; ++i) {
900 tu_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
901 }
902 pool->entry_count = 0;
903 }
904
905 pool->current_offset = 0;
906 pool->host_memory_ptr = pool->host_memory_base;
907
908 return VK_SUCCESS;
909 }
910
911 VKAPI_ATTR VkResult VKAPI_CALL
912 tu_AllocateDescriptorSets(VkDevice _device,
913 const VkDescriptorSetAllocateInfo *pAllocateInfo,
914 VkDescriptorSet *pDescriptorSets)
915 {
916 VK_FROM_HANDLE(tu_device, device, _device);
917 VK_FROM_HANDLE(tu_descriptor_pool, pool, pAllocateInfo->descriptorPool);
918
919 VkResult result = VK_SUCCESS;
920 uint32_t i;
921 struct tu_descriptor_set *set = NULL;
922
923 const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts =
924 vk_find_struct_const(pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
925 if (variable_counts && !variable_counts->descriptorSetCount)
926 variable_counts = NULL;
927
928 /* allocate a set of buffers for each shader to contain descriptors */
929 for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
930 VK_FROM_HANDLE(tu_descriptor_set_layout, layout,
931 pAllocateInfo->pSetLayouts[i]);
932
933 assert(!(layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
934
935 result = tu_descriptor_set_create(
936 device, pool, layout,
937 variable_counts ? variable_counts->pDescriptorCounts[i] : 0, &set);
938 if (result != VK_SUCCESS)
939 break;
940
941 pDescriptorSets[i] = tu_descriptor_set_to_handle(set);
942 }
943
944 if (result != VK_SUCCESS) {
945 tu_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool,
946 i, pDescriptorSets);
947 for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
948 pDescriptorSets[i] = VK_NULL_HANDLE;
949 }
950 }
951 return result;
952 }
953
954 VKAPI_ATTR VkResult VKAPI_CALL
955 tu_FreeDescriptorSets(VkDevice _device,
956 VkDescriptorPool descriptorPool,
957 uint32_t count,
958 const VkDescriptorSet *pDescriptorSets)
959 {
960 VK_FROM_HANDLE(tu_device, device, _device);
961 VK_FROM_HANDLE(tu_descriptor_pool, pool, descriptorPool);
962
963 for (uint32_t i = 0; i < count; i++) {
964 VK_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]);
965
966 if (set) {
967 vk_descriptor_set_layout_unref(&device->vk, &set->layout->vk);
968 list_del(&set->pool_link);
969 }
970
971 if (set && !pool->host_memory_base)
972 tu_descriptor_set_destroy(device, pool, set, true);
973 }
974 return VK_SUCCESS;
975 }
976
977 static void
978 write_texel_buffer_descriptor_addr(uint32_t *dst,
979 const VkDescriptorAddressInfoEXT *buffer_info)
980 {
981 if (!buffer_info || buffer_info->address == 0) {
982 memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
983 } else {
984 uint8_t swiz[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
985 PIPE_SWIZZLE_W };
986 fdl6_buffer_view_init(dst,
987 vk_format_to_pipe_format(buffer_info->format),
988 swiz, buffer_info->address, buffer_info->range);
989 }
990 }
991
992 static void
993 write_texel_buffer_descriptor(uint32_t *dst, const VkBufferView buffer_view)
994 {
995 if (buffer_view == VK_NULL_HANDLE) {
996 memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
997 } else {
998 VK_FROM_HANDLE(tu_buffer_view, view, buffer_view);
999
1000 memcpy(dst, view->descriptor, sizeof(view->descriptor));
1001 }
1002 }
1003
1004 static VkDescriptorAddressInfoEXT
1005 buffer_info_to_address(const VkDescriptorBufferInfo *buffer_info)
1006 {
1007 VK_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer);
1008
1009 uint32_t range = buffer ? vk_buffer_range(&buffer->vk, buffer_info->offset, buffer_info->range) : 0;
1010 uint64_t va = buffer ? buffer->iova + buffer_info->offset : 0;
1011
1012 return (VkDescriptorAddressInfoEXT) {
1013 .address = va,
1014 .range = range,
1015 };
1016 }
1017
1018 static void
1019 write_buffer_descriptor_addr(const struct tu_device *device,
1020 uint32_t *dst,
1021 const VkDescriptorAddressInfoEXT *buffer_info)
1022 {
1023 const struct fd_dev_info *info = device->physical_device->info;
1024 /* This prevents any misconfiguration, but a 16-bit descriptor capable of
1025 * both 16-bit and 32-bit access through isam.v will of course only be
1026 * functional when 16-bit storage is supported. */
1027 assert(!info->a6xx.has_isam_v || info->a6xx.storage_16bit);
1028 /* Any configuration enabling 8-bit storage support will also provide 16-bit
1029 * storage support and 16-bit descriptors capable of 32-bit isam loads. This
1030 * indirectly ensures we won't need more than two descriptors for access of
1031 * any size.
1032 */
1033 assert(!info->a7xx.storage_8bit || (info->a6xx.storage_16bit &&
1034 info->a6xx.has_isam_v));
1035
1036 unsigned num_descriptors = 1 +
1037 COND(info->a6xx.storage_16bit && !info->a6xx.has_isam_v, 1) +
1038 COND(info->a7xx.storage_8bit, 1);
1039 memset(dst, 0, num_descriptors * A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
1040
1041 if (!buffer_info || buffer_info->address == 0)
1042 return;
1043
1044 uint64_t va = buffer_info->address;
1045 uint64_t base_va = va & ~0x3full;
1046 unsigned offset = va & 0x3f;
1047 uint32_t range = buffer_info->range;
1048
1049 if (info->a6xx.storage_16bit) {
1050 dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_16_UINT);
1051 dst[1] = DIV_ROUND_UP(range, 2);
1052 dst[2] =
1053 A6XX_TEX_CONST_2_STRUCTSIZETEXELS(1) |
1054 A6XX_TEX_CONST_2_STARTOFFSETTEXELS(offset / 2) |
1055 A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
1056 dst[4] = A6XX_TEX_CONST_4_BASE_LO(base_va);
1057 dst[5] = A6XX_TEX_CONST_5_BASE_HI(base_va >> 32);
1058 dst += A6XX_TEX_CONST_DWORDS;
1059 }
1060
1061 /* Set up the 32-bit descriptor when 16-bit storage isn't supported or the
1062 * 16-bit descriptor cannot be used for 32-bit loads through isam.v.
1063 */
1064 if (!info->a6xx.storage_16bit || !info->a6xx.has_isam_v) {
1065 dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT);
1066 dst[1] = DIV_ROUND_UP(range, 4);
1067 dst[2] =
1068 A6XX_TEX_CONST_2_STRUCTSIZETEXELS(1) |
1069 A6XX_TEX_CONST_2_STARTOFFSETTEXELS(offset / 4) |
1070 A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
1071 dst[4] = A6XX_TEX_CONST_4_BASE_LO(base_va);
1072 dst[5] = A6XX_TEX_CONST_5_BASE_HI(base_va >> 32);
1073 dst += A6XX_TEX_CONST_DWORDS;
1074 }
1075
1076 if (info->a7xx.storage_8bit) {
1077 dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_8_UINT);
1078 dst[1] = range;
1079 dst[2] =
1080 A6XX_TEX_CONST_2_STRUCTSIZETEXELS(1) |
1081 A6XX_TEX_CONST_2_STARTOFFSETTEXELS(offset) |
1082 A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
1083 dst[4] = A6XX_TEX_CONST_4_BASE_LO(base_va);
1084 dst[5] = A6XX_TEX_CONST_5_BASE_HI(base_va >> 32);
1085 }
1086 }
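
/* Worked example (illustrative; same device assumptions as above: 16-bit
 * storage, no isam.v, no 8-bit storage): a storage buffer at address
 * 0x10000044 with range 256 produces two descriptors:
 *
 *   base_va = 0x10000040, offset = 4
 *   desc[0]: FMT6_16_UINT, 128 texels, STARTOFFSETTEXELS = 2
 *   desc[1]: FMT6_32_UINT,  64 texels, STARTOFFSETTEXELS = 1
 *
 * 16-bit loads use the first view and 32-bit loads the second; both share
 * the same 64-byte-aligned base address.
 */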
1087
1088 static void
1089 write_buffer_descriptor(const struct tu_device *device,
1090 uint32_t *dst,
1091 const VkDescriptorBufferInfo *buffer_info)
1092 {
1093 VkDescriptorAddressInfoEXT addr = buffer_info_to_address(buffer_info);
1094 write_buffer_descriptor_addr(device, dst, &addr);
1095 }
1096
1097 static void
1098 write_ubo_descriptor_addr(uint32_t *dst,
1099 const VkDescriptorAddressInfoEXT *buffer_info)
1100 {
1101 if (!buffer_info) {
1102 dst[0] = dst[1] = 0;
1103 return;
1104 }
1105
1106 uint64_t va = buffer_info->address;
1107 /* The HW range is in vec4 units */
1108 uint32_t range = va ? DIV_ROUND_UP(buffer_info->range, 16) : 0;
1109 dst[0] = A6XX_UBO_0_BASE_LO(va);
1110 dst[1] = A6XX_UBO_1_BASE_HI(va >> 32) | A6XX_UBO_1_SIZE(range);
1111 }
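
/* Example: a UBO with a 100-byte range is programmed with
 * A6XX_UBO_1_SIZE(DIV_ROUND_UP(100, 16)) == 7 vec4s; a zero address leaves
 * both the base and the size at 0.
 */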
1112
1113 static void
1114 write_ubo_descriptor(uint32_t *dst, const VkDescriptorBufferInfo *buffer_info)
1115 {
1116 VkDescriptorAddressInfoEXT addr = buffer_info_to_address(buffer_info);
1117 write_ubo_descriptor_addr(dst, &addr);
1118 }
1119
1120 static void
1121 write_image_descriptor(uint32_t *dst,
1122 VkDescriptorType descriptor_type,
1123 const VkDescriptorImageInfo *image_info)
1124 {
1125 if (!image_info || image_info->imageView == VK_NULL_HANDLE) {
1126 memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
1127 return;
1128 }
1129
1130 VK_FROM_HANDLE(tu_image_view, iview, image_info->imageView);
1131
1132 if (descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
1133 memcpy(dst, iview->view.storage_descriptor, sizeof(iview->view.storage_descriptor));
1134 } else {
1135 memcpy(dst, iview->view.descriptor, sizeof(iview->view.descriptor));
1136 }
1137 }
1138
1139 static void
1140 write_combined_image_sampler_descriptor(uint32_t *dst,
1141 VkDescriptorType descriptor_type,
1142 const VkDescriptorImageInfo *image_info,
1143 bool has_sampler)
1144 {
1145 write_image_descriptor(dst, descriptor_type, image_info);
1146 /* copy over sampler state */
1147 if (has_sampler) {
1148 VK_FROM_HANDLE(tu_sampler, sampler, image_info->sampler);
1149
1150 memcpy(dst + A6XX_TEX_CONST_DWORDS, sampler->descriptor, sizeof(sampler->descriptor));
1151 }
1152 }
1153
1154 static void
1155 write_sampler_descriptor(uint32_t *dst, VkSampler _sampler)
1156 {
1157 VK_FROM_HANDLE(tu_sampler, sampler, _sampler);
1158
1159 memcpy(dst, sampler->descriptor, sizeof(sampler->descriptor));
1160 }
1161
1162 static void
1163 write_accel_struct(uint32_t *dst, uint64_t va, uint64_t size)
1164 {
1165 dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT);
1166
1167 /* The overall range of the entire AS may be more than the max range, but
1168 * the SSBO is only used to access the instance descriptors and header.
1169 * Make sure that we don't specify a too-large range.
1170 */
1171 dst[1] = MIN2(DIV_ROUND_UP(size, AS_RECORD_SIZE), MAX_TEXEL_ELEMENTS);
1172 dst[2] =
1173 A6XX_TEX_CONST_2_STRUCTSIZETEXELS(AS_RECORD_SIZE / 4) |
1174 A6XX_TEX_CONST_2_STARTOFFSETTEXELS(0) |
1175 A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
1176 dst[3] = 0;
1177 dst[4] = A6XX_TEX_CONST_4_BASE_LO(va);
1178 dst[5] = A6XX_TEX_CONST_5_BASE_HI(va >> 32);
1179 for (int j = 6; j < A6XX_TEX_CONST_DWORDS; j++)
1180 dst[j] = 0;
1181 }
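
/* Sketch (illustrative; assumes AS_RECORD_SIZE == 64): a 1 MiB acceleration
 * structure is exposed as a structured buffer of
 * MIN2(DIV_ROUND_UP(1 << 20, 64), MAX_TEXEL_ELEMENTS) records of
 * AS_RECORD_SIZE / 4 32-bit texels each, enough to reach the header and
 * instance descriptors without exceeding the maximum texel count.
 */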
1182
1183 /* note: this is used with immutable samplers in push descriptors */
1184 static void
1185 write_sampler_push(uint32_t *dst, const struct tu_sampler *sampler)
1186 {
1187 memcpy(dst, sampler->descriptor, sizeof(sampler->descriptor));
1188 }
1189
1190 VKAPI_ATTR void VKAPI_CALL
1191 tu_GetDescriptorEXT(
1192 VkDevice _device,
1193 const VkDescriptorGetInfoEXT *pDescriptorInfo,
1194 size_t dataSize,
1195 void *pDescriptor)
1196 {
1197 VK_FROM_HANDLE(tu_device, device, _device);
1198 uint32_t *dest = (uint32_t *) pDescriptor;
1199
1200 switch (pDescriptorInfo->type) {
1201 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1202 write_ubo_descriptor_addr(dest, pDescriptorInfo->data.pUniformBuffer);
1203 break;
1204 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1205 write_buffer_descriptor_addr(device, dest, pDescriptorInfo->data.pStorageBuffer);
1206 break;
1207 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1208 write_texel_buffer_descriptor_addr(dest, pDescriptorInfo->data.pUniformTexelBuffer);
1209 break;
1210 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1211 write_texel_buffer_descriptor_addr(dest, pDescriptorInfo->data.pStorageTexelBuffer);
1212 break;
1213 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1214 write_image_descriptor(dest, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1215 pDescriptorInfo->data.pSampledImage);
1216 break;
1217 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1218 write_image_descriptor(dest, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1219 pDescriptorInfo->data.pStorageImage);
1220 break;
1221 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1222 write_combined_image_sampler_descriptor(dest,
1223 VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
1224 pDescriptorInfo->data.pCombinedImageSampler,
1225 true);
1226 break;
1227 case VK_DESCRIPTOR_TYPE_SAMPLER:
1228 write_sampler_descriptor(dest, *pDescriptorInfo->data.pSampler);
1229 break;
1230 case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
1231 if (pDescriptorInfo->data.accelerationStructure == 0) {
1232 write_accel_struct(dest, device->null_accel_struct_bo->iova,
1233 device->null_accel_struct_bo->size);
1234 } else {
1235 VkDeviceSize size = *(VkDeviceSize *)
1236 util_sparse_array_get(&device->accel_struct_ranges,
1237 pDescriptorInfo->data.accelerationStructure);
1238 write_accel_struct(dest, pDescriptorInfo->data.accelerationStructure, size);
1239 }
1240 break;
1241 }
1242 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
1243 write_image_descriptor(dest, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
1244 pDescriptorInfo->data.pInputAttachmentImage);
1245 break;
1246 default:
1247 unreachable("unimplemented descriptor type");
1248 break;
1249 }
1250 }
1251
1252 void
1253 tu_update_descriptor_sets(const struct tu_device *device,
1254 VkDescriptorSet dstSetOverride,
1255 uint32_t descriptorWriteCount,
1256 const VkWriteDescriptorSet *pDescriptorWrites,
1257 uint32_t descriptorCopyCount,
1258 const VkCopyDescriptorSet *pDescriptorCopies)
1259 {
1260 uint32_t i, j;
1261 for (i = 0; i < descriptorWriteCount; i++) {
1262 const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
1263 VK_FROM_HANDLE(tu_descriptor_set, set, dstSetOverride ?: writeset->dstSet);
1264 const struct tu_descriptor_set_binding_layout *binding_layout =
1265 set->layout->binding + writeset->dstBinding;
1266 uint32_t *ptr = set->mapped_ptr;
1267 if (vk_descriptor_type_is_dynamic(writeset->descriptorType)) {
1268 ptr = set->dynamic_descriptors;
1269 ptr += binding_layout->dynamic_offset_offset / 4;
1270 } else {
1271 ptr = set->mapped_ptr;
1272 ptr += binding_layout->offset / 4;
1273 }
1274
1275 const VkWriteDescriptorSetAccelerationStructureKHR *accel_structs = NULL;
1276
1277 /* for immutable samplers with push descriptors: */
1278 const bool copy_immutable_samplers =
1279 dstSetOverride && binding_layout->immutable_samplers_offset;
1280 const struct tu_sampler *samplers =
1281 tu_immutable_samplers(set->layout, binding_layout);
1282
1283 if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1284 /* We need to respect this note:
1285 *
1286 * The same behavior applies to bindings with a descriptor type of
1287 * VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK where descriptorCount
1288 * specifies the number of bytes to update while dstArrayElement
1289 * specifies the starting byte offset, thus in this case if the
1290 * dstBinding has a smaller byte size than the sum of
1291 * dstArrayElement and descriptorCount, then the remainder will be
1292 * used to update the subsequent binding - dstBinding+1 starting
1293 * at offset zero. This falls out as a special case of the above
1294 * rule.
1295 *
1296 * This means we can't just do a straight memcpy, because due to
1297 * alignment padding there are gaps between sequential bindings. We
1298 * have to loop over each binding updated.
1299 */
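/* For instance (illustrative sizes): with binding N sized 8 bytes and a
 * write specifying dstArrayElement = 4 and dataSize = 12, the loop below
 * copies 4 bytes into the tail of binding N and the remaining 8 bytes into
 * the start of binding N+1, skipping the alignment padding between them.
 */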
1300 const VkWriteDescriptorSetInlineUniformBlock *inline_write =
1301 vk_find_struct_const(writeset->pNext,
1302 WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK);
1303 uint32_t remaining = inline_write->dataSize;
1304 const uint8_t *src = (const uint8_t *) inline_write->pData;
1305 uint32_t dst_offset = writeset->dstArrayElement;
1306 do {
1307 uint8_t *dst = (uint8_t *)(ptr) + dst_offset;
1308 uint32_t binding_size = binding_layout->size - dst_offset;
1309 uint32_t to_write = MIN2(remaining, binding_size);
1310 memcpy(dst, src, to_write);
1311
1312 binding_layout++;
1313 ptr = set->mapped_ptr + binding_layout->offset / 4;
1314 dst_offset = 0;
1315 src += to_write;
1316 remaining -= to_write;
1317 } while (remaining > 0);
1318
1319 continue;
1320 } else if (writeset->descriptorType ==
1321 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
1322 accel_structs = vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR);
1323 }
1324
1325 ptr += binding_layout->size / 4 * writeset->dstArrayElement;
1326 for (j = 0; j < writeset->descriptorCount; ++j) {
1327 switch(writeset->descriptorType) {
1328 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1329 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1330 write_ubo_descriptor(ptr, writeset->pBufferInfo + j);
1331 break;
1332 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1333 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1334 write_buffer_descriptor(device, ptr, writeset->pBufferInfo + j);
1335 break;
1336 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1337 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1338 write_texel_buffer_descriptor(ptr, writeset->pTexelBufferView[j]);
1339 break;
1340 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1341 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1342 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
1343 write_image_descriptor(ptr, writeset->descriptorType, writeset->pImageInfo + j);
1344 break;
1345 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1346 write_combined_image_sampler_descriptor(ptr,
1347 writeset->descriptorType,
1348 writeset->pImageInfo + j,
1349 !binding_layout->immutable_samplers_offset);
1350
1351 if (copy_immutable_samplers)
1352 write_sampler_push(ptr + A6XX_TEX_CONST_DWORDS, &samplers[writeset->dstArrayElement + j]);
1353 break;
1354 case VK_DESCRIPTOR_TYPE_SAMPLER:
1355 if (!binding_layout->immutable_samplers_offset)
1356 write_sampler_descriptor(ptr, writeset->pImageInfo[j].sampler);
1357 else if (copy_immutable_samplers)
1358 write_sampler_push(ptr, &samplers[writeset->dstArrayElement + j]);
1359 break;
1360 case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
1361 VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accel_structs->pAccelerationStructures[j]);
1362 if (accel_struct) {
1363 write_accel_struct(ptr,
1364 vk_acceleration_structure_get_va(accel_struct),
1365 accel_struct->size);
1366 } else {
1367 write_accel_struct(ptr, device->null_accel_struct_bo->iova,
1368 device->null_accel_struct_bo->size);
1369 }
1370 break;
1371 }
1372 default:
1373 unreachable("unimplemented descriptor type");
1374 break;
1375 }
1376 ptr += binding_layout->size / 4;
1377 }
1378 }
1379
1380 for (i = 0; i < descriptorCopyCount; i++) {
1381 const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i];
1382 VK_FROM_HANDLE(tu_descriptor_set, src_set,
1383 copyset->srcSet);
1384 VK_FROM_HANDLE(tu_descriptor_set, dst_set,
1385 copyset->dstSet);
1386 const struct tu_descriptor_set_binding_layout *src_binding_layout =
1387 src_set->layout->binding + copyset->srcBinding;
1388 const struct tu_descriptor_set_binding_layout *dst_binding_layout =
1389 dst_set->layout->binding + copyset->dstBinding;
1390 uint32_t *src_ptr = src_set->mapped_ptr;
1391 uint32_t *dst_ptr = dst_set->mapped_ptr;
1392 if (vk_descriptor_type_is_dynamic(src_binding_layout->type)) {
1393 src_ptr = src_set->dynamic_descriptors;
1394 dst_ptr = dst_set->dynamic_descriptors;
1395 src_ptr += src_binding_layout->dynamic_offset_offset / 4;
1396 dst_ptr += dst_binding_layout->dynamic_offset_offset / 4;
1397 } else {
1398 src_ptr = src_set->mapped_ptr;
1399 dst_ptr = dst_set->mapped_ptr;
1400 src_ptr += src_binding_layout->offset / 4;
1401 dst_ptr += dst_binding_layout->offset / 4;
1402 }
1403
1404 if (src_binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1405 uint32_t remaining = copyset->descriptorCount;
1406 uint32_t src_start = copyset->srcArrayElement;
1407 uint32_t dst_start = copyset->dstArrayElement;
1408 uint8_t *src = (uint8_t *)(src_ptr) + src_start;
1409 uint8_t *dst = (uint8_t *)(dst_ptr) + dst_start;
1410 uint32_t src_remaining =
1411 src_binding_layout->size - src_start;
1412 uint32_t dst_remaining =
1413 dst_binding_layout->size - dst_start;
1414 do {
1415 uint32_t to_write = MIN3(remaining, src_remaining, dst_remaining);
1416 memcpy(dst, src, to_write);
1417
1418 src += to_write;
1419 dst += to_write;
1420 src_remaining -= to_write;
1421 dst_remaining -= to_write;
1422 remaining -= to_write;
1423
1424 if (src_remaining == 0) {
1425 src_binding_layout++;
1426 src_ptr = src_set->mapped_ptr + src_binding_layout->offset / 4;
1427 src = (uint8_t *)(src_ptr + A6XX_TEX_CONST_DWORDS);
1428 src_remaining = src_binding_layout->size - 4 * A6XX_TEX_CONST_DWORDS;
1429 }
1430
1431 if (dst_remaining == 0) {
1432 dst_binding_layout++;
1433 dst_ptr = dst_set->mapped_ptr + dst_binding_layout->offset / 4;
1434 dst = (uint8_t *)(dst_ptr + A6XX_TEX_CONST_DWORDS);
1435 dst_remaining = dst_binding_layout->size - 4 * A6XX_TEX_CONST_DWORDS;
1436 }
1437 } while (remaining > 0);
1438
1439 continue;
1440 }
1441
1442 src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
1443 dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
1444
1445 /* In case of copies between mutable descriptor types
1446 * and non-mutable descriptor types.
1447 */
1448 uint32_t copy_size = MIN2(src_binding_layout->size, dst_binding_layout->size);
1449
1450 for (j = 0; j < copyset->descriptorCount; ++j) {
1451 memcpy(dst_ptr, src_ptr, copy_size);
1452
1453 src_ptr += src_binding_layout->size / 4;
1454 dst_ptr += dst_binding_layout->size / 4;
1455 }
1456 }
1457 }
1458
1459 VKAPI_ATTR void VKAPI_CALL
1460 tu_UpdateDescriptorSets(VkDevice _device,
1461 uint32_t descriptorWriteCount,
1462 const VkWriteDescriptorSet *pDescriptorWrites,
1463 uint32_t descriptorCopyCount,
1464 const VkCopyDescriptorSet *pDescriptorCopies)
1465 {
1466 VK_FROM_HANDLE(tu_device, device, _device);
1467 tu_update_descriptor_sets(device, VK_NULL_HANDLE,
1468 descriptorWriteCount, pDescriptorWrites,
1469 descriptorCopyCount, pDescriptorCopies);
1470 }
1471
1472 VKAPI_ATTR VkResult VKAPI_CALL
1473 tu_CreateDescriptorUpdateTemplate(
1474 VkDevice _device,
1475 const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
1476 const VkAllocationCallbacks *pAllocator,
1477 VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate)
1478 {
1479 VK_FROM_HANDLE(tu_device, device, _device);
1480 struct tu_descriptor_set_layout *set_layout = NULL;
1481 const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount;
1482 uint32_t dst_entry_count = 0;
1483
1484 if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR) {
1485 VK_FROM_HANDLE(tu_pipeline_layout, pipeline_layout, pCreateInfo->pipelineLayout);
1486
1487 /* For push descriptor templates, descriptorSetLayout must be ignored;
1488 * the set layout is taken from pipelineLayout and set instead.
1489 */
1490 assert(pCreateInfo->set < device->physical_device->usable_sets);
1491 set_layout = pipeline_layout->set[pCreateInfo->set].layout;
1492 } else {
1493 VK_FROM_HANDLE(tu_descriptor_set_layout, _set_layout,
1494 pCreateInfo->descriptorSetLayout);
1495 set_layout = _set_layout;
1496 }
1497
1498 for (uint32_t i = 0; i < entry_count; i++) {
1499 const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
1500 if (entry->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1501 dst_entry_count++;
1502 continue;
1503 }
1504
1505 /* Calculate how many bindings this update steps over, so we can split
1506 * up the template entry. This lets the actual update be a simple
1507 * memcpy.
1508 */
1509 uint32_t remaining = entry->descriptorCount;
1510 const struct tu_descriptor_set_binding_layout *binding_layout =
1511 set_layout->binding + entry->dstBinding;
1512 uint32_t dst_start = entry->dstArrayElement;
1513 do {
1514 uint32_t size = binding_layout->size - A6XX_TEX_CONST_DWORDS * 4;
1515 uint32_t count = MIN2(remaining, size - dst_start);
1516 remaining -= count;
1517 binding_layout++;
1518 dst_entry_count++;
1519 dst_start = 0;
1520 } while (remaining > 0);
1521 }
1522
1523 const size_t size =
1524 sizeof(struct tu_descriptor_update_template) +
1525 sizeof(struct tu_descriptor_update_template_entry) * dst_entry_count;
1526 struct tu_descriptor_update_template *templ;
1527
1528 templ = (struct tu_descriptor_update_template *) vk_object_alloc(
1529 &device->vk, pAllocator, size,
1530 VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE);
1531 if (!templ)
1532 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1533
1534 templ->entry_count = dst_entry_count;
1535
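/* pipelineBindPoint is only defined for push descriptor templates; record it
 * so the push-descriptor path can tell which bind point's state to update
 * when the template is applied.
 */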
1536 if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR) {
1537 templ->bind_point = pCreateInfo->pipelineBindPoint;
1538 }
1539
1540 uint32_t j = 0;
1541 for (uint32_t i = 0; i < entry_count; i++) {
1542 const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
1543
1544 const struct tu_descriptor_set_binding_layout *binding_layout =
1545 set_layout->binding + entry->dstBinding;
1546 uint32_t dst_offset, dst_stride;
1547 const struct tu_sampler *immutable_samplers = NULL;
1548
1549 /* dst_offset is a dword offset into dynamic_descriptors when the
1550 * descriptor is dynamic, and a dword offset into mapped_ptr otherwise,
1551 * except for inline uniform blocks, where it is a byte offset. */
1552 switch (entry->descriptorType) {
1553 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1554 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1555 dst_offset = binding_layout->dynamic_offset_offset / 4;
1556 break;
1557 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
1558 uint32_t remaining = entry->descriptorCount;
1559 uint32_t dst_start = entry->dstArrayElement;
1560 uint32_t src_offset = entry->offset;
1561 /* See comment in update_descriptor_sets() */
1562 do {
1563 dst_offset = binding_layout->offset +
1564 A6XX_TEX_CONST_DWORDS * 4 + dst_start;
1565 uint32_t size = binding_layout->size - A6XX_TEX_CONST_DWORDS * 4;
1566 uint32_t count = MIN2(remaining, size - dst_start);
1567 templ->entry[j++] = (struct tu_descriptor_update_template_entry) {
1568 .descriptor_type = entry->descriptorType,
1569 .descriptor_count = count,
1570 .dst_offset = dst_offset,
1571 .src_offset = src_offset,
1572 };
1573 remaining -= count;
1574 src_offset += count;
1575 binding_layout++;
1576 dst_start = 0;
1577 } while (remaining > 0);
1578
1579 continue;
1580 }
1581 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1582 case VK_DESCRIPTOR_TYPE_SAMPLER:
1583 if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
1584 binding_layout->immutable_samplers_offset) {
1585 immutable_samplers =
1586 tu_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement;
1587 }
1588 FALLTHROUGH;
1589 default:
1590 dst_offset = binding_layout->offset / 4;
1591 }
1592
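/* Array elements of all remaining descriptor types advance by the binding's
 * full descriptor size, so both the start offset and the per-element stride
 * are taken from the set layout rather than from the template entry.
 */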
1593 dst_offset += (binding_layout->size * entry->dstArrayElement) / 4;
1594 dst_stride = binding_layout->size / 4;
1595
1596 templ->entry[j++] = (struct tu_descriptor_update_template_entry) {
1597 .descriptor_type = entry->descriptorType,
1598 .descriptor_count = entry->descriptorCount,
1599 .dst_offset = dst_offset,
1600 .dst_stride = dst_stride,
1601 .has_sampler = !binding_layout->immutable_samplers_offset,
1602 .src_offset = entry->offset,
1603 .src_stride = entry->stride,
1604 .immutable_samplers = immutable_samplers,
1605 };
1606 }
1607
1608 assert(j == dst_entry_count);
1609
1610 *pDescriptorUpdateTemplate =
1611 tu_descriptor_update_template_to_handle(templ);
1612
1613 return VK_SUCCESS;
1614 }
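
/* Illustrative application-side use of the template path implemented above;
 * this is only a sketch, and the app_data struct and variable names are
 * hypothetical:
 *
 *    struct app_data {
 *       VkDescriptorBufferInfo ubo_info;
 *    };
 *
 *    const VkDescriptorUpdateTemplateEntry entry = {
 *       .dstBinding = 0,
 *       .dstArrayElement = 0,
 *       .descriptorCount = 1,
 *       .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 *       .offset = offsetof(struct app_data, ubo_info),
 *       .stride = sizeof(struct app_data),
 *    };
 *    const VkDescriptorUpdateTemplateCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
 *       .descriptorUpdateEntryCount = 1,
 *       .pDescriptorUpdateEntries = &entry,
 *       .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
 *       .descriptorSetLayout = set_layout,
 *    };
 *    vkCreateDescriptorUpdateTemplate(device, &info, NULL, &templ);
 *    vkUpdateDescriptorSetWithTemplate(device, set, templ, &app_data_instance);
 */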
1615
1616 VKAPI_ATTR void VKAPI_CALL
1617 tu_DestroyDescriptorUpdateTemplate(
1618 VkDevice _device,
1619 VkDescriptorUpdateTemplate descriptorUpdateTemplate,
1620 const VkAllocationCallbacks *pAllocator)
1621 {
1622 VK_FROM_HANDLE(tu_device, device, _device);
1623 VK_FROM_HANDLE(tu_descriptor_update_template, templ,
1624 descriptorUpdateTemplate);
1625
1626 if (!templ)
1627 return;
1628
1629 vk_object_free(&device->vk, pAllocator, templ);
1630 }
1631
1632 void
1633 tu_update_descriptor_set_with_template(
1634 const struct tu_device *device,
1635 struct tu_descriptor_set *set,
1636 VkDescriptorUpdateTemplate descriptorUpdateTemplate,
1637 const void *pData)
1638 {
1639 VK_FROM_HANDLE(tu_descriptor_update_template, templ,
1640 descriptorUpdateTemplate);
1641
1642 for (uint32_t i = 0; i < templ->entry_count; i++) {
1643 uint32_t *ptr = set->mapped_ptr;
1644 const void *src = ((const char *) pData) + templ->entry[i].src_offset;
1645 const struct tu_sampler *samplers = templ->entry[i].immutable_samplers;
1646
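/* Inline uniform block entries were pre-split per binding at template
 * creation time, so each entry here reduces to a single byte-wise memcpy
 * into the set's CPU mapping at the precomputed byte offset.
 */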
1647 if (templ->entry[i].descriptor_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1648 memcpy(((uint8_t *) ptr) + templ->entry[i].dst_offset, src,
1649 templ->entry[i].descriptor_count);
1650 continue;
1651 }
1652
1653 ptr += templ->entry[i].dst_offset;
1654 unsigned dst_offset = templ->entry[i].dst_offset;
1655 for (unsigned j = 0; j < templ->entry[i].descriptor_count; ++j) {
1656 switch (templ->entry[i].descriptor_type) {
1657 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
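/* Push descriptor set layouts cannot contain dynamic buffers, so the
 * CPU-side dynamic_descriptors array is always the destination here.
 */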
1658 assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
1659 write_ubo_descriptor(set->dynamic_descriptors + dst_offset,
1660 (const VkDescriptorBufferInfo *) src);
1661 break;
1662 }
1663 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1664 write_ubo_descriptor(ptr, (const VkDescriptorBufferInfo *) src);
1665 break;
1666 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
1667 assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
1668 write_buffer_descriptor(device,
1669 set->dynamic_descriptors + dst_offset,
1670 (const VkDescriptorBufferInfo *) src);
1671 break;
1672 }
1673 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1674 write_buffer_descriptor(device, ptr,
1675 (const VkDescriptorBufferInfo *) src);
1676 break;
1677 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1678 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1679 write_texel_buffer_descriptor(ptr, *(VkBufferView *) src);
1680 break;
1681 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1682 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1683 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
1684 write_image_descriptor(ptr, templ->entry[i].descriptor_type,
1685 (const VkDescriptorImageInfo *) src);
1686 break;
1687 }
1688 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1689 write_combined_image_sampler_descriptor(ptr,
1690 templ->entry[i].descriptor_type,
1691 (const VkDescriptorImageInfo *) src,
1692 templ->entry[i].has_sampler);
1693 if (samplers)
1694 write_sampler_push(ptr + A6XX_TEX_CONST_DWORDS, &samplers[j]);
1695 break;
1696 case VK_DESCRIPTOR_TYPE_SAMPLER:
1697 if (templ->entry[i].has_sampler)
1698 write_sampler_descriptor(ptr, ((const VkDescriptorImageInfo *)src)->sampler);
1699 else if (samplers)
1700 write_sampler_push(ptr, &samplers[j]);
1701 break;
1702 case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
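/* A VK_NULL_HANDLE acceleration structure is redirected to the device's
 * placeholder BO so the descriptor always references valid memory.
 */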
1703 VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, *(const VkAccelerationStructureKHR *)src);
1704 if (accel_struct) {
1705 write_accel_struct(ptr,
1706 vk_acceleration_structure_get_va(accel_struct),
1707 accel_struct->size);
1708 } else {
1709 write_accel_struct(ptr, device->null_accel_struct_bo->iova,
1710 device->null_accel_struct_bo->size);
1711 }
1712 break;
1713 }
1714 default:
1715 unreachable("unimplemented descriptor type");
1716 break;
1717 }
1718 src = (char *) src + templ->entry[i].src_stride;
1719 ptr += templ->entry[i].dst_stride;
1720 dst_offset += templ->entry[i].dst_stride;
1721 }
1722 }
1723 }
1724
1725 VKAPI_ATTR void VKAPI_CALL
1726 tu_UpdateDescriptorSetWithTemplate(
1727 VkDevice _device,
1728 VkDescriptorSet descriptorSet,
1729 VkDescriptorUpdateTemplate descriptorUpdateTemplate,
1730 const void *pData)
1731 {
1732 VK_FROM_HANDLE(tu_device, device, _device);
1733 VK_FROM_HANDLE(tu_descriptor_set, set, descriptorSet);
1734
1735 tu_update_descriptor_set_with_template(device, set, descriptorUpdateTemplate, pData);
1736 }
1737