• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Bas Nieuwenhuizen
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "meta/radv_meta.h"
8 #include "nir_builder.h"
9 #include "radv_cs.h"
10 #include "radv_entrypoints.h"
11 
12 #include "radix_sort/radix_sort_u64.h"
13 
14 #include "bvh/build_interface.h"
15 #include "bvh/bvh.h"
16 
17 #include "vk_acceleration_structure.h"
18 #include "vk_common_entrypoints.h"
19 
20 static const uint32_t copy_spv[] = {
21 #include "bvh/copy.spv.h"
22 };
23 
24 static const uint32_t encode_spv[] = {
25 #include "bvh/encode.spv.h"
26 };
27 
28 static const uint32_t encode_compact_spv[] = {
29 #include "bvh/encode_compact.spv.h"
30 };
31 
32 static const uint32_t header_spv[] = {
33 #include "bvh/header.spv.h"
34 };
35 
36 static const uint32_t update_spv[] = {
37 #include "bvh/update.spv.h"
38 };
39 
40 struct acceleration_structure_layout {
41    uint32_t geometry_info_offset;
42    uint32_t bvh_offset;
43    uint32_t leaf_nodes_offset;
44    uint32_t internal_nodes_offset;
45    uint32_t size;
46 };
47 
48 struct scratch_layout {
49    uint32_t update_size;
50    uint32_t header_offset;
51    uint32_t internal_ready_count_offset;
52 };
53 
54 enum radv_encode_key_bits {
55    RADV_ENCODE_KEY_COMPACT = 1,
56 };
57 
58 static void
radv_get_acceleration_structure_layout(struct radv_device * device,uint32_t leaf_count,const VkAccelerationStructureBuildGeometryInfoKHR * build_info,struct acceleration_structure_layout * accel_struct)59 radv_get_acceleration_structure_layout(struct radv_device *device, uint32_t leaf_count,
60                                        const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
61                                        struct acceleration_structure_layout *accel_struct)
62 {
63    uint32_t internal_count = MAX2(leaf_count, 2) - 1;
64 
65    VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);
66 
67    uint32_t bvh_leaf_size;
68    switch (geometry_type) {
69    case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
70       bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
71       break;
72    case VK_GEOMETRY_TYPE_AABBS_KHR:
73       bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
74       break;
75    case VK_GEOMETRY_TYPE_INSTANCES_KHR:
76       bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
77       break;
78    default:
79       unreachable("Unknown VkGeometryTypeKHR");
80    }
81 
82    uint64_t bvh_size = bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count;
83    uint32_t offset = 0;
84    offset += sizeof(struct radv_accel_struct_header);
85 
86    if (device->rra_trace.accel_structs) {
87       accel_struct->geometry_info_offset = offset;
88       offset += sizeof(struct radv_accel_struct_geometry_info) * build_info->geometryCount;
89    }
90    /* Parent links, which have to go directly before bvh_offset as we index them using negative
91     * offsets from there. */
92    offset += bvh_size / 64 * 4;
93 
94    /* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
95    offset = ALIGN(offset, 64);
96    accel_struct->bvh_offset = offset;
97 
98    /* root node */
99    offset += sizeof(struct radv_bvh_box32_node);
100 
101    accel_struct->leaf_nodes_offset = offset;
102    offset += bvh_leaf_size * leaf_count;
103 
104    accel_struct->internal_nodes_offset = offset;
105    /* Factor out the root node. */
106    offset += sizeof(struct radv_bvh_box32_node) * (internal_count - 1);
107 
108    accel_struct->size = offset;
109 }
110 
111 static void
radv_get_scratch_layout(struct radv_device * device,uint32_t leaf_count,struct scratch_layout * scratch)112 radv_get_scratch_layout(struct radv_device *device, uint32_t leaf_count, struct scratch_layout *scratch)
113 {
114    uint32_t internal_count = MAX2(leaf_count, 2) - 1;
115 
116    uint32_t offset = 0;
117 
118    scratch->header_offset = offset;
119    offset += sizeof(struct vk_ir_header);
120 
121    uint32_t update_offset = 0;
122 
123    update_offset += sizeof(vk_aabb) * leaf_count;
124    scratch->internal_ready_count_offset = update_offset;
125 
126    update_offset += sizeof(uint32_t) * internal_count;
127    scratch->update_size = update_offset;
128 }
129 
130 VKAPI_ATTR void VKAPI_CALL
radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device,VkAccelerationStructureBuildTypeKHR buildType,const VkAccelerationStructureBuildGeometryInfoKHR * pBuildInfo,const uint32_t * pMaxPrimitiveCounts,VkAccelerationStructureBuildSizesInfoKHR * pSizeInfo)131 radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
132                                            const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
133                                            const uint32_t *pMaxPrimitiveCounts,
134                                            VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
135 {
136    VK_FROM_HANDLE(radv_device, device, _device);
137 
138    STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
139    STATIC_ASSERT(sizeof(struct radv_bvh_aabb_node) == 64);
140    STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
141    STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
142    STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);
143 
144    if (radv_device_init_accel_struct_build_state(device) != VK_SUCCESS)
145       return;
146 
147    vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts, pSizeInfo,
148                          &device->meta_state.accel_struct_build.build_args);
149 }
150 
151 VKAPI_ATTR VkResult VKAPI_CALL
radv_WriteAccelerationStructuresPropertiesKHR(VkDevice _device,uint32_t accelerationStructureCount,const VkAccelerationStructureKHR * pAccelerationStructures,VkQueryType queryType,size_t dataSize,void * pData,size_t stride)152 radv_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount,
153                                               const VkAccelerationStructureKHR *pAccelerationStructures,
154                                               VkQueryType queryType, size_t dataSize, void *pData, size_t stride)
155 {
156    unreachable("Unimplemented");
157    return VK_ERROR_FEATURE_NOT_PRESENT;
158 }
159 
160 VKAPI_ATTR VkResult VKAPI_CALL
radv_BuildAccelerationStructuresKHR(VkDevice _device,VkDeferredOperationKHR deferredOperation,uint32_t infoCount,const VkAccelerationStructureBuildGeometryInfoKHR * pInfos,const VkAccelerationStructureBuildRangeInfoKHR * const * ppBuildRangeInfos)161 radv_BuildAccelerationStructuresKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount,
162                                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
163                                     const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
164 {
165    unreachable("Unimplemented");
166    return VK_ERROR_FEATURE_NOT_PRESENT;
167 }
168 
169 VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyAccelerationStructureKHR(VkDevice _device,VkDeferredOperationKHR deferredOperation,const VkCopyAccelerationStructureInfoKHR * pInfo)170 radv_CopyAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
171                                   const VkCopyAccelerationStructureInfoKHR *pInfo)
172 {
173    unreachable("Unimplemented");
174    return VK_ERROR_FEATURE_NOT_PRESENT;
175 }
176 
177 void
radv_device_finish_accel_struct_build_state(struct radv_device * device)178 radv_device_finish_accel_struct_build_state(struct radv_device *device)
179 {
180    VkDevice _device = radv_device_to_handle(device);
181    struct radv_meta_state *state = &device->meta_state;
182    struct vk_device_dispatch_table *dispatch = &device->vk.dispatch_table;
183 
184    dispatch->DestroyPipeline(_device, state->accel_struct_build.copy_pipeline, &state->alloc);
185    dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_pipeline, &state->alloc);
186    dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_compact_pipeline, &state->alloc);
187    dispatch->DestroyPipeline(_device, state->accel_struct_build.header_pipeline, &state->alloc);
188    dispatch->DestroyPipeline(_device, state->accel_struct_build.update_pipeline, &state->alloc);
189    radv_DestroyPipelineLayout(_device, state->accel_struct_build.copy_p_layout, &state->alloc);
190    radv_DestroyPipelineLayout(_device, state->accel_struct_build.encode_p_layout, &state->alloc);
191    radv_DestroyPipelineLayout(_device, state->accel_struct_build.header_p_layout, &state->alloc);
192    radv_DestroyPipelineLayout(_device, state->accel_struct_build.update_p_layout, &state->alloc);
193 
194    if (state->accel_struct_build.radix_sort)
195       radix_sort_vk_destroy(state->accel_struct_build.radix_sort, _device, &state->alloc);
196 
197    radv_DestroyBuffer(_device, state->accel_struct_build.null.buffer, &state->alloc);
198    radv_FreeMemory(_device, state->accel_struct_build.null.memory, &state->alloc);
199    vk_common_DestroyAccelerationStructureKHR(_device, state->accel_struct_build.null.accel_struct, &state->alloc);
200 }
201 
202 static VkResult
create_build_pipeline_spv(struct radv_device * device,const uint32_t * spv,uint32_t spv_size,unsigned push_constant_size,VkPipeline * pipeline,VkPipelineLayout * layout)203 create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint32_t spv_size,
204                           unsigned push_constant_size, VkPipeline *pipeline, VkPipelineLayout *layout)
205 {
206    if (*pipeline)
207       return VK_SUCCESS;
208 
209    VkDevice _device = radv_device_to_handle(device);
210 
211    const VkPipelineLayoutCreateInfo pl_create_info = {
212       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
213       .setLayoutCount = 0,
214       .pushConstantRangeCount = 1,
215       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size},
216    };
217 
218    VkShaderModuleCreateInfo module_info = {
219       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
220       .pNext = NULL,
221       .flags = 0,
222       .codeSize = spv_size,
223       .pCode = spv,
224    };
225 
226    VkShaderModule module;
227    VkResult result =
228       device->vk.dispatch_table.CreateShaderModule(_device, &module_info, &device->meta_state.alloc, &module);
229    if (result != VK_SUCCESS)
230       return result;
231 
232    if (!*layout) {
233       result = radv_CreatePipelineLayout(_device, &pl_create_info, &device->meta_state.alloc, layout);
234       if (result != VK_SUCCESS)
235          goto cleanup;
236    }
237 
238    VkPipelineShaderStageCreateInfo shader_stage = {
239       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
240       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
241       .module = module,
242       .pName = "main",
243       .pSpecializationInfo = NULL,
244    };
245 
246    VkComputePipelineCreateInfo pipeline_info = {
247       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
248       .stage = shader_stage,
249       .flags = 0,
250       .layout = *layout,
251    };
252 
253    result = device->vk.dispatch_table.CreateComputePipelines(_device, device->meta_state.cache, 1, &pipeline_info,
254                                                              &device->meta_state.alloc, pipeline);
255 
256 cleanup:
257    device->vk.dispatch_table.DestroyShaderModule(_device, module, &device->meta_state.alloc);
258    return result;
259 }
260 
261 VkResult
radv_device_init_null_accel_struct(struct radv_device * device)262 radv_device_init_null_accel_struct(struct radv_device *device)
263 {
264    const struct radv_physical_device *pdev = radv_device_physical(device);
265 
266    if (pdev->memory_properties.memoryTypeCount == 0)
267       return VK_SUCCESS; /* Exit in the case of null winsys. */
268 
269    VkDevice _device = radv_device_to_handle(device);
270 
271    uint32_t bvh_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
272    uint32_t size = bvh_offset + sizeof(struct radv_bvh_box32_node);
273 
274    VkResult result;
275 
276    VkBuffer buffer = VK_NULL_HANDLE;
277    VkDeviceMemory memory = VK_NULL_HANDLE;
278    VkAccelerationStructureKHR accel_struct = VK_NULL_HANDLE;
279 
280    VkBufferCreateInfo buffer_create_info = {
281       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
282       .pNext =
283          &(VkBufferUsageFlags2CreateInfo){
284             .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
285             .usage = VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
286          },
287       .size = size,
288       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
289    };
290 
291    result = radv_CreateBuffer(_device, &buffer_create_info, &device->meta_state.alloc, &buffer);
292    if (result != VK_SUCCESS)
293       return result;
294 
295    VkBufferMemoryRequirementsInfo2 info = {
296       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
297       .buffer = buffer,
298    };
299    VkMemoryRequirements2 mem_req = {
300       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
301    };
302    vk_common_GetBufferMemoryRequirements2(_device, &info, &mem_req);
303 
304    VkMemoryAllocateInfo alloc_info = {
305       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
306       .allocationSize = mem_req.memoryRequirements.size,
307       .memoryTypeIndex =
308          radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
309                                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
310    };
311 
312    result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);
313    if (result != VK_SUCCESS)
314       return result;
315 
316    VkBindBufferMemoryInfo bind_info = {
317       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
318       .buffer = buffer,
319       .memory = memory,
320    };
321 
322    result = radv_BindBufferMemory2(_device, 1, &bind_info);
323    if (result != VK_SUCCESS)
324       return result;
325 
326    void *data;
327    result = vk_common_MapMemory(_device, memory, 0, size, 0, &data);
328    if (result != VK_SUCCESS)
329       return result;
330 
331    struct radv_accel_struct_header header = {
332       .bvh_offset = bvh_offset,
333    };
334    memcpy(data, &header, sizeof(struct radv_accel_struct_header));
335 
336    struct radv_bvh_box32_node root = {
337       .children =
338          {
339             RADV_BVH_INVALID_NODE,
340             RADV_BVH_INVALID_NODE,
341             RADV_BVH_INVALID_NODE,
342             RADV_BVH_INVALID_NODE,
343          },
344    };
345 
346    for (uint32_t child = 0; child < 4; child++) {
347       root.coords[child] = (vk_aabb){
348          .min.x = NAN,
349          .min.y = NAN,
350          .min.z = NAN,
351          .max.x = NAN,
352          .max.y = NAN,
353          .max.z = NAN,
354       };
355    }
356 
357    memcpy((uint8_t *)data + bvh_offset, &root, sizeof(struct radv_bvh_box32_node));
358 
359    vk_common_UnmapMemory(_device, memory);
360 
361    VkAccelerationStructureCreateInfoKHR create_info = {
362       .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
363       .buffer = buffer,
364       .size = size,
365       .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
366    };
367 
368    result = vk_common_CreateAccelerationStructureKHR(_device, &create_info, &device->meta_state.alloc, &accel_struct);
369    if (result != VK_SUCCESS)
370       return result;
371 
372    device->meta_state.accel_struct_build.null.buffer = buffer;
373    device->meta_state.accel_struct_build.null.memory = memory;
374    device->meta_state.accel_struct_build.null.accel_struct = accel_struct;
375 
376    return VK_SUCCESS;
377 }
378 
379 static VkDeviceSize
radv_get_as_size(VkDevice _device,const VkAccelerationStructureBuildGeometryInfoKHR * pBuildInfo,uint32_t leaf_count)380 radv_get_as_size(VkDevice _device, const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo, uint32_t leaf_count)
381 {
382    VK_FROM_HANDLE(radv_device, device, _device);
383 
384    struct acceleration_structure_layout accel_struct;
385    radv_get_acceleration_structure_layout(device, leaf_count, pBuildInfo, &accel_struct);
386    return accel_struct.size;
387 }
388 
389 static VkDeviceSize
radv_get_update_scratch_size(struct vk_device * vk_device,uint32_t leaf_count)390 radv_get_update_scratch_size(struct vk_device *vk_device, uint32_t leaf_count)
391 {
392    struct radv_device *device = container_of(vk_device, struct radv_device, vk);
393 
394    struct scratch_layout scratch;
395    radv_get_scratch_layout(device, leaf_count, &scratch);
396    return scratch.update_size;
397 }
398 
399 static uint32_t
radv_get_encode_key(VkAccelerationStructureTypeKHR type,VkBuildAccelerationStructureFlagBitsKHR flags)400 radv_get_encode_key(VkAccelerationStructureTypeKHR type, VkBuildAccelerationStructureFlagBitsKHR flags)
401 {
402    if (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
403       return RADV_ENCODE_KEY_COMPACT;
404 
405    return 0;
406 }
407 
408 static VkResult
radv_encode_bind_pipeline(VkCommandBuffer commandBuffer,uint32_t key)409 radv_encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
410 {
411    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
412    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
413 
414    bool compact = key & RADV_ENCODE_KEY_COMPACT;
415    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
416                                              compact ? device->meta_state.accel_struct_build.encode_compact_pipeline
417                                                      : device->meta_state.accel_struct_build.encode_pipeline);
418 
419    return VK_SUCCESS;
420 }
421 
422 static void
radv_encode_as(VkCommandBuffer commandBuffer,const VkAccelerationStructureBuildGeometryInfoKHR * build_info,const VkAccelerationStructureBuildRangeInfoKHR * build_range_infos,VkDeviceAddress intermediate_as_addr,VkDeviceAddress intermediate_header_addr,uint32_t leaf_count,uint32_t key,struct vk_acceleration_structure * dst)423 radv_encode_as(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
424                const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos, VkDeviceAddress intermediate_as_addr,
425                VkDeviceAddress intermediate_header_addr, uint32_t leaf_count, uint32_t key,
426                struct vk_acceleration_structure *dst)
427 {
428    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
429    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
430 
431    struct acceleration_structure_layout layout;
432    radv_get_acceleration_structure_layout(device, leaf_count, build_info, &layout);
433 
434    if (key & RADV_ENCODE_KEY_COMPACT) {
435       uint32_t dst_offset = layout.internal_nodes_offset - layout.bvh_offset;
436       radv_update_buffer_cp(cmd_buffer, intermediate_header_addr + offsetof(struct vk_ir_header, dst_node_offset),
437                             &dst_offset, sizeof(uint32_t));
438    }
439 
440    const struct encode_args args = {
441       .intermediate_bvh = intermediate_as_addr,
442       .output_bvh = vk_acceleration_structure_get_va(dst) + layout.bvh_offset,
443       .header = intermediate_header_addr,
444       .output_bvh_offset = layout.bvh_offset,
445       .leaf_node_count = leaf_count,
446       .geometry_type = vk_get_as_geometry_type(build_info),
447    };
448    vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.encode_p_layout,
449                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
450 
451    struct radv_dispatch_info dispatch = {
452       .unaligned = true,
453       .ordered = true,
454       .blocks = {MAX2(leaf_count, 1), 1, 1},
455    };
456 
457    radv_compute_dispatch(cmd_buffer, &dispatch);
458 }
459 
460 static VkResult
radv_init_header_bind_pipeline(VkCommandBuffer commandBuffer,uint32_t key)461 radv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
462 {
463    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
464    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
465 
466    if (!(key & RADV_ENCODE_KEY_COMPACT))
467       return VK_SUCCESS;
468 
469    /* Wait for encoding to finish. */
470    cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
471                                    radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
472                                                          VK_ACCESS_2_SHADER_WRITE_BIT, NULL, NULL) |
473                                    radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
474                                                          VK_ACCESS_2_SHADER_READ_BIT, NULL, NULL);
475 
476    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
477                                              device->meta_state.accel_struct_build.header_pipeline);
478 
479    return VK_SUCCESS;
480 }
481 
482 static void
radv_init_header(VkCommandBuffer commandBuffer,const VkAccelerationStructureBuildGeometryInfoKHR * build_info,const VkAccelerationStructureBuildRangeInfoKHR * build_range_infos,VkDeviceAddress intermediate_as_addr,VkDeviceAddress intermediate_header_addr,uint32_t leaf_count,uint32_t key,struct vk_acceleration_structure * dst)483 radv_init_header(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
484                  const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
485                  VkDeviceAddress intermediate_as_addr, VkDeviceAddress intermediate_header_addr, uint32_t leaf_count,
486                  uint32_t key, struct vk_acceleration_structure *dst)
487 {
488    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
489    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
490 
491    size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
492 
493    uint64_t instance_count = build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? leaf_count : 0;
494 
495    struct acceleration_structure_layout layout;
496    radv_get_acceleration_structure_layout(device, leaf_count, build_info, &layout);
497 
498    if (key & RADV_ENCODE_KEY_COMPACT) {
499       base = offsetof(struct radv_accel_struct_header, geometry_count);
500 
501       struct header_args args = {
502          .src = intermediate_header_addr,
503          .dst = vk_acceleration_structure_get_va(dst),
504          .bvh_offset = layout.bvh_offset,
505          .instance_count = instance_count,
506       };
507 
508       vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.header_p_layout,
509                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
510 
511       radv_unaligned_dispatch(cmd_buffer, 1, 1, 1);
512    }
513 
514    struct radv_accel_struct_header header;
515 
516    header.instance_offset = layout.bvh_offset + sizeof(struct radv_bvh_box32_node);
517    header.instance_count = instance_count;
518    header.compacted_size = layout.size;
519 
520    header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
521    header.copy_dispatch_size[1] = 1;
522    header.copy_dispatch_size[2] = 1;
523 
524    header.serialization_size =
525       header.compacted_size +
526       align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128);
527 
528    header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) -
529                  sizeof(uint64_t) * header.instance_count;
530 
531    header.build_flags = build_info->flags;
532    header.geometry_count = build_info->geometryCount;
533 
534    radv_update_buffer_cp(cmd_buffer, vk_acceleration_structure_get_va(dst) + base, (const char *)&header + base,
535                          sizeof(header) - base);
536 
537    if (device->rra_trace.accel_structs) {
538       uint64_t geometry_infos_size = build_info->geometryCount * sizeof(struct radv_accel_struct_geometry_info);
539 
540       struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size);
541       if (!geometry_infos)
542          return;
543 
544       for (uint32_t i = 0; i < build_info->geometryCount; i++) {
545          const VkAccelerationStructureGeometryKHR *geometry =
546             build_info->pGeometries ? &build_info->pGeometries[i] : build_info->ppGeometries[i];
547          geometry_infos[i].type = geometry->geometryType;
548          geometry_infos[i].flags = geometry->flags;
549          geometry_infos[i].primitive_count = build_range_infos[i].primitiveCount;
550       }
551 
552       radv_CmdUpdateBuffer(commandBuffer, dst->buffer, dst->offset + layout.geometry_info_offset, geometry_infos_size,
553                            geometry_infos);
554 
555       free(geometry_infos);
556    }
557 }
558 
559 static void
radv_init_update_scratch(VkCommandBuffer commandBuffer,VkDeviceAddress scratch,uint32_t leaf_count,struct vk_acceleration_structure * src_as,struct vk_acceleration_structure * dst_as)560 radv_init_update_scratch(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, uint32_t leaf_count,
561                          struct vk_acceleration_structure *src_as, struct vk_acceleration_structure *dst_as)
562 {
563    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
564    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
565 
566    struct scratch_layout layout;
567    radv_get_scratch_layout(device, leaf_count, &layout);
568 
569    /* Prepare ready counts for internal nodes */
570    radv_fill_buffer(cmd_buffer, NULL, NULL, scratch + layout.internal_ready_count_offset,
571                     layout.update_size - layout.internal_ready_count_offset, 0x0);
572 }
573 
574 static void
radv_update_bind_pipeline(VkCommandBuffer commandBuffer)575 radv_update_bind_pipeline(VkCommandBuffer commandBuffer)
576 {
577    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
578    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
579 
580    /* Wait for update scratch initialization to finish.. */
581    cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
582                                    radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
583                                                          VK_ACCESS_2_SHADER_WRITE_BIT, NULL, NULL) |
584                                    radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
585                                                          VK_ACCESS_2_SHADER_READ_BIT, NULL, NULL);
586 
587    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
588                                              device->meta_state.accel_struct_build.update_pipeline);
589 }
590 
591 static uint32_t
pack_geometry_id_and_flags(uint32_t geometry_id,uint32_t flags)592 pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
593 {
594    uint32_t geometry_id_and_flags = geometry_id;
595    if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
596       geometry_id_and_flags |= RADV_GEOMETRY_OPAQUE;
597 
598    return geometry_id_and_flags;
599 }
600 
601 static void
radv_update_as(VkCommandBuffer commandBuffer,const VkAccelerationStructureBuildGeometryInfoKHR * build_info,const VkAccelerationStructureBuildRangeInfoKHR * build_range_infos,uint32_t leaf_count,struct vk_acceleration_structure * src,struct vk_acceleration_structure * dst)602 radv_update_as(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
603                const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos, uint32_t leaf_count,
604                struct vk_acceleration_structure *src, struct vk_acceleration_structure *dst)
605 {
606    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
607    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
608 
609    if (src != dst) {
610       VK_FROM_HANDLE(radv_buffer, src_as_buffer, src->buffer);
611       VK_FROM_HANDLE(radv_buffer, dst_as_buffer, dst->buffer);
612 
613       struct acceleration_structure_layout layout;
614       radv_get_acceleration_structure_layout(device, leaf_count, build_info, &layout);
615 
616       /* Copy header/metadata */
617       radv_copy_buffer(cmd_buffer, src_as_buffer->bo, dst_as_buffer->bo, src_as_buffer->offset + src->offset,
618                        dst_as_buffer->offset + dst->offset, layout.bvh_offset);
619    }
620 
621    struct scratch_layout layout;
622    radv_get_scratch_layout(device, leaf_count, &layout);
623 
624    struct update_args update_consts = {
625       .src = vk_acceleration_structure_get_va(src),
626       .dst = vk_acceleration_structure_get_va(dst),
627       .leaf_bounds = build_info->scratchData.deviceAddress,
628       .internal_ready_count = build_info->scratchData.deviceAddress + layout.internal_ready_count_offset,
629       .leaf_node_count = leaf_count,
630    };
631 
632    uint32_t first_id = 0;
633    for (uint32_t i = 0; i < build_info->geometryCount; i++) {
634       const VkAccelerationStructureGeometryKHR *geom =
635          build_info->pGeometries ? &build_info->pGeometries[i] : build_info->ppGeometries[i];
636 
637       const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &build_range_infos[i];
638 
639       update_consts.geom_data = vk_fill_geometry_data(build_info->type, first_id, i, geom, build_range_info);
640 
641       vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.update_p_layout,
642                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(update_consts), &update_consts);
643       radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
644 
645       first_id += build_range_info->primitiveCount;
646    }
647 }
648 
649 static const struct radix_sort_vk_target_config radix_sort_config = {
650    .keyval_dwords = 2,
651    .fill.workgroup_size_log2 = 7,
652    .fill.block_rows = 8,
653    .histogram.workgroup_size_log2 = 8,
654    .histogram.subgroup_size_log2 = 6,
655    .histogram.block_rows = 14,
656    .prefix.workgroup_size_log2 = 8,
657    .prefix.subgroup_size_log2 = 6,
658    .scatter.workgroup_size_log2 = 8,
659    .scatter.subgroup_size_log2 = 6,
660    .scatter.block_rows = 14,
661 };
662 
663 static const struct vk_acceleration_structure_build_ops build_ops = {
664    .begin_debug_marker = vk_accel_struct_cmd_begin_debug_marker,
665    .end_debug_marker = vk_accel_struct_cmd_end_debug_marker,
666    .get_as_size = radv_get_as_size,
667    .get_update_scratch_size = radv_get_update_scratch_size,
668    .get_encode_key[0] = radv_get_encode_key,
669    .get_encode_key[1] = radv_get_encode_key,
670    .encode_bind_pipeline[0] = radv_encode_bind_pipeline,
671    .encode_bind_pipeline[1] = radv_init_header_bind_pipeline,
672    .encode_as[0] = radv_encode_as,
673    .encode_as[1] = radv_init_header,
674    .init_update_scratch = radv_init_update_scratch,
675    .update_bind_pipeline[0] = radv_update_bind_pipeline,
676    .update_as[0] = radv_update_as,
677 };
678 
679 static void
radv_write_buffer_cp(VkCommandBuffer commandBuffer,VkDeviceAddress addr,void * data,uint32_t size)680 radv_write_buffer_cp(VkCommandBuffer commandBuffer, VkDeviceAddress addr, void *data, uint32_t size)
681 {
682    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
683    radv_update_buffer_cp(cmd_buffer, addr, data, size);
684 }
685 
686 static void
radv_flush_buffer_write_cp(VkCommandBuffer commandBuffer)687 radv_flush_buffer_write_cp(VkCommandBuffer commandBuffer)
688 {
689 }
690 
691 static void
radv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer,uint32_t x,uint32_t y,uint32_t z)692 radv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
693 {
694    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
695    radv_unaligned_dispatch(cmd_buffer, x, y, z);
696 }
697 
698 static void
radv_cmd_fill_buffer_addr(VkCommandBuffer commandBuffer,VkDeviceAddress addr,VkDeviceSize size,uint32_t data)699 radv_cmd_fill_buffer_addr(VkCommandBuffer commandBuffer, VkDeviceAddress addr, VkDeviceSize size, uint32_t data)
700 {
701    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
702    radv_fill_buffer(cmd_buffer, NULL, NULL, addr, size, data);
703 }
704 
705 VkResult
radv_device_init_accel_struct_build_state(struct radv_device * device)706 radv_device_init_accel_struct_build_state(struct radv_device *device)
707 {
708    VkResult result = VK_SUCCESS;
709    mtx_lock(&device->meta_state.mtx);
710 
711    if (device->meta_state.accel_struct_build.radix_sort)
712       goto exit;
713 
714    result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
715                                       &device->meta_state.accel_struct_build.encode_pipeline,
716                                       &device->meta_state.accel_struct_build.encode_p_layout);
717    if (result != VK_SUCCESS)
718       goto exit;
719 
720    result =
721       create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
722                                 &device->meta_state.accel_struct_build.encode_compact_pipeline,
723                                 &device->meta_state.accel_struct_build.encode_p_layout);
724    if (result != VK_SUCCESS)
725       goto exit;
726 
727    result = create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args),
728                                       &device->meta_state.accel_struct_build.header_pipeline,
729                                       &device->meta_state.accel_struct_build.header_p_layout);
730    if (result != VK_SUCCESS)
731       goto exit;
732 
733    result = create_build_pipeline_spv(device, update_spv, sizeof(update_spv), sizeof(struct update_args),
734                                       &device->meta_state.accel_struct_build.update_pipeline,
735                                       &device->meta_state.accel_struct_build.update_p_layout);
736    if (result != VK_SUCCESS)
737       goto exit;
738 
739    device->meta_state.accel_struct_build.radix_sort = vk_create_radix_sort_u64(
740       radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache, radix_sort_config);
741 
742    device->vk.as_build_ops = &build_ops;
743    device->vk.write_buffer_cp = radv_write_buffer_cp;
744    device->vk.flush_buffer_write_cp = radv_flush_buffer_write_cp;
745    device->vk.cmd_dispatch_unaligned = radv_cmd_dispatch_unaligned;
746    device->vk.cmd_fill_buffer_addr = radv_cmd_fill_buffer_addr;
747 
748    struct vk_acceleration_structure_build_args *build_args = &device->meta_state.accel_struct_build.build_args;
749    build_args->subgroup_size = 64;
750    build_args->bvh_bounds_offset = offsetof(struct radv_accel_struct_header, aabb);
751    build_args->emit_markers = device->sqtt.bo;
752    build_args->radix_sort = device->meta_state.accel_struct_build.radix_sort;
753 
754 exit:
755    mtx_unlock(&device->meta_state.mtx);
756    return result;
757 }
758 
759 static VkResult
radv_device_init_accel_struct_copy_state(struct radv_device * device)760 radv_device_init_accel_struct_copy_state(struct radv_device *device)
761 {
762    mtx_lock(&device->meta_state.mtx);
763 
764    VkResult result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
765                                                &device->meta_state.accel_struct_build.copy_pipeline,
766                                                &device->meta_state.accel_struct_build.copy_p_layout);
767 
768    mtx_unlock(&device->meta_state.mtx);
769    return result;
770 }
771 
772 VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer,uint32_t infoCount,const VkAccelerationStructureBuildGeometryInfoKHR * pInfos,const VkAccelerationStructureBuildRangeInfoKHR * const * ppBuildRangeInfos)773 radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
774                                        const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
775                                        const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
776 {
777    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
778    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
779    struct radv_meta_saved_state saved_state;
780 
781    VkResult result = radv_device_init_accel_struct_build_state(device);
782    if (result != VK_SUCCESS) {
783       vk_command_buffer_set_error(&cmd_buffer->vk, result);
784       return;
785    }
786 
787    radv_meta_save(&saved_state, cmd_buffer,
788                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
789 
790    cmd_buffer->state.current_event_type = EventInternalUnknown;
791 
792    vk_cmd_build_acceleration_structures(commandBuffer, &device->vk, &device->meta_state.device, infoCount, pInfos,
793                                         ppBuildRangeInfos, &device->meta_state.accel_struct_build.build_args);
794 
795    radv_meta_restore(&saved_state, cmd_buffer);
796 }
797 
798 VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer,const VkCopyAccelerationStructureInfoKHR * pInfo)799 radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
800 {
801    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
802    VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
803    VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
804    VK_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
805    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
806    struct radv_meta_saved_state saved_state;
807 
808    VkResult result = radv_device_init_accel_struct_copy_state(device);
809    if (result != VK_SUCCESS) {
810       vk_command_buffer_set_error(&cmd_buffer->vk, result);
811       return;
812    }
813 
814    radv_meta_save(&saved_state, cmd_buffer,
815                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
816 
817    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
818                         device->meta_state.accel_struct_build.copy_pipeline);
819 
820    struct copy_args consts = {
821       .src_addr = vk_acceleration_structure_get_va(src),
822       .dst_addr = vk_acceleration_structure_get_va(dst),
823       .mode = RADV_COPY_MODE_COPY,
824    };
825 
826    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
827                               device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
828                               sizeof(consts), &consts);
829 
830    cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
831                                                          VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL, NULL);
832 
833    radv_indirect_dispatch(
834       cmd_buffer, src_buffer->bo,
835       vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
836    radv_meta_restore(&saved_state, cmd_buffer);
837 }
838 
839 VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,const VkAccelerationStructureVersionInfoKHR * pVersionInfo,VkAccelerationStructureCompatibilityKHR * pCompatibility)840 radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
841                                                     const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
842                                                     VkAccelerationStructureCompatibilityKHR *pCompatibility)
843 {
844    VK_FROM_HANDLE(radv_device, device, _device);
845    const struct radv_physical_device *pdev = radv_device_physical(device);
846    bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 &&
847                  memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0;
848    *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
849                             : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
850 }
851 
852 VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device,VkDeferredOperationKHR deferredOperation,const VkCopyMemoryToAccelerationStructureInfoKHR * pInfo)853 radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
854                                           const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
855 {
856    unreachable("Unimplemented");
857    return VK_ERROR_FEATURE_NOT_PRESENT;
858 }
859 
860 VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device,VkDeferredOperationKHR deferredOperation,const VkCopyAccelerationStructureToMemoryInfoKHR * pInfo)861 radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
862                                           const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
863 {
864    unreachable("Unimplemented");
865    return VK_ERROR_FEATURE_NOT_PRESENT;
866 }
867 
868 VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,const VkCopyMemoryToAccelerationStructureInfoKHR * pInfo)869 radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
870                                              const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
871 {
872    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
873    VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
874    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
875    struct radv_meta_saved_state saved_state;
876 
877    VkResult result = radv_device_init_accel_struct_copy_state(device);
878    if (result != VK_SUCCESS) {
879       vk_command_buffer_set_error(&cmd_buffer->vk, result);
880       return;
881    }
882 
883    radv_meta_save(&saved_state, cmd_buffer,
884                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
885 
886    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
887                         device->meta_state.accel_struct_build.copy_pipeline);
888 
889    const struct copy_args consts = {
890       .src_addr = pInfo->src.deviceAddress,
891       .dst_addr = vk_acceleration_structure_get_va(dst),
892       .mode = RADV_COPY_MODE_DESERIALIZE,
893    };
894 
895    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
896                               device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
897                               sizeof(consts), &consts);
898 
899    vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
900    radv_meta_restore(&saved_state, cmd_buffer);
901 }
902 
903 VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,const VkCopyAccelerationStructureToMemoryInfoKHR * pInfo)904 radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
905                                              const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
906 {
907    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
908    VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
909    VK_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
910    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
911    const struct radv_physical_device *pdev = radv_device_physical(device);
912    struct radv_meta_saved_state saved_state;
913 
914    VkResult result = radv_device_init_accel_struct_copy_state(device);
915    if (result != VK_SUCCESS) {
916       vk_command_buffer_set_error(&cmd_buffer->vk, result);
917       return;
918    }
919 
920    radv_meta_save(&saved_state, cmd_buffer,
921                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
922 
923    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
924                         device->meta_state.accel_struct_build.copy_pipeline);
925 
926    const struct copy_args consts = {
927       .src_addr = vk_acceleration_structure_get_va(src),
928       .dst_addr = pInfo->dst.deviceAddress,
929       .mode = RADV_COPY_MODE_SERIALIZE,
930    };
931 
932    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
933                               device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
934                               sizeof(consts), &consts);
935 
936    cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
937                                                          VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL, NULL);
938 
939    radv_indirect_dispatch(
940       cmd_buffer, src_buffer->bo,
941       vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
942    radv_meta_restore(&saved_state, cmd_buffer);
943 
944    /* Set the header of the serialized data. */
945    uint8_t header_data[2 * VK_UUID_SIZE];
946    memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE);
947    memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE);
948 
949    radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
950 }
951 
952 VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer,uint32_t infoCount,const VkAccelerationStructureBuildGeometryInfoKHR * pInfos,const VkDeviceAddress * pIndirectDeviceAddresses,const uint32_t * pIndirectStrides,const uint32_t * const * ppMaxPrimitiveCounts)953 radv_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
954                                                const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
955                                                const VkDeviceAddress *pIndirectDeviceAddresses,
956                                                const uint32_t *pIndirectStrides,
957                                                const uint32_t *const *ppMaxPrimitiveCounts)
958 {
959    unreachable("Unimplemented");
960 }
961