1 /*
2  * Copyright © 2021 Bas Nieuwenhuizen
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "radv_private.h"
25 
26 #include "meta/radv_meta.h"
27 #include "nir_builder.h"
28 #include "radv_cs.h"
29 
30 #include "radix_sort/common/vk/barrier.h"
31 #include "radix_sort/radv_radix_sort.h"
32 #include "radix_sort/shaders/push.h"
33 
34 #include "bvh/build_interface.h"
35 #include "bvh/bvh.h"
36 
37 #include "vk_acceleration_structure.h"
38 #include "vk_common_entrypoints.h"
39 
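/* Precompiled SPIR-V for the BVH build, copy and update kernels, generated at
 * build time and embedded as uint32_t arrays. */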
40 static const uint32_t leaf_spv[] = {
41 #include "bvh/leaf.spv.h"
42 };
43 
44 static const uint32_t leaf_always_active_spv[] = {
45 #include "bvh/leaf_always_active.spv.h"
46 };
47 
48 static const uint32_t morton_spv[] = {
49 #include "bvh/morton.spv.h"
50 };
51 
52 static const uint32_t lbvh_main_spv[] = {
53 #include "bvh/lbvh_main.spv.h"
54 };
55 
56 static const uint32_t lbvh_generate_ir_spv[] = {
57 #include "bvh/lbvh_generate_ir.spv.h"
58 };
59 
60 static const uint32_t ploc_spv[] = {
61 #include "bvh/ploc_internal.spv.h"
62 };
63 
64 static const uint32_t copy_spv[] = {
65 #include "bvh/copy.spv.h"
66 };
67 
68 static const uint32_t encode_spv[] = {
69 #include "bvh/encode.spv.h"
70 };
71 
72 static const uint32_t encode_compact_spv[] = {
73 #include "bvh/encode_compact.spv.h"
74 };
75 
76 static const uint32_t header_spv[] = {
77 #include "bvh/header.spv.h"
78 };
79 
80 static const uint32_t update_spv[] = {
81 #include "bvh/update.spv.h"
82 };
83 
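/* A sort keyval packs a 32-bit Morton code together with a 32-bit node id
 * (8 bytes total); MORTON_BIT_SIZE is the number of Morton code bits the
 * radix sort actually compares. */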
84 #define KEY_ID_PAIR_SIZE 8
85 #define MORTON_BIT_SIZE  24
86 
87 enum internal_build_type {
88    INTERNAL_BUILD_TYPE_LBVH,
89    INTERNAL_BUILD_TYPE_PLOC,
90    INTERNAL_BUILD_TYPE_UPDATE,
91 };
92 
93 struct build_config {
94    enum internal_build_type internal_type;
95    bool compact;
96 };
97 
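/* Byte offsets of the sections inside the final acceleration structure buffer. */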
98 struct acceleration_structure_layout {
99    uint32_t geometry_info_offset;
100    uint32_t bvh_offset;
101    uint32_t leaf_nodes_offset;
102    uint32_t internal_nodes_offset;
103    uint32_t size;
104 };
105 
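/* Byte offsets inside the build/update scratch buffer. */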
106 struct scratch_layout {
107    uint32_t size;
108    uint32_t update_size;
109 
110    uint32_t header_offset;
111 
112    /* Used for UPDATE only. */
113 
114    uint32_t internal_ready_count_offset;
115 
116    /* Used for BUILD only. */
117 
118    uint32_t sort_buffer_offset[2];
119    uint32_t sort_internal_offset;
120 
121    uint32_t ploc_prefix_sum_partition_offset;
122    uint32_t lbvh_node_offset;
123 
124    uint32_t ir_offset;
125    uint32_t internal_node_offset;
126 };
127 
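/* Choose the internal-node build algorithm: tiny builds use LBVH, TLAS and
 * high-quality BLAS builds use PLOC, fast-build or updatable BLAS builds fall
 * back to LBVH, and BLAS updates take the dedicated update path. */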
128 static struct build_config
129 build_config(uint32_t leaf_count, const VkAccelerationStructureBuildGeometryInfoKHR *build_info)
130 {
131    struct build_config config = {0};
132 
133    if (leaf_count <= 4)
134       config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
135    else if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR)
136       config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
137    else if (!(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR) &&
138             !(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR))
139       config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
140    else
141       config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
142 
143    if (build_info->mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR &&
144        build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
145       config.internal_type = INTERNAL_BUILD_TYPE_UPDATE;
146 
147    if (build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
148       config.compact = true;
149 
150    return config;
151 }
152 
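/* Compute the memory layout of the destination acceleration structure and of
 * the scratch buffer for the given leaf count. Either output pointer may be
 * NULL when only one of the layouts is needed. */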
153 static void
154 get_build_layout(struct radv_device *device, uint32_t leaf_count,
155                  const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
156                  struct acceleration_structure_layout *accel_struct, struct scratch_layout *scratch)
157 {
158    uint32_t internal_count = MAX2(leaf_count, 2) - 1;
159 
160    VkGeometryTypeKHR geometry_type = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
161 
162    if (build_info->geometryCount) {
163       if (build_info->pGeometries)
164          geometry_type = build_info->pGeometries[0].geometryType;
165       else
166          geometry_type = build_info->ppGeometries[0]->geometryType;
167    }
168 
169    uint32_t bvh_leaf_size;
170    switch (geometry_type) {
171    case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
172       bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
173       break;
174    case VK_GEOMETRY_TYPE_AABBS_KHR:
175       bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
176       break;
177    case VK_GEOMETRY_TYPE_INSTANCES_KHR:
178       bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
179       break;
180    default:
181       unreachable("Unknown VkGeometryTypeKHR");
182    }
183 
184    if (accel_struct) {
185       uint64_t bvh_size = bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count;
186       uint32_t offset = 0;
187       offset += sizeof(struct radv_accel_struct_header);
188 
189       if (device->rra_trace.accel_structs) {
190          accel_struct->geometry_info_offset = offset;
191          offset += sizeof(struct radv_accel_struct_geometry_info) * build_info->geometryCount;
192       }
193       /* Parent links have to go directly before bvh_offset because we index them
194        * using negative offsets from there. */
195       offset += bvh_size / 64 * 4;
196 
197       /* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
198       offset = ALIGN(offset, 64);
199       accel_struct->bvh_offset = offset;
200 
201       /* root node */
202       offset += sizeof(struct radv_bvh_box32_node);
203 
204       accel_struct->leaf_nodes_offset = offset;
205       offset += bvh_leaf_size * leaf_count;
206 
207       accel_struct->internal_nodes_offset = offset;
208       /* Factor out the root node. */
209       offset += sizeof(struct radv_bvh_box32_node) * (internal_count - 1);
210 
211       accel_struct->size = offset;
212    }
213 
214    if (scratch) {
215       radix_sort_vk_memory_requirements_t requirements = {
216          0,
217       };
218       if (radv_device_init_accel_struct_build_state(device) == VK_SUCCESS)
219          radix_sort_vk_get_memory_requirements(device->meta_state.accel_struct_build.radix_sort, leaf_count,
220                                                &requirements);
221 
222       uint32_t offset = 0;
223 
224       uint32_t ploc_scratch_space = 0;
225       uint32_t lbvh_node_space = 0;
226 
227       struct build_config config = build_config(leaf_count, build_info);
228 
229       if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC)
230          ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition);
231       else
232          lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count;
233 
234       scratch->header_offset = offset;
235       offset += sizeof(struct radv_ir_header);
236 
237       scratch->sort_buffer_offset[0] = offset;
238       offset += requirements.keyvals_size;
239 
240       scratch->sort_buffer_offset[1] = offset;
241       offset += requirements.keyvals_size;
242 
243       scratch->sort_internal_offset = offset;
244       /* Internal sorting data is not needed once PLOC/LBVH are invoked,
245        * so save space by aliasing them. */
246       scratch->ploc_prefix_sum_partition_offset = offset;
247       scratch->lbvh_node_offset = offset;
248       offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space);
249 
250       scratch->ir_offset = offset;
251       offset += sizeof(struct radv_ir_node) * leaf_count;
252 
253       scratch->internal_node_offset = offset;
254       offset += sizeof(struct radv_ir_box_node) * internal_count;
255 
256       scratch->size = offset;
257 
258       if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR) {
259          uint32_t update_offset = 0;
260 
261          update_offset += sizeof(radv_aabb) * leaf_count;
262          scratch->internal_ready_count_offset = update_offset;
263 
264          update_offset += sizeof(uint32_t) * internal_count;
265          scratch->update_size = update_offset;
266       } else {
267          scratch->update_size = offset;
268       }
269    }
270 }
271 
272 VKAPI_ATTR void VKAPI_CALL
273 radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
274                                            const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
275                                            const uint32_t *pMaxPrimitiveCounts,
276                                            VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
277 {
278    RADV_FROM_HANDLE(radv_device, device, _device);
279 
280    STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
281    STATIC_ASSERT(sizeof(struct radv_bvh_aabb_node) == 64);
282    STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
283    STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
284    STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);
285 
286    uint32_t leaf_count = 0;
287    for (uint32_t i = 0; i < pBuildInfo->geometryCount; i++)
288       leaf_count += pMaxPrimitiveCounts[i];
289 
290    struct acceleration_structure_layout accel_struct;
291    struct scratch_layout scratch;
292    get_build_layout(device, leaf_count, pBuildInfo, &accel_struct, &scratch);
293 
294    pSizeInfo->accelerationStructureSize = accel_struct.size;
295    pSizeInfo->updateScratchSize = scratch.update_size;
296    pSizeInfo->buildScratchSize = scratch.size;
297 }
298 
299 VKAPI_ATTR VkResult VKAPI_CALL
300 radv_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount,
301                                               const VkAccelerationStructureKHR *pAccelerationStructures,
302                                               VkQueryType queryType, size_t dataSize, void *pData, size_t stride)
303 {
304    unreachable("Unimplemented");
305    return VK_ERROR_FEATURE_NOT_PRESENT;
306 }
307 
308 VKAPI_ATTR VkResult VKAPI_CALL
309 radv_BuildAccelerationStructuresKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount,
310                                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
311                                     const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
312 {
313    unreachable("Unimplemented");
314    return VK_ERROR_FEATURE_NOT_PRESENT;
315 }
316 
317 VKAPI_ATTR VkResult VKAPI_CALL
318 radv_CopyAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
319                                   const VkCopyAccelerationStructureInfoKHR *pInfo)
320 {
321    unreachable("Unimplemented");
322    return VK_ERROR_FEATURE_NOT_PRESENT;
323 }
324 
325 void
326 radv_device_finish_accel_struct_build_state(struct radv_device *device)
327 {
328    VkDevice _device = radv_device_to_handle(device);
329    struct radv_meta_state *state = &device->meta_state;
330    struct vk_device_dispatch_table *dispatch = &device->vk.dispatch_table;
331 
332    dispatch->DestroyPipeline(_device, state->accel_struct_build.copy_pipeline, &state->alloc);
333    dispatch->DestroyPipeline(_device, state->accel_struct_build.ploc_pipeline, &state->alloc);
334    dispatch->DestroyPipeline(_device, state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc);
335    dispatch->DestroyPipeline(_device, state->accel_struct_build.lbvh_main_pipeline, &state->alloc);
336    dispatch->DestroyPipeline(_device, state->accel_struct_build.leaf_pipeline, &state->alloc);
337    dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_pipeline, &state->alloc);
338    dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_compact_pipeline, &state->alloc);
339    dispatch->DestroyPipeline(_device, state->accel_struct_build.header_pipeline, &state->alloc);
340    dispatch->DestroyPipeline(_device, state->accel_struct_build.morton_pipeline, &state->alloc);
341    dispatch->DestroyPipeline(_device, state->accel_struct_build.update_pipeline, &state->alloc);
342    radv_DestroyPipelineLayout(_device, state->accel_struct_build.copy_p_layout, &state->alloc);
343    radv_DestroyPipelineLayout(_device, state->accel_struct_build.ploc_p_layout, &state->alloc);
344    radv_DestroyPipelineLayout(_device, state->accel_struct_build.lbvh_generate_ir_p_layout, &state->alloc);
345    radv_DestroyPipelineLayout(_device, state->accel_struct_build.lbvh_main_p_layout, &state->alloc);
346    radv_DestroyPipelineLayout(_device, state->accel_struct_build.leaf_p_layout, &state->alloc);
347    radv_DestroyPipelineLayout(_device, state->accel_struct_build.encode_p_layout, &state->alloc);
348    radv_DestroyPipelineLayout(_device, state->accel_struct_build.header_p_layout, &state->alloc);
349    radv_DestroyPipelineLayout(_device, state->accel_struct_build.morton_p_layout, &state->alloc);
350    radv_DestroyPipelineLayout(_device, state->accel_struct_build.update_p_layout, &state->alloc);
351 
352    if (state->accel_struct_build.radix_sort)
353       radix_sort_vk_destroy(state->accel_struct_build.radix_sort, _device, &state->alloc);
354 
355    radv_DestroyBuffer(_device, state->accel_struct_build.null.buffer, &state->alloc);
356    radv_FreeMemory(_device, state->accel_struct_build.null.memory, &state->alloc);
357    vk_common_DestroyAccelerationStructureKHR(_device, state->accel_struct_build.null.accel_struct, &state->alloc);
358 }
359 
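/* Create a compute pipeline from embedded SPIR-V with a push-constant-only
 * layout. Returns early if the pipeline already exists, so callers can invoke
 * this lazily. */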
360 static VkResult
361 create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint32_t spv_size,
362                           unsigned push_constant_size, VkPipeline *pipeline, VkPipelineLayout *layout)
363 {
364    if (*pipeline)
365       return VK_SUCCESS;
366 
367    VkDevice _device = radv_device_to_handle(device);
368 
369    const VkPipelineLayoutCreateInfo pl_create_info = {
370       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
371       .setLayoutCount = 0,
372       .pushConstantRangeCount = 1,
373       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size},
374    };
375 
376    VkShaderModuleCreateInfo module_info = {
377       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
378       .pNext = NULL,
379       .flags = 0,
380       .codeSize = spv_size,
381       .pCode = spv,
382    };
383 
384    VkShaderModule module;
385    VkResult result =
386       device->vk.dispatch_table.CreateShaderModule(_device, &module_info, &device->meta_state.alloc, &module);
387    if (result != VK_SUCCESS)
388       return result;
389 
390    if (!*layout) {
391       result = radv_CreatePipelineLayout(_device, &pl_create_info, &device->meta_state.alloc, layout);
392       if (result != VK_SUCCESS)
393          goto cleanup;
394    }
395 
396    VkPipelineShaderStageCreateInfo shader_stage = {
397       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
398       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
399       .module = module,
400       .pName = "main",
401       .pSpecializationInfo = NULL,
402    };
403 
404    VkComputePipelineCreateInfo pipeline_info = {
405       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
406       .stage = shader_stage,
407       .flags = 0,
408       .layout = *layout,
409    };
410 
411    result = device->vk.dispatch_table.CreateComputePipelines(_device, device->meta_state.cache, 1, &pipeline_info,
412                                                              &device->meta_state.alloc, pipeline);
413 
414 cleanup:
415    device->vk.dispatch_table.DestroyShaderModule(_device, module, &device->meta_state.alloc);
416    return result;
417 }
418 
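/* Create a minimal acceleration structure whose root node has only invalid
 * children; it is stored in meta_state.accel_struct_build.null and backs
 * null acceleration structure descriptors. */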
419 VkResult
420 radv_device_init_null_accel_struct(struct radv_device *device)
421 {
422    if (device->physical_device->memory_properties.memoryTypeCount == 0)
423       return VK_SUCCESS; /* Exit in the case of null winsys. */
424 
425    VkDevice _device = radv_device_to_handle(device);
426 
427    uint32_t bvh_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
428    uint32_t size = bvh_offset + sizeof(struct radv_bvh_box32_node);
429 
430    VkResult result;
431 
432    VkBuffer buffer = VK_NULL_HANDLE;
433    VkDeviceMemory memory = VK_NULL_HANDLE;
434    VkAccelerationStructureKHR accel_struct = VK_NULL_HANDLE;
435 
436    VkBufferCreateInfo buffer_create_info = {
437       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
438       .pNext =
439          &(VkBufferUsageFlags2CreateInfoKHR){
440             .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
441             .usage = VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
442          },
443       .size = size,
444       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
445    };
446 
447    result = radv_CreateBuffer(_device, &buffer_create_info, &device->meta_state.alloc, &buffer);
448    if (result != VK_SUCCESS)
449       return result;
450 
451    VkBufferMemoryRequirementsInfo2 info = {
452       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
453       .buffer = buffer,
454    };
455    VkMemoryRequirements2 mem_req = {
456       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
457    };
458    vk_common_GetBufferMemoryRequirements2(_device, &info, &mem_req);
459 
460    VkMemoryAllocateInfo alloc_info = {
461       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
462       .allocationSize = mem_req.memoryRequirements.size,
463       .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
464                                                                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
465                                                                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
466    };
467 
468    result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);
469    if (result != VK_SUCCESS)
470       return result;
471 
472    VkBindBufferMemoryInfo bind_info = {
473       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
474       .buffer = buffer,
475       .memory = memory,
476    };
477 
478    result = radv_BindBufferMemory2(_device, 1, &bind_info);
479    if (result != VK_SUCCESS)
480       return result;
481 
482    void *data;
483    result = vk_common_MapMemory(_device, memory, 0, size, 0, &data);
484    if (result != VK_SUCCESS)
485       return result;
486 
487    struct radv_accel_struct_header header = {
488       .bvh_offset = bvh_offset,
489    };
490    memcpy(data, &header, sizeof(struct radv_accel_struct_header));
491 
492    struct radv_bvh_box32_node root = {
493       .children =
494          {
495             RADV_BVH_INVALID_NODE,
496             RADV_BVH_INVALID_NODE,
497             RADV_BVH_INVALID_NODE,
498             RADV_BVH_INVALID_NODE,
499          },
500    };
501 
502    for (uint32_t child = 0; child < 4; child++) {
503       root.coords[child] = (radv_aabb){
504          .min.x = NAN,
505          .min.y = NAN,
506          .min.z = NAN,
507          .max.x = NAN,
508          .max.y = NAN,
509          .max.z = NAN,
510       };
511    }
512 
513    memcpy((uint8_t *)data + bvh_offset, &root, sizeof(struct radv_bvh_box32_node));
514 
515    vk_common_UnmapMemory(_device, memory);
516 
517    VkAccelerationStructureCreateInfoKHR create_info = {
518       .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
519       .buffer = buffer,
520       .size = size,
521       .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
522    };
523 
524    result = vk_common_CreateAccelerationStructureKHR(_device, &create_info, &device->meta_state.alloc, &accel_struct);
525    if (result != VK_SUCCESS)
526       return result;
527 
528    device->meta_state.accel_struct_build.null.buffer = buffer;
529    device->meta_state.accel_struct_build.null.memory = memory;
530    device->meta_state.accel_struct_build.null.accel_struct = accel_struct;
531 
532    return VK_SUCCESS;
533 }
534 
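/* Lazily create every BVH build pipeline plus the shared radix sort instance.
 * Guarded by the meta state mutex; returns immediately if already initialized. */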
535 VkResult
536 radv_device_init_accel_struct_build_state(struct radv_device *device)
537 {
538    VkResult result = VK_SUCCESS;
539    mtx_lock(&device->meta_state.mtx);
540 
541    if (device->meta_state.accel_struct_build.radix_sort)
542       goto exit;
543 
544    if (device->instance->drirc.force_active_accel_struct_leaves)
545       result = create_build_pipeline_spv(device, leaf_always_active_spv, sizeof(leaf_always_active_spv),
546                                          sizeof(struct leaf_args), &device->meta_state.accel_struct_build.leaf_pipeline,
547                                          &device->meta_state.accel_struct_build.leaf_p_layout);
548    else
549       result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
550                                          &device->meta_state.accel_struct_build.leaf_pipeline,
551                                          &device->meta_state.accel_struct_build.leaf_p_layout);
552    if (result != VK_SUCCESS)
553       goto exit;
554 
555    result = create_build_pipeline_spv(device, lbvh_main_spv, sizeof(lbvh_main_spv), sizeof(struct lbvh_main_args),
556                                       &device->meta_state.accel_struct_build.lbvh_main_pipeline,
557                                       &device->meta_state.accel_struct_build.lbvh_main_p_layout);
558    if (result != VK_SUCCESS)
559       goto exit;
560 
561    result = create_build_pipeline_spv(device, lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv),
562                                       sizeof(struct lbvh_generate_ir_args),
563                                       &device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline,
564                                       &device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout);
565    if (result != VK_SUCCESS)
566       goto exit;
567 
568    result = create_build_pipeline_spv(device, ploc_spv, sizeof(ploc_spv), sizeof(struct ploc_args),
569                                       &device->meta_state.accel_struct_build.ploc_pipeline,
570                                       &device->meta_state.accel_struct_build.ploc_p_layout);
571    if (result != VK_SUCCESS)
572       goto exit;
573 
574    result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
575                                       &device->meta_state.accel_struct_build.encode_pipeline,
576                                       &device->meta_state.accel_struct_build.encode_p_layout);
577    if (result != VK_SUCCESS)
578       goto exit;
579 
580    result =
581       create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
582                                 &device->meta_state.accel_struct_build.encode_compact_pipeline,
583                                 &device->meta_state.accel_struct_build.encode_p_layout);
584    if (result != VK_SUCCESS)
585       goto exit;
586 
587    result = create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args),
588                                       &device->meta_state.accel_struct_build.header_pipeline,
589                                       &device->meta_state.accel_struct_build.header_p_layout);
590    if (result != VK_SUCCESS)
591       goto exit;
592 
593    result = create_build_pipeline_spv(device, morton_spv, sizeof(morton_spv), sizeof(struct morton_args),
594                                       &device->meta_state.accel_struct_build.morton_pipeline,
595                                       &device->meta_state.accel_struct_build.morton_p_layout);
596    if (result != VK_SUCCESS)
597       goto exit;
598 
599    result = create_build_pipeline_spv(device, update_spv, sizeof(update_spv), sizeof(struct update_args),
600                                       &device->meta_state.accel_struct_build.update_pipeline,
601                                       &device->meta_state.accel_struct_build.update_p_layout);
602    if (result != VK_SUCCESS)
603       goto exit;
604 
605    device->meta_state.accel_struct_build.radix_sort =
606       radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache);
607 exit:
608    mtx_unlock(&device->meta_state.mtx);
609    return result;
610 }
611 
612 static VkResult
613 radv_device_init_accel_struct_copy_state(struct radv_device *device)
614 {
615    mtx_lock(&device->meta_state.mtx);
616 
617    VkResult result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
618                                                &device->meta_state.accel_struct_build.copy_pipeline,
619                                                &device->meta_state.accel_struct_build.copy_p_layout);
620 
621    mtx_unlock(&device->meta_state.mtx);
622    return result;
623 }
624 
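/* Per-acceleration-structure state tracked across the batched build passes below. */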
625 struct bvh_state {
626    uint32_t node_count;
627    uint32_t scratch_offset;
628 
629    uint32_t leaf_node_count;
630    uint32_t internal_node_count;
631    uint32_t leaf_node_size;
632 
633    struct acceleration_structure_layout accel_struct;
634    struct scratch_layout scratch;
635    struct build_config config;
636 
637    /* Radix sort state */
638    uint32_t scatter_blocks;
639    uint32_t count_ru_scatter;
640    uint32_t histo_blocks;
641    uint32_t count_ru_histo;
642    struct rs_push_scatter push_scatter;
643 };
644 
645 struct radv_bvh_batch_state {
646    bool any_compact;
647    bool any_non_compact;
648    bool any_ploc;
649    bool any_lbvh;
650    bool any_update;
651 };
652 
653 static uint32_t
654 pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
655 {
656    uint32_t geometry_id_and_flags = geometry_id;
657    if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
658       geometry_id_and_flags |= RADV_GEOMETRY_OPAQUE;
659 
660    return geometry_id_and_flags;
661 }
662 
663 static struct radv_bvh_geometry_data
664 fill_geometry_data(VkAccelerationStructureTypeKHR type, struct bvh_state *bvh_state, uint32_t geom_index,
665                    const VkAccelerationStructureGeometryKHR *geometry,
666                    const VkAccelerationStructureBuildRangeInfoKHR *build_range_info)
667 {
668    struct radv_bvh_geometry_data data = {
669       .first_id = bvh_state->node_count,
670       .geometry_id = pack_geometry_id_and_flags(geom_index, geometry->flags),
671       .geometry_type = geometry->geometryType,
672    };
673 
674    switch (geometry->geometryType) {
675    case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
676       assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
677 
678       data.data = geometry->geometry.triangles.vertexData.deviceAddress +
679                   build_range_info->firstVertex * geometry->geometry.triangles.vertexStride;
680       data.indices = geometry->geometry.triangles.indexData.deviceAddress;
681 
682       if (geometry->geometry.triangles.indexType == VK_INDEX_TYPE_NONE_KHR)
683          data.data += build_range_info->primitiveOffset;
684       else
685          data.indices += build_range_info->primitiveOffset;
686 
687       data.transform = geometry->geometry.triangles.transformData.deviceAddress;
688       if (data.transform)
689          data.transform += build_range_info->transformOffset;
690 
691       data.stride = geometry->geometry.triangles.vertexStride;
692       data.vertex_format = geometry->geometry.triangles.vertexFormat;
693       data.index_format = geometry->geometry.triangles.indexType;
694       break;
695    case VK_GEOMETRY_TYPE_AABBS_KHR:
696       assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
697 
698       data.data = geometry->geometry.aabbs.data.deviceAddress + build_range_info->primitiveOffset;
699       data.stride = geometry->geometry.aabbs.stride;
700       break;
701    case VK_GEOMETRY_TYPE_INSTANCES_KHR:
702       assert(type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
703 
704       data.data = geometry->geometry.instances.data.deviceAddress + build_range_info->primitiveOffset;
705 
706       if (geometry->geometry.instances.arrayOfPointers)
707          data.stride = 8;
708       else
709          data.stride = sizeof(VkAccelerationStructureInstanceKHR);
710       break;
711    default:
712       unreachable("Unknown geometryType");
713    }
714 
715    return data;
716 }
717 
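/* Convert the application-supplied geometry (triangles, AABBs or instances)
 * into intermediate leaf nodes and initialize the id entries consumed by the
 * Morton code and sort passes. */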
718 static void
719 build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount,
720              const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
721              const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states,
722              enum radv_cmd_flush_bits flush_bits)
723 {
724    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
725 
726    radv_write_user_event_marker(cmd_buffer, UserEventPush, "leaves");
727 
728    cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
729       commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);
730 
731    for (uint32_t i = 0; i < infoCount; ++i) {
732       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
733          continue;
734 
735       RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
736 
737       struct leaf_args leaf_consts = {
738          .ir = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
739          .bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.leaf_nodes_offset,
740          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
741          .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
742       };
743 
744       for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
745          const VkAccelerationStructureGeometryKHR *geom =
746             pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
747 
748          const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
749 
750          leaf_consts.geom_data = fill_geometry_data(pInfos[i].type, &bvh_states[i], j, geom, build_range_info);
751 
752          vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
753                                     VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(leaf_consts), &leaf_consts);
754          radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
755 
756          bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
757          bvh_states[i].node_count += build_range_info->primitiveCount;
758       }
759    }
760 
761    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
762 
763    cmd_buffer->state.flush_bits |= flush_bits;
764 }
765 
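/* Compute one Morton code per leaf so the subsequent sort orders leaves along
 * a space-filling curve. */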
766 static void
767 morton_generate(VkCommandBuffer commandBuffer, uint32_t infoCount,
768                 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
769                 enum radv_cmd_flush_bits flush_bits)
770 {
771    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
772 
773    radv_write_user_event_marker(cmd_buffer, UserEventPush, "morton");
774 
775    cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
776       commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.morton_pipeline);
777 
778    for (uint32_t i = 0; i < infoCount; ++i) {
779       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
780          continue;
781       const struct morton_args consts = {
782          .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
783          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
784          .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
785       };
786 
787       vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.morton_p_layout,
788                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
789       radv_unaligned_dispatch(cmd_buffer, bvh_states[i].node_count, 1, 1);
790    }
791 
792    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
793 
794    cmd_buffer->state.flush_bits |= flush_bits;
795 }
796 
797 static void
798 morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
799             const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
800             enum radv_cmd_flush_bits flush_bits)
801 {
802    /* Copyright 2019 The Fuchsia Authors. */
803    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
804 
805    radv_write_user_event_marker(cmd_buffer, UserEventPush, "sort");
806 
807    radix_sort_vk_t *rs = cmd_buffer->device->meta_state.accel_struct_build.radix_sort;
808 
809    /*
810     * OVERVIEW
811     *
812     *   1. Pad the keyvals in `scatter_even`.
813     *   2. Zero the `histograms` and `partitions`.
814     *      --- BARRIER ---
815     *   3. HISTOGRAM is dispatched before PREFIX.
816     *      --- BARRIER ---
817     *   4. PREFIX is dispatched before the first SCATTER.
818     *      --- BARRIER ---
819     *   5. One or more SCATTER dispatches.
820     *
821     * Note that the `partitions` buffer can be zeroed anytime before the first
822     * scatter.
823     */
824 
825    /* How many passes? */
826    uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
827    uint32_t keyval_bits = keyval_bytes * 8;
828    uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
829    uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
830 
831    for (uint32_t i = 0; i < infoCount; ++i) {
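   /* The scatter passes ping-pong between the two sort buffers, so after an odd
    * number of passes the sorted keyvals end up in sort_buffer_offset[1].
    * Empty builds skip the sort and keep using buffer 0. */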
832       if (bvh_states[i].node_count)
833          bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
834       else
835          bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
836    }
837 
838    /*
839     * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
840     *
841     * Pad fractional blocks with max-valued keyvals.
842     *
843     * Zero the histograms and partitions buffer.
844     *
845     * This assumes the partitions follow the histograms.
846     */
847 
848    /* FIXME(allanmac): Consider precomputing some of these values and hanging them off `rs`. */
849 
850    /* How many scatter blocks? */
851    uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
852    uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
853 
854    /*
855     * How many histogram blocks?
856     *
857     * Note that it's OK to have more max-valued digits counted by the histogram
858     * than sorted by the scatters because the sort is stable.
859     */
860    uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
861    uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
862 
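   /* pass_idx starts at the first keyval byte that actually contains Morton key
    * bits; only the `passes` most significant bytes are sorted. */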
863    uint32_t pass_idx = (keyval_bytes - passes);
864 
865    for (uint32_t i = 0; i < infoCount; ++i) {
866       if (!bvh_states[i].node_count)
867          continue;
868       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
869          continue;
870 
871       uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
872       uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
873 
874       bvh_states[i].scatter_blocks = (bvh_states[i].node_count + scatter_block_kvs - 1) / scatter_block_kvs;
875       bvh_states[i].count_ru_scatter = bvh_states[i].scatter_blocks * scatter_block_kvs;
876 
877       bvh_states[i].histo_blocks = (bvh_states[i].count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
878       bvh_states[i].count_ru_histo = bvh_states[i].histo_blocks * histo_block_kvs;
879 
880       /* Fill with max values */
881       if (bvh_states[i].count_ru_histo > bvh_states[i].node_count) {
882          radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + bvh_states[i].node_count * keyval_bytes,
883                           (bvh_states[i].count_ru_histo - bvh_states[i].node_count) * keyval_bytes, 0xFFFFFFFF);
884       }
885 
886       /*
887        * Zero histograms and invalidate partitions.
888        *
889        * Note that the partition invalidation only needs to be performed once
890        * because the even/odd scatter dispatches rely on the previous pass to
891        * leave the partitions in an invalid state.
892        *
893        * Note that the last workgroup doesn't read/write a partition so it doesn't
894        * need to be initialized.
895        */
896       uint32_t histo_partition_count = passes + bvh_states[i].scatter_blocks - 1;
897 
898       uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
899 
900       radv_fill_buffer(cmd_buffer, NULL, NULL, internal_addr + rs->internal.histograms.offset + fill_base,
901                        histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)), 0);
902    }
903 
904    /*
905     * Pipeline: HISTOGRAM
906     *
907     * TODO(allanmac): All subgroups should try to process approximately the same
908     * number of blocks in order to minimize tail effects.  This was implemented
909     * and reverted but should be reimplemented and benchmarked later.
910     */
911    vk_barrier_transfer_w_to_compute_r(commandBuffer);
912 
913    cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
914                                                          rs->pipelines.named.histogram);
915 
916    for (uint32_t i = 0; i < infoCount; ++i) {
917       if (!bvh_states[i].node_count)
918          continue;
919       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
920          continue;
921 
922       uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
923       uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
924 
925       /* Dispatch histogram */
926       struct rs_push_histogram push_histogram = {
927          .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
928          .devaddr_keyvals = keyvals_even_addr,
929          .passes = passes,
930       };
931 
932       vk_common_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
933                                  sizeof(push_histogram), &push_histogram);
934 
935       vk_common_CmdDispatch(commandBuffer, bvh_states[i].histo_blocks, 1, 1);
936    }
937 
938    /*
939     * Pipeline: PREFIX
940     *
941     * Launch one workgroup per pass.
942     */
943    vk_barrier_compute_w_to_compute_r(commandBuffer);
944 
945    cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
946                                                          rs->pipelines.named.prefix);
947 
948    for (uint32_t i = 0; i < infoCount; ++i) {
949       if (!bvh_states[i].node_count)
950          continue;
951       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
952          continue;
953 
954       uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
955 
956       struct rs_push_prefix push_prefix = {
957          .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
958       };
959 
960       vk_common_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
961                                  sizeof(push_prefix), &push_prefix);
962 
963       vk_common_CmdDispatch(commandBuffer, passes, 1, 1);
964    }
965 
966    /* Pipeline: SCATTER */
967    vk_barrier_compute_w_to_compute_r(commandBuffer);
968 
969    uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
970 
971    for (uint32_t i = 0; i < infoCount; i++) {
972       uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
973       uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
974       uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
975 
976       bvh_states[i].push_scatter = (struct rs_push_scatter){
977          .devaddr_keyvals_even = keyvals_even_addr,
978          .devaddr_keyvals_odd = keyvals_odd_addr,
979          .devaddr_partitions = internal_addr + rs->internal.partitions.offset,
980          .devaddr_histograms = internal_addr + rs->internal.histograms.offset + histogram_offset,
981       };
982    }
983 
984    bool is_even = true;
985 
986    while (true) {
987       uint32_t pass_dword = pass_idx / 4;
988 
989       /* Bind new pipeline */
990       VkPipeline p =
991          is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
992       cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
993 
994       /* Update push constants that changed */
995       VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
996                                     : rs->pipeline_layouts.named.scatter[pass_dword].odd;
997 
998       for (uint32_t i = 0; i < infoCount; i++) {
999          if (!bvh_states[i].node_count)
1000             continue;
1001          if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1002             continue;
1003 
1004          bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
1005 
1006          vk_common_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct rs_push_scatter),
1007                                     &bvh_states[i].push_scatter);
1008 
1009          vk_common_CmdDispatch(commandBuffer, bvh_states[i].scatter_blocks, 1, 1);
1010 
1011          bvh_states[i].push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
1012       }
1013 
1014       /* Continue? */
1015       if (++pass_idx >= keyval_bytes)
1016          break;
1017 
1018       vk_barrier_compute_w_to_compute_r(commandBuffer);
1019 
1020       is_even ^= true;
1021    }
1022 
1023    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1024 
1025    cmd_buffer->state.flush_bits |= flush_bits;
1026 }
1027 
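/* LBVH internal-node build in two phases: lbvh_main derives the tree topology
 * from the sorted Morton codes, then lbvh_generate_ir fills in the internal IR
 * nodes. */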
1028 static void
1029 lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
1030                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1031                     enum radv_cmd_flush_bits flush_bits)
1032 {
1033    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1034 
1035    radv_write_user_event_marker(cmd_buffer, UserEventPush, "lbvh");
1036 
1037    cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1038       commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1039       cmd_buffer->device->meta_state.accel_struct_build.lbvh_main_pipeline);
1040 
1041    for (uint32_t i = 0; i < infoCount; ++i) {
1042       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
1043          continue;
1044 
1045       uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
1046       uint32_t internal_node_count = MAX2(bvh_states[i].node_count, 2) - 1;
1047 
1048       const struct lbvh_main_args consts = {
1049          .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1050          .src_ids = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
1051          .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
1052          .id_count = bvh_states[i].node_count,
1053          .internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1054       };
1055 
1056       vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.lbvh_main_p_layout,
1057                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1058       radv_unaligned_dispatch(cmd_buffer, internal_node_count, 1, 1);
1059       bvh_states[i].node_count = internal_node_count;
1060       bvh_states[i].internal_node_count = internal_node_count;
1061    }
1062 
1063    cmd_buffer->state.flush_bits |= flush_bits;
1064 
1065    cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1066       commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1067       cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline);
1068 
1069    for (uint32_t i = 0; i < infoCount; ++i) {
1070       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
1071          continue;
1072 
1073       const struct lbvh_generate_ir_args consts = {
1074          .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1075          .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
1076          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1077          .internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1078       };
1079 
1080       vk_common_CmdPushConstants(commandBuffer,
1081                                  cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout,
1082                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1083       radv_unaligned_dispatch(cmd_buffer, bvh_states[i].internal_node_count, 1, 1);
1084    }
1085 
1086    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1087 }
1088 
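/* PLOC internal-node build: nearest-neighbour clustering over the sorted
 * leaves, ping-ponging the node id arrays between the two sort buffers. */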
1089 static void
1090 ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
1091                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
1092 {
1093    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1094 
1095    radv_write_user_event_marker(cmd_buffer, UserEventPush, "ploc");
1096 
1097    cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1098       commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
1099 
1100    for (uint32_t i = 0; i < infoCount; ++i) {
1101       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
1102          continue;
1103 
1104       uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
1105       uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
1106                                        ? bvh_states[i].scratch.sort_buffer_offset[1]
1107                                        : bvh_states[i].scratch.sort_buffer_offset[0];
1108 
1109       const struct ploc_args consts = {
1110          .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1111          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1112          .ids_0 = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
1113          .ids_1 = pInfos[i].scratchData.deviceAddress + dst_scratch_offset,
1114          .prefix_scan_partitions =
1115             pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ploc_prefix_sum_partition_offset,
1116          .internal_node_offset = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1117       };
1118 
1119       vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.ploc_p_layout,
1120                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1121       vk_common_CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1);
1122    }
1123 
1124    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1125 }
1126 
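/* Encode the intermediate IR BVH into the final hardware node format. The
 * dispatch size is read indirectly from the IR header because the internal
 * node count is only known on the GPU. */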
1127 static void
1128 encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
1129              const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, bool compact)
1130 {
1131    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1132 
1133    radv_write_user_event_marker(cmd_buffer, UserEventPush, "encode");
1134 
1135    cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1136       commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1137       compact ? cmd_buffer->device->meta_state.accel_struct_build.encode_compact_pipeline
1138               : cmd_buffer->device->meta_state.accel_struct_build.encode_pipeline);
1139 
1140    for (uint32_t i = 0; i < infoCount; ++i) {
1141       if (compact != bvh_states[i].config.compact)
1142          continue;
1143       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1144          continue;
1145 
1146       RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1147 
1148       VkGeometryTypeKHR geometry_type = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
1149 
1150       /* If the geometry count is 0, then the size does not matter
1151        * because it will be multiplied by 0.
1152        */
1153       if (pInfos[i].geometryCount)
1154          geometry_type =
1155             pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType : pInfos[i].ppGeometries[0]->geometryType;
1156 
1157       if (bvh_states[i].config.compact) {
1158          uint32_t dst_offset = bvh_states[i].accel_struct.internal_nodes_offset - bvh_states[i].accel_struct.bvh_offset;
1159          radv_update_buffer_cp(cmd_buffer,
1160                                pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
1161                                   offsetof(struct radv_ir_header, dst_node_offset),
1162                                &dst_offset, sizeof(uint32_t));
1163       }
1164 
1165       const struct encode_args args = {
1166          .intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1167          .output_bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.bvh_offset,
1168          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1169          .output_bvh_offset = bvh_states[i].accel_struct.bvh_offset,
1170          .leaf_node_count = bvh_states[i].leaf_node_count,
1171          .geometry_type = geometry_type,
1172       };
1173       vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.encode_p_layout,
1174                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
1175 
1176       struct radv_dispatch_info dispatch = {
1177          .unaligned = true,
1178          .ordered = true,
1179          .va = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
1180                offsetof(struct radv_ir_header, ir_internal_node_count),
1181       };
1182 
1183       radv_compute_dispatch(cmd_buffer, &dispatch);
1184    }
1185    /* This is the final access to the leaf nodes, so there is no need to flush. */
1186 
1187    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1188 }
1189 
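/* Write the radv_accel_struct_header. For compacted builds the size-dependent
 * fields are only known on the GPU, so a small header shader writes them and
 * the CP copy below only covers the remaining fields. */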
1190 static void
1191 init_header(VkCommandBuffer commandBuffer, uint32_t infoCount,
1192             const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1193             struct radv_bvh_batch_state *batch_state)
1194 {
1195    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1196 
1197    if (batch_state->any_compact) {
1198       radv_write_user_event_marker(cmd_buffer, UserEventPush, "header");
1199 
1200       cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1201          commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1202          cmd_buffer->device->meta_state.accel_struct_build.header_pipeline);
1203    }
1204 
1205    for (uint32_t i = 0; i < infoCount; ++i) {
1206       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1207          continue;
1208       RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1209       size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
1210 
1211       uint64_t instance_count =
1212          pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? bvh_states[i].leaf_node_count : 0;
1213 
1214       if (bvh_states[i].config.compact) {
1215          base = offsetof(struct radv_accel_struct_header, geometry_count);
1216 
1217          struct header_args args = {
1218             .src = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1219             .dst = vk_acceleration_structure_get_va(accel_struct),
1220             .bvh_offset = bvh_states[i].accel_struct.bvh_offset,
1221             .instance_count = instance_count,
1222          };
1223 
1224          vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.header_p_layout,
1225                                     VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
1226 
1227          radv_unaligned_dispatch(cmd_buffer, 1, 1, 1);
1228       }
1229 
1230       struct radv_accel_struct_header header;
1231 
1232       header.instance_offset = bvh_states[i].accel_struct.bvh_offset + sizeof(struct radv_bvh_box32_node);
1233       header.instance_count = instance_count;
1234       header.compacted_size = bvh_states[i].accel_struct.size;
1235 
1236       header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
1237       header.copy_dispatch_size[1] = 1;
1238       header.copy_dispatch_size[2] = 1;
1239 
1240       header.serialization_size =
1241          header.compacted_size +
1242          align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128);
1243 
1244       header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) -
1245                     sizeof(uint64_t) * header.instance_count;
1246 
1247       header.build_flags = pInfos[i].flags;
1248       header.geometry_count = pInfos[i].geometryCount;
1249 
1250       radv_update_buffer_cp(cmd_buffer, vk_acceleration_structure_get_va(accel_struct) + base,
1251                             (const char *)&header + base, sizeof(header) - base);
1252    }
1253 
1254    if (batch_state->any_compact)
1255       radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1256 }
1257 
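/* Stores per-geometry metadata (type, flags, primitive count) at
 * geometry_info_offset in the destination buffer. This is only done when RRA
 * (Radeon Raytracing Analyzer) capture is enabled and is not needed for
 * traversal.
 */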
static void
init_geometry_infos(VkCommandBuffer commandBuffer, uint32_t infoCount,
                    const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
                    const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   for (uint32_t i = 0; i < infoCount; ++i) {
      if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
         continue;
      RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);

      uint64_t geometry_infos_size = pInfos[i].geometryCount * sizeof(struct radv_accel_struct_geometry_info);

      struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size);
      if (!geometry_infos)
         continue;

      for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
         const VkAccelerationStructureGeometryKHR *geometry =
            pInfos[i].pGeometries ? pInfos[i].pGeometries + j : pInfos[i].ppGeometries[j];
         geometry_infos[j].type = geometry->geometryType;
         geometry_infos[j].flags = geometry->flags;
         geometry_infos[j].primitive_count = ppBuildRangeInfos[i][j].primitiveCount;
      }

      radv_CmdUpdateBuffer(commandBuffer, accel_struct->buffer,
                           accel_struct->offset + bvh_states[i].accel_struct.geometry_info_offset, geometry_infos_size,
                           geometry_infos);

      free(geometry_infos);
   }
}

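/* Handles VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR. Instead of
 * rebuilding, each leaf is re-encoded from the new geometry data and the
 * internal nodes are refit. The internal_ready_count scratch region (cleared
 * in radv_CmdBuildAccelerationStructuresKHR below) is presumably used by the
 * update shader to track how many children of an internal node have been
 * refit before the node itself is refit bottom-up.
 */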
static void
update(VkCommandBuffer commandBuffer, uint32_t infoCount, const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
       const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

   radv_write_user_event_marker(cmd_buffer, UserEventPush, "update");

   cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
      commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.update_pipeline);

   for (uint32_t i = 0; i < infoCount; ++i) {
      if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE)
         continue;

      uint32_t leaf_node_count = 0;
      for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
         leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
      }

      VK_FROM_HANDLE(vk_acceleration_structure, src_bvh, pInfos[i].srcAccelerationStructure);
      VK_FROM_HANDLE(vk_acceleration_structure, dst_bvh, pInfos[i].dstAccelerationStructure);
      struct update_args update_consts = {
         .src = vk_acceleration_structure_get_va(src_bvh),
         .dst = vk_acceleration_structure_get_va(dst_bvh),
         .leaf_bounds = pInfos[i].scratchData.deviceAddress,
         .internal_ready_count =
            pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
         .leaf_node_count = leaf_node_count,
      };

      for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
         const VkAccelerationStructureGeometryKHR *geom =
            pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];

         const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];

         update_consts.geom_data = fill_geometry_data(pInfos[i].type, &bvh_states[i], j, geom, build_range_info);

         vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.update_p_layout,
                                    VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(update_consts), &update_consts);
         radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);

         bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
         bvh_states[i].node_count += build_range_info->primitiveCount;
      }
   }

   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

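/* Top-level entry point for GPU-side acceleration structure builds. Each info
 * is classified as an LBVH build, a PLOC build or an in-place update, and the
 * batched phases below run in order:
 *
 *   1. build_leaves        - encode leaf nodes from the application geometry
 *   2. morton_generate     - compute a Morton code per leaf
 *   3. morton_sort         - radix sort the (key, id) pairs
 *   4. lbvh_build_internal / ploc_build_internal - build the intermediate
 *                            (IR) internal nodes
 *   5. encode_nodes        - translate the IR BVH into the output layout,
 *                            optionally compacted
 *   6. init_header         - write the radv_accel_struct_header (plus
 *                            init_geometry_infos when RRA capture is enabled)
 *   7. update              - refit pass for UPDATE-mode infos
 *
 * Each phase consumes what the previous one produced, so shader read/write
 * flushes (flush_bits) are inserted between the phases.
 */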
VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                       const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                       const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_build_state(cmd_buffer->device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   enum radv_cmd_flush_bits flush_bits =
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL);

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
   struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));

   radv_describe_begin_accel_struct_build(cmd_buffer, infoCount);

   struct radv_bvh_batch_state batch_state = {0};

   for (uint32_t i = 0; i < infoCount; ++i) {
      uint32_t leaf_node_count = 0;
      for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
         leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
      }

      get_build_layout(cmd_buffer->device, leaf_node_count, pInfos + i, &bvh_states[i].accel_struct,
                       &bvh_states[i].scratch);

      struct build_config config = build_config(leaf_node_count, pInfos + i);
      bvh_states[i].config = config;

      if (config.compact)
         batch_state.any_compact = true;
      else
         batch_state.any_non_compact = true;

      if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC) {
         batch_state.any_ploc = true;
      } else if (config.internal_type == INTERNAL_BUILD_TYPE_LBVH) {
         batch_state.any_lbvh = true;
      } else if (config.internal_type == INTERNAL_BUILD_TYPE_UPDATE) {
         batch_state.any_update = true;
      } else {
         unreachable("Unknown internal_build_type");
      }

      if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE) {
         /* The internal node count is updated in lbvh_build_internal for LBVH
          * and from the PLOC shader for PLOC. */
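         /* 0x7fffffff / 0x80000000 are the INT32_MAX / INT32_MIN bit
          * patterns; assuming the builder shaders keep the scene bounds as
          * ordered integers, these act as identity values for the min/max
          * reductions over all leaves.
          */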
         struct radv_ir_header header = {
            .min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff},
            .max_bounds = {0x80000000, 0x80000000, 0x80000000},
            .dispatch_size_y = 1,
            .dispatch_size_z = 1,
            .sync_data =
               {
                  .current_phase_end_counter = TASK_INDEX_INVALID,
                  /* Will be updated by the first PLOC shader invocation */
                  .task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
               },
         };

         radv_update_buffer_cp(cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
                               &header, sizeof(header));
      } else {
         /* Prepare ready counts for internal nodes */
         radv_fill_buffer(cmd_buffer, NULL, NULL,
                          pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
                          bvh_states[i].scratch.update_size - bvh_states[i].scratch.internal_ready_count_offset, 0x0);
         if (pInfos[i].srcAccelerationStructure != pInfos[i].dstAccelerationStructure) {
            VK_FROM_HANDLE(vk_acceleration_structure, src_as, pInfos[i].srcAccelerationStructure);
            VK_FROM_HANDLE(vk_acceleration_structure, dst_as, pInfos[i].dstAccelerationStructure);

            RADV_FROM_HANDLE(radv_buffer, src_as_buffer, src_as->buffer);
            RADV_FROM_HANDLE(radv_buffer, dst_as_buffer, dst_as->buffer);

            /* Copy header/metadata */
            radv_copy_buffer(cmd_buffer, src_as_buffer->bo, dst_as_buffer->bo, src_as_buffer->offset + src_as->offset,
                             dst_as_buffer->offset + dst_as->offset, bvh_states[i].accel_struct.bvh_offset);
         }
      }
   }

   cmd_buffer->state.current_event_type = EventInternalUnknown;

   build_leaves(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states, flush_bits);

   morton_generate(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);

   morton_sort(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);

   cmd_buffer->state.flush_bits |= flush_bits;

   lbvh_build_internal(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);

   if (batch_state.any_ploc)
      ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states);

   cmd_buffer->state.flush_bits |= flush_bits;

   if (batch_state.any_non_compact)
      encode_nodes(commandBuffer, infoCount, pInfos, bvh_states, false);

   if (batch_state.any_compact)
      encode_nodes(commandBuffer, infoCount, pInfos, bvh_states, true);

   cmd_buffer->state.flush_bits |= flush_bits;

   init_header(commandBuffer, infoCount, pInfos, bvh_states, &batch_state);

   if (cmd_buffer->device->rra_trace.accel_structs)
      init_geometry_infos(commandBuffer, infoCount, pInfos, bvh_states, ppBuildRangeInfos);

   if (batch_state.any_update)
      update(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states);

   radv_describe_end_accel_struct_build(cmd_buffer);

   free(bvh_states);
   radv_meta_restore(&saved_state, cmd_buffer);
}

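/* Device-side acceleration structure copy. The dispatch size is read
 * indirectly from copy_dispatch_size in the source BVH's header (written by
 * init_header above), so the copy can be sized without a CPU readback.
 */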
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   RADV_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   RADV_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = RADV_COPY_MODE_COPY,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                              cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);

   cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);

   radv_indirect_dispatch(
      cmd_buffer, src_buffer->bo,
      vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
   radv_meta_restore(&saved_state, cmd_buffer);
}

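/* Serialized acceleration structures are only compatible with the exact same
 * driver build and GPU: the opaque version data is the driver UUID followed
 * by the cache UUID, the same header that
 * radv_CmdCopyAccelerationStructureToMemoryKHR writes below.
 */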
VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
                                                    const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
                                                    VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   bool compat =
      memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 &&
      memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0;
   *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
                            : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}

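/* Host-side copy commands. RADV does not expose the
 * accelerationStructureHostCommands feature, so these entry points are
 * presumably unreachable and only exist to populate the dispatch table.
 */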
VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                          const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   unreachable("Unimplemented");
   return VK_ERROR_FEATURE_NOT_PRESENT;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                          const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   unreachable("Unimplemented");
   return VK_ERROR_FEATURE_NOT_PRESENT;
}

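/* Deserialization: copies host-serialized data back into an acceleration
 * structure with the copy shader in DESERIALIZE mode. A fixed dispatch of 512
 * workgroups is used rather than an indirect one, presumably because the
 * destination header is not valid yet at record time.
 */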
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);

   const struct copy_args consts = {
      .src_addr = pInfo->src.deviceAddress,
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = RADV_COPY_MODE_DESERIALIZE,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                              cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);

   vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
   radv_meta_restore(&saved_state, cmd_buffer);
}

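/* Serialization: the copy shader writes the structure to pInfo->dst in
 * SERIALIZE mode (sized via the indirect copy_dispatch_size), after which the
 * serialization header is patched in with a CP copy: driver UUID followed by
 * cache UUID, as checked by radv_GetDeviceAccelerationStructureCompatibilityKHR.
 */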
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   RADV_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);

   const struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = pInfo->dst.deviceAddress,
      .mode = RADV_COPY_MODE_SERIALIZE,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                              cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);

   cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);

   radv_indirect_dispatch(
      cmd_buffer, src_buffer->bo,
      vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
   radv_meta_restore(&saved_state, cmd_buffer);

   /* Set the header of the serialized data. */
   uint8_t header_data[2 * VK_UUID_SIZE];
   memcpy(header_data, cmd_buffer->device->physical_device->driver_uuid, VK_UUID_SIZE);
   memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid, VK_UUID_SIZE);

   radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
}

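/* Indirect builds require the accelerationStructureIndirectBuild feature,
 * which RADV does not advertise, so this entry point should never be reached.
 */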
VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                               const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                               const VkDeviceAddress *pIndirectDeviceAddresses,
                                               const uint32_t *pIndirectStrides,
                                               const uint32_t *const *ppMaxPrimitiveCounts)
{
   unreachable("Unimplemented");
}