/*
 * Copyright © 2021 Bas Nieuwenhuizen
 *
 * SPDX-License-Identifier: MIT
 */

#include "meta/radv_meta.h"
#include "nir_builder.h"
#include "radv_cs.h"
#include "radv_entrypoints.h"

#include "radix_sort/radix_sort_u64.h"

#include "bvh/build_interface.h"
#include "bvh/bvh.h"

#include "vk_acceleration_structure.h"
#include "vk_common_entrypoints.h"

static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
};

static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};

static const uint32_t encode_compact_spv[] = {
#include "bvh/encode_compact.spv.h"
};

static const uint32_t header_spv[] = {
#include "bvh/header.spv.h"
};

static const uint32_t update_spv[] = {
#include "bvh/update.spv.h"
};

struct acceleration_structure_layout {
   uint32_t geometry_info_offset;
   uint32_t bvh_offset;
   uint32_t leaf_nodes_offset;
   uint32_t internal_nodes_offset;
   uint32_t size;
};

struct scratch_layout {
   uint32_t update_size;
   uint32_t header_offset;
   uint32_t internal_ready_count_offset;
};

enum radv_encode_key_bits {
   RADV_ENCODE_KEY_COMPACT = 1,
};

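/* Compute where each section of the final acceleration structure lives:
 * header, optional RRA geometry info, parent links, then the BVH itself
 * (root node, leaf nodes, remaining internal nodes).
 */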
static void
radv_get_acceleration_structure_layout(struct radv_device *device, uint32_t leaf_count,
                                       const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
                                       struct acceleration_structure_layout *accel_struct)
{
   uint32_t internal_count = MAX2(leaf_count, 2) - 1;

   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   uint32_t bvh_leaf_size;
   switch (geometry_type) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
      bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
      break;
   case VK_GEOMETRY_TYPE_AABBS_KHR:
      bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
      break;
   case VK_GEOMETRY_TYPE_INSTANCES_KHR:
      bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
      break;
   default:
      unreachable("Unknown VkGeometryTypeKHR");
   }

   uint64_t bvh_size = bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count;
   uint32_t offset = 0;
   offset += sizeof(struct radv_accel_struct_header);

   if (device->rra_trace.accel_structs) {
      accel_struct->geometry_info_offset = offset;
      offset += sizeof(struct radv_accel_struct_geometry_info) * build_info->geometryCount;
   }
   /* Parent links, which have to go directly before bvh_offset as we index them using negative
    * offsets from there. */
   offset += bvh_size / 64 * 4;

   /* The BVH, and hence bvh_offset, needs 64-byte alignment for RT nodes. */
   offset = ALIGN(offset, 64);
   accel_struct->bvh_offset = offset;

   /* root node */
   offset += sizeof(struct radv_bvh_box32_node);

   accel_struct->leaf_nodes_offset = offset;
   offset += bvh_leaf_size * leaf_count;

   accel_struct->internal_nodes_offset = offset;
   /* Factor out the root node. */
   offset += sizeof(struct radv_bvh_box32_node) * (internal_count - 1);

   accel_struct->size = offset;
}

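/* Compute the scratch layout: builds place a vk_ir_header at the start,
 * while updates use per-leaf bounds followed by one ready count per
 * internal node.
 */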
static void
radv_get_scratch_layout(struct radv_device *device, uint32_t leaf_count, struct scratch_layout *scratch)
{
   uint32_t internal_count = MAX2(leaf_count, 2) - 1;

   uint32_t offset = 0;

   scratch->header_offset = offset;
   offset += sizeof(struct vk_ir_header);

   uint32_t update_offset = 0;

   update_offset += sizeof(vk_aabb) * leaf_count;
   scratch->internal_ready_count_offset = update_offset;

   update_offset += sizeof(uint32_t) * internal_count;
   scratch->update_size = update_offset;
}

VKAPI_ATTR void VKAPI_CALL
radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
                                           const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
                                           const uint32_t *pMaxPrimitiveCounts,
                                           VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
{
   VK_FROM_HANDLE(radv_device, device, _device);

   STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
   STATIC_ASSERT(sizeof(struct radv_bvh_aabb_node) == 64);
   STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
   STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
   STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);

   if (radv_device_init_accel_struct_build_state(device) != VK_SUCCESS)
      return;

   vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts, pSizeInfo,
                         &device->meta_state.accel_struct_build.build_args);
}

void
radv_device_finish_accel_struct_build_state(struct radv_device *device)
{
   VkDevice _device = radv_device_to_handle(device);
   struct radv_meta_state *state = &device->meta_state;
   struct vk_device_dispatch_table *dispatch = &device->vk.dispatch_table;

   dispatch->DestroyPipeline(_device, state->accel_struct_build.copy_pipeline, &state->alloc);
   dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_pipeline, &state->alloc);
   dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_compact_pipeline, &state->alloc);
   dispatch->DestroyPipeline(_device, state->accel_struct_build.header_pipeline, &state->alloc);
   dispatch->DestroyPipeline(_device, state->accel_struct_build.update_pipeline, &state->alloc);
   radv_DestroyPipelineLayout(_device, state->accel_struct_build.copy_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(_device, state->accel_struct_build.encode_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(_device, state->accel_struct_build.header_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(_device, state->accel_struct_build.update_p_layout, &state->alloc);

   if (state->accel_struct_build.radix_sort)
      radix_sort_vk_destroy(state->accel_struct_build.radix_sort, _device, &state->alloc);

   radv_DestroyBuffer(_device, state->accel_struct_build.null.buffer, &state->alloc);
   radv_FreeMemory(_device, state->accel_struct_build.null.memory, &state->alloc);
   vk_common_DestroyAccelerationStructureKHR(_device, state->accel_struct_build.null.accel_struct, &state->alloc);
}

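/* Lazily create a compute pipeline (and its layout, if not created yet) from
 * an embedded SPIR-V binary. The build shaders use only push constants, so no
 * descriptor set layouts are needed.
 */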
static VkResult
create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint32_t spv_size,
                          unsigned push_constant_size, VkPipeline *pipeline, VkPipelineLayout *layout)
{
   if (*pipeline)
      return VK_SUCCESS;

   VkDevice _device = radv_device_to_handle(device);

   const VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size},
   };

   VkShaderModuleCreateInfo module_info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
      .pNext = NULL,
      .flags = 0,
      .codeSize = spv_size,
      .pCode = spv,
   };

   VkShaderModule module;
   VkResult result =
      device->vk.dispatch_table.CreateShaderModule(_device, &module_info, &device->meta_state.alloc, &module);
   if (result != VK_SUCCESS)
      return result;

   if (!*layout) {
      result = radv_CreatePipelineLayout(_device, &pl_create_info, &device->meta_state.alloc, layout);
      if (result != VK_SUCCESS)
         goto cleanup;
   }

   VkPipelineShaderStageCreateInfo shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = module,
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = shader_stage,
      .flags = 0,
      .layout = *layout,
   };

   result = device->vk.dispatch_table.CreateComputePipelines(_device, device->meta_state.cache, 1, &pipeline_info,
                                                             &device->meta_state.alloc, pipeline);

cleanup:
   device->vk.dispatch_table.DestroyShaderModule(_device, module, &device->meta_state.alloc);
   return result;
}

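/* Create the null acceleration structure: a single host-written box32 root
 * node with all children invalid and NaN bounds, so traversal can never
 * descend into it.
 */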
VkResult
radv_device_init_null_accel_struct(struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (pdev->memory_properties.memoryTypeCount == 0)
      return VK_SUCCESS; /* Exit in the case of null winsys. */

   VkDevice _device = radv_device_to_handle(device);

   uint32_t bvh_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
   uint32_t size = bvh_offset + sizeof(struct radv_bvh_box32_node);

   VkResult result;

   VkBuffer buffer = VK_NULL_HANDLE;
   VkDeviceMemory memory = VK_NULL_HANDLE;
   VkAccelerationStructureKHR accel_struct = VK_NULL_HANDLE;

   VkBufferCreateInfo buffer_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .pNext =
         &(VkBufferUsageFlags2CreateInfo){
            .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
            .usage = VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
         },
      .size = size,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
   };

   result = radv_CreateBuffer(_device, &buffer_create_info, &device->meta_state.alloc, &buffer);
   if (result != VK_SUCCESS)
      return result;

   VkBufferMemoryRequirementsInfo2 info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
      .buffer = buffer,
   };
   VkMemoryRequirements2 mem_req = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
   };
   vk_common_GetBufferMemoryRequirements2(_device, &info, &mem_req);

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req.memoryRequirements.size,
      .memoryTypeIndex =
         radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
   };

   result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);
   if (result != VK_SUCCESS)
      return result;

   VkBindBufferMemoryInfo bind_info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = buffer,
      .memory = memory,
   };

   result = radv_BindBufferMemory2(_device, 1, &bind_info);
   if (result != VK_SUCCESS)
      return result;

   void *data;
   result = vk_common_MapMemory(_device, memory, 0, size, 0, &data);
   if (result != VK_SUCCESS)
      return result;

   struct radv_accel_struct_header header = {
      .bvh_offset = bvh_offset,
   };
   memcpy(data, &header, sizeof(struct radv_accel_struct_header));

   struct radv_bvh_box32_node root = {
      .children =
         {
            RADV_BVH_INVALID_NODE,
            RADV_BVH_INVALID_NODE,
            RADV_BVH_INVALID_NODE,
            RADV_BVH_INVALID_NODE,
         },
   };

   for (uint32_t child = 0; child < 4; child++) {
      root.coords[child] = (vk_aabb){
         .min.x = NAN,
         .min.y = NAN,
         .min.z = NAN,
         .max.x = NAN,
         .max.y = NAN,
         .max.z = NAN,
      };
   }

   memcpy((uint8_t *)data + bvh_offset, &root, sizeof(struct radv_bvh_box32_node));

   vk_common_UnmapMemory(_device, memory);

   VkAccelerationStructureCreateInfoKHR create_info = {
      .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
      .buffer = buffer,
      .size = size,
      .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
   };

   result = vk_common_CreateAccelerationStructureKHR(_device, &create_info, &device->meta_state.alloc, &accel_struct);
   if (result != VK_SUCCESS)
      return result;

   device->meta_state.accel_struct_build.null.buffer = buffer;
   device->meta_state.accel_struct_build.null.memory = memory;
   device->meta_state.accel_struct_build.null.accel_struct = accel_struct;

   return VK_SUCCESS;
}

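/* vk_acceleration_structure_build_ops callbacks for querying sizes. */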
static VkDeviceSize
radv_get_as_size(VkDevice _device, const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo, uint32_t leaf_count)
{
   VK_FROM_HANDLE(radv_device, device, _device);

   struct acceleration_structure_layout accel_struct;
   radv_get_acceleration_structure_layout(device, leaf_count, pBuildInfo, &accel_struct);
   return accel_struct.size;
}

static VkDeviceSize
radv_get_update_scratch_size(struct vk_device *vk_device, uint32_t leaf_count)
{
   struct radv_device *device = container_of(vk_device, struct radv_device, vk);

   struct scratch_layout scratch;
   radv_get_scratch_layout(device, leaf_count, &scratch);
   return scratch.update_size;
}

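/* Select the encode variant: builds that allow compaction take the compacting
 * encode path.
 */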
static uint32_t
radv_get_encode_key(VkAccelerationStructureTypeKHR type, VkBuildAccelerationStructureFlagBitsKHR flags)
{
   if (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
      return RADV_ENCODE_KEY_COMPACT;

   return 0;
}

static VkResult
radv_encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   bool compact = key & RADV_ENCODE_KEY_COMPACT;
   device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                             compact ? device->meta_state.accel_struct_build.encode_compact_pipeline
                                                     : device->meta_state.accel_struct_build.encode_pipeline);

   return VK_SUCCESS;
}

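/* Encode the intermediate BVH produced by the common build code into RADV's
 * hardware node format. For compacting builds, the destination node offset in
 * the IR header is seeded with the start of the internal node section
 * (relative to the BVH) before the encoder runs.
 */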
static void
radv_encode_as(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
               const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos, VkDeviceAddress intermediate_as_addr,
               VkDeviceAddress intermediate_header_addr, uint32_t leaf_count, uint32_t key,
               struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   struct acceleration_structure_layout layout;
   radv_get_acceleration_structure_layout(device, leaf_count, build_info, &layout);

   if (key & RADV_ENCODE_KEY_COMPACT) {
      uint32_t dst_offset = layout.internal_nodes_offset - layout.bvh_offset;
      radv_update_buffer_cp(cmd_buffer, intermediate_header_addr + offsetof(struct vk_ir_header, dst_node_offset),
                            &dst_offset, sizeof(uint32_t));
   }

   const struct encode_args args = {
      .intermediate_bvh = intermediate_as_addr,
      .output_bvh = vk_acceleration_structure_get_va(dst) + layout.bvh_offset,
      .header = intermediate_header_addr,
      .output_bvh_offset = layout.bvh_offset,
      .leaf_node_count = leaf_count,
      .geometry_type = vk_get_as_geometry_type(build_info),
   };
   vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.encode_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);

   struct radv_dispatch_info dispatch = {
      .unaligned = true,
      .ordered = true,
      .blocks = {MAX2(leaf_count, 1), 1, 1},
   };

   radv_compute_dispatch(cmd_buffer, &dispatch);
}

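/* Pipeline binding for the second encode phase. Only compacting builds need a
 * shader here, to read the compacted size back out of the IR header; other
 * builds have their header written directly by the CP in radv_init_header().
 */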
static VkResult
radv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (!(key & RADV_ENCODE_KEY_COMPACT))
      return VK_SUCCESS;

   /* Wait for encoding to finish. */
   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
                                   radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                                         VK_ACCESS_2_SHADER_WRITE_BIT, 0, NULL, NULL) |
                                   radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                                         VK_ACCESS_2_SHADER_READ_BIT, 0, NULL, NULL);

   device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                             device->meta_state.accel_struct_build.header_pipeline);

   return VK_SUCCESS;
}

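/* Write the radv_accel_struct_header. Only the fields from `base` onwards are
 * written here with the CP; for compacting builds, `base` moves past the
 * size-related fields, which the header shader fills in instead.
 */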
static void
radv_init_header(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
                 const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
                 VkDeviceAddress intermediate_as_addr, VkDeviceAddress intermediate_header_addr, uint32_t leaf_count,
                 uint32_t key, struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   size_t base = offsetof(struct radv_accel_struct_header, compacted_size);

   uint64_t instance_count = build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? leaf_count : 0;

   struct acceleration_structure_layout layout;
   radv_get_acceleration_structure_layout(device, leaf_count, build_info, &layout);

   if (key & RADV_ENCODE_KEY_COMPACT) {
      base = offsetof(struct radv_accel_struct_header, geometry_count);

      struct header_args args = {
         .src = intermediate_header_addr,
         .dst = vk_acceleration_structure_get_va(dst),
         .bvh_offset = layout.bvh_offset,
         .instance_count = instance_count,
      };

      vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.header_p_layout,
                                 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);

      radv_unaligned_dispatch(cmd_buffer, 1, 1, 1);
   }

   struct radv_accel_struct_header header;

   header.instance_offset = layout.bvh_offset + sizeof(struct radv_bvh_box32_node);
   header.instance_count = instance_count;
   header.compacted_size = layout.size;

   header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
   header.copy_dispatch_size[1] = 1;
   header.copy_dispatch_size[2] = 1;

   header.serialization_size =
      header.compacted_size +
      align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128);

   header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) -
                 sizeof(uint64_t) * header.instance_count;

   header.build_flags = build_info->flags;
   header.geometry_count = build_info->geometryCount;

   radv_update_buffer_cp(cmd_buffer, vk_acceleration_structure_get_va(dst) + base, (const char *)&header + base,
                         sizeof(header) - base);

   if (device->rra_trace.accel_structs) {
      uint64_t geometry_infos_size = build_info->geometryCount * sizeof(struct radv_accel_struct_geometry_info);

      struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size);
      if (!geometry_infos)
         return;

      for (uint32_t i = 0; i < build_info->geometryCount; i++) {
         const VkAccelerationStructureGeometryKHR *geometry =
            build_info->pGeometries ? &build_info->pGeometries[i] : build_info->ppGeometries[i];
         geometry_infos[i].type = geometry->geometryType;
         geometry_infos[i].flags = geometry->flags;
         geometry_infos[i].primitive_count = build_range_infos[i].primitiveCount;
      }

      radv_CmdUpdateBuffer(commandBuffer, dst->buffer, dst->offset + layout.geometry_info_offset, geometry_infos_size,
                           geometry_infos);

      free(geometry_infos);
   }
}

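/* Zero the internal-node ready counts in the update scratch ahead of the
 * update dispatches.
 */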
static void
radv_init_update_scratch(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, uint32_t leaf_count,
                         struct vk_acceleration_structure *src_as, struct vk_acceleration_structure *dst_as)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   struct scratch_layout layout;
   radv_get_scratch_layout(device, leaf_count, &layout);

   /* Prepare ready counts for internal nodes */
   radv_fill_buffer(cmd_buffer, NULL, NULL, scratch + layout.internal_ready_count_offset,
                    layout.update_size - layout.internal_ready_count_offset, 0x0);
}

static void
radv_update_bind_pipeline(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   /* Wait for update scratch initialization to finish. */
   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
                                   radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                                         VK_ACCESS_2_SHADER_WRITE_BIT, 0, NULL, NULL) |
                                   radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                                         VK_ACCESS_2_SHADER_READ_BIT, 0, NULL, NULL);

   device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                             device->meta_state.accel_struct_build.update_pipeline);
}

static uint32_t
pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
{
   uint32_t geometry_id_and_flags = geometry_id;
   if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
      geometry_id_and_flags |= RADV_GEOMETRY_OPAQUE;

   return geometry_id_and_flags;
}

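/* Update (refit) an existing acceleration structure: copy the header and
 * metadata first if the destination differs from the source, then dispatch
 * one update pass per geometry over its primitives.
 */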
static void
radv_update_as(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
               const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos, uint32_t leaf_count,
               struct vk_acceleration_structure *src, struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (src != dst) {
      VK_FROM_HANDLE(radv_buffer, src_as_buffer, src->buffer);
      VK_FROM_HANDLE(radv_buffer, dst_as_buffer, dst->buffer);

      struct acceleration_structure_layout layout;
      radv_get_acceleration_structure_layout(device, leaf_count, build_info, &layout);

      /* Copy header/metadata */
      radv_copy_buffer(cmd_buffer, src_as_buffer->bo, dst_as_buffer->bo, src_as_buffer->offset + src->offset,
                       dst_as_buffer->offset + dst->offset, layout.bvh_offset);
   }

   struct scratch_layout layout;
   radv_get_scratch_layout(device, leaf_count, &layout);

   struct update_args update_consts = {
      .src = vk_acceleration_structure_get_va(src),
      .dst = vk_acceleration_structure_get_va(dst),
      .leaf_bounds = build_info->scratchData.deviceAddress,
      .internal_ready_count = build_info->scratchData.deviceAddress + layout.internal_ready_count_offset,
      .leaf_node_count = leaf_count,
   };

   uint32_t first_id = 0;
   for (uint32_t i = 0; i < build_info->geometryCount; i++) {
      const VkAccelerationStructureGeometryKHR *geom =
         build_info->pGeometries ? &build_info->pGeometries[i] : build_info->ppGeometries[i];

      const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &build_range_infos[i];

      update_consts.geom_data = vk_fill_geometry_data(build_info->type, first_id, i, geom, build_range_info);

      vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.update_p_layout,
                                 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(update_consts), &update_consts);
      radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);

      first_id += build_range_info->primitiveCount;
   }
}

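/* Tuning for the radix sort used while building; keyvals are 64 bits
 * (2 dwords).
 */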
static const struct radix_sort_vk_target_config radix_sort_config = {
   .keyval_dwords = 2,
   .fill.workgroup_size_log2 = 7,
   .fill.block_rows = 8,
   .histogram.workgroup_size_log2 = 8,
   .histogram.subgroup_size_log2 = 6,
   .histogram.block_rows = 14,
   .prefix.workgroup_size_log2 = 8,
   .prefix.subgroup_size_log2 = 6,
   .scatter.workgroup_size_log2 = 8,
   .scatter.subgroup_size_log2 = 6,
   .scatter.block_rows = 14,
};

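/* Hooks into the common vk_acceleration_structure build framework: encode
 * phase 0 writes the BVH nodes, phase 1 writes the header.
 */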
static const struct vk_acceleration_structure_build_ops build_ops = {
   .begin_debug_marker = vk_accel_struct_cmd_begin_debug_marker,
   .end_debug_marker = vk_accel_struct_cmd_end_debug_marker,
   .get_as_size = radv_get_as_size,
   .get_update_scratch_size = radv_get_update_scratch_size,
   .get_encode_key[0] = radv_get_encode_key,
   .get_encode_key[1] = radv_get_encode_key,
   .encode_bind_pipeline[0] = radv_encode_bind_pipeline,
   .encode_bind_pipeline[1] = radv_init_header_bind_pipeline,
   .encode_as[0] = radv_encode_as,
   .encode_as[1] = radv_init_header,
   .init_update_scratch = radv_init_update_scratch,
   .update_bind_pipeline[0] = radv_update_bind_pipeline,
   .update_as[0] = radv_update_as,
};

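/* Thin wrappers giving the common build code access to RADV's CP write,
 * unaligned dispatch and buffer fill helpers.
 */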
static void
radv_write_buffer_cp(VkCommandBuffer commandBuffer, VkDeviceAddress addr, void *data, uint32_t size)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_update_buffer_cp(cmd_buffer, addr, data, size);
}

static void
radv_flush_buffer_write_cp(VkCommandBuffer commandBuffer)
{
}

static void
radv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_unaligned_dispatch(cmd_buffer, x, y, z);
}

static void
radv_cmd_fill_buffer_addr(VkCommandBuffer commandBuffer, VkDeviceAddress addr, VkDeviceSize size, uint32_t data)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_fill_buffer(cmd_buffer, NULL, NULL, addr, size, data);
}

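/* Create the build pipelines and register the build callbacks on first use,
 * all under the meta state mutex. The radix sort instance doubles as the
 * "already initialized" marker.
 */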
VkResult
radv_device_init_accel_struct_build_state(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;
   mtx_lock(&device->meta_state.mtx);

   if (device->meta_state.accel_struct_build.radix_sort)
      goto exit;

   result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
                                      &device->meta_state.accel_struct_build.encode_pipeline,
                                      &device->meta_state.accel_struct_build.encode_p_layout);
   if (result != VK_SUCCESS)
      goto exit;

   result =
      create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
                                &device->meta_state.accel_struct_build.encode_compact_pipeline,
                                &device->meta_state.accel_struct_build.encode_p_layout);
   if (result != VK_SUCCESS)
      goto exit;

   result = create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args),
                                      &device->meta_state.accel_struct_build.header_pipeline,
                                      &device->meta_state.accel_struct_build.header_p_layout);
   if (result != VK_SUCCESS)
      goto exit;

   result = create_build_pipeline_spv(device, update_spv, sizeof(update_spv), sizeof(struct update_args),
                                      &device->meta_state.accel_struct_build.update_pipeline,
                                      &device->meta_state.accel_struct_build.update_p_layout);
   if (result != VK_SUCCESS)
      goto exit;

   device->meta_state.accel_struct_build.radix_sort = vk_create_radix_sort_u64(
      radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache, radix_sort_config);

   device->vk.as_build_ops = &build_ops;
   device->vk.write_buffer_cp = radv_write_buffer_cp;
   device->vk.flush_buffer_write_cp = radv_flush_buffer_write_cp;
   device->vk.cmd_dispatch_unaligned = radv_cmd_dispatch_unaligned;
   device->vk.cmd_fill_buffer_addr = radv_cmd_fill_buffer_addr;

   struct vk_acceleration_structure_build_args *build_args = &device->meta_state.accel_struct_build.build_args;
   build_args->subgroup_size = 64;
   build_args->bvh_bounds_offset = offsetof(struct radv_accel_struct_header, aabb);
   build_args->emit_markers = device->sqtt.bo;
   build_args->radix_sort = device->meta_state.accel_struct_build.radix_sort;

exit:
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

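/* The copy pipeline is shared by the copy, serialize and deserialize entry
 * points; create it on first use.
 */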
static VkResult
radv_device_init_accel_struct_copy_state(struct radv_device *device)
{
   mtx_lock(&device->meta_state.mtx);

   VkResult result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
                                               &device->meta_state.accel_struct_build.copy_pipeline,
                                               &device->meta_state.accel_struct_build.copy_p_layout);

   mtx_unlock(&device->meta_state.mtx);
   return result;
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                       const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                       const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_build_state(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   cmd_buffer->state.current_event_type = EventInternalUnknown;

   vk_cmd_build_acceleration_structures(commandBuffer, &device->vk, &device->meta_state.device, infoCount, pInfos,
                                        ppBuildRangeInfos, &device->meta_state.accel_struct_build.build_args);

   radv_meta_restore(&saved_state, cmd_buffer);
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   VK_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_copy_state(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        device->meta_state.accel_struct_build.copy_pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = RADV_COPY_MODE_COPY,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                              device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                              sizeof(consts), &consts);

   cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
                                                         VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, 0, NULL, NULL);

   radv_indirect_dispatch(
      cmd_buffer, src_buffer->bo,
      vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
   radv_meta_restore(&saved_state, cmd_buffer);
}

VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
                                                    const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
                                                    VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 &&
                 memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0;
   *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
                            : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_copy_state(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        device->meta_state.accel_struct_build.copy_pipeline);

   const struct copy_args consts = {
      .src_addr = pInfo->src.deviceAddress,
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = RADV_COPY_MODE_DESERIALIZE,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                              device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                              sizeof(consts), &consts);

   vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
   radv_meta_restore(&saved_state, cmd_buffer);
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   VK_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_copy_state(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        device->meta_state.accel_struct_build.copy_pipeline);

   const struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = pInfo->dst.deviceAddress,
      .mode = RADV_COPY_MODE_SERIALIZE,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                              device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                              sizeof(consts), &consts);

   cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
                                                         VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, 0, NULL, NULL);

   radv_indirect_dispatch(
      cmd_buffer, src_buffer->bo,
      vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
   radv_meta_restore(&saved_state, cmd_buffer);

   /* Set the header of the serialized data. */
   uint8_t header_data[2 * VK_UUID_SIZE];
   memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE);
   memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE);

   radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
}