1 /*
2 * Copyright © 2021 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "radv_private.h"
25
26 #include "meta/radv_meta.h"
27 #include "nir_builder.h"
28 #include "radv_cs.h"
29
30 #include "radix_sort/common/vk/barrier.h"
31 #include "radix_sort/radv_radix_sort.h"
32 #include "radix_sort/shaders/push.h"
33
34 #include "bvh/build_interface.h"
35 #include "bvh/bvh.h"
36
37 #include "vk_acceleration_structure.h"
38 #include "vk_common_entrypoints.h"
39
40 static const uint32_t leaf_spv[] = {
41 #include "bvh/leaf.spv.h"
42 };
43
44 static const uint32_t leaf_always_active_spv[] = {
45 #include "bvh/leaf_always_active.spv.h"
46 };
47
48 static const uint32_t morton_spv[] = {
49 #include "bvh/morton.spv.h"
50 };
51
52 static const uint32_t lbvh_main_spv[] = {
53 #include "bvh/lbvh_main.spv.h"
54 };
55
56 static const uint32_t lbvh_generate_ir_spv[] = {
57 #include "bvh/lbvh_generate_ir.spv.h"
58 };
59
60 static const uint32_t ploc_spv[] = {
61 #include "bvh/ploc_internal.spv.h"
62 };
63
64 static const uint32_t copy_spv[] = {
65 #include "bvh/copy.spv.h"
66 };
67
68 static const uint32_t encode_spv[] = {
69 #include "bvh/encode.spv.h"
70 };
71
72 static const uint32_t encode_compact_spv[] = {
73 #include "bvh/encode_compact.spv.h"
74 };
75
76 static const uint32_t header_spv[] = {
77 #include "bvh/header.spv.h"
78 };
79
80 static const uint32_t update_spv[] = {
81 #include "bvh/update.spv.h"
82 };
83
84 #define KEY_ID_PAIR_SIZE 8
85 #define MORTON_BIT_SIZE 24
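/* The sort operates on 8-byte key/id pairs (radix-sorted as a single u64):
 * a key derived from a 24-bit Morton code of the leaf, packed together with
 * the leaf node id. Only the Morton bits ever need to act as sort keys.
 */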
86
87 enum internal_build_type {
88 INTERNAL_BUILD_TYPE_LBVH,
89 INTERNAL_BUILD_TYPE_PLOC,
90 INTERNAL_BUILD_TYPE_UPDATE,
91 };
92
93 struct build_config {
94 enum internal_build_type internal_type;
95 bool compact;
96 };
97
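/* Layout of the final acceleration-structure buffer (filled in by
 * get_build_layout):
 *   radv_accel_struct_header
 *   geometry infos                   (only when RRA tracing is enabled)
 *   parent links
 *   bvh_offset (64-byte aligned):    root box32 node, leaf nodes, remaining
 *                                    internal nodes
 */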
98 struct acceleration_structure_layout {
99 uint32_t geometry_info_offset;
100 uint32_t bvh_offset;
101 uint32_t leaf_nodes_offset;
102 uint32_t internal_nodes_offset;
103 uint32_t size;
104 };
105
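/* Scratch layout during a build (see get_build_layout):
 *   radv_ir_header
 *   sort buffer 0 / sort buffer 1    (key/id pairs, ping-ponged by the sort)
 *   radix-sort internal data, PLOC prefix-scan partitions and LBVH node info
 *   (aliased, since only one of them is live at a time)
 *   IR leaf nodes
 *   IR internal nodes
 * Updates only need a leaf-bounds array followed by the internal ready
 * counts.
 */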
106 struct scratch_layout {
107 uint32_t size;
108 uint32_t update_size;
109
110 uint32_t header_offset;
111
112 /* Used for UPDATE only. */
113
114 uint32_t internal_ready_count_offset;
115
116 /* Used for BUILD only. */
117
118 uint32_t sort_buffer_offset[2];
119 uint32_t sort_internal_offset;
120
121 uint32_t ploc_prefix_sum_partition_offset;
122 uint32_t lbvh_node_offset;
123
124 uint32_t ir_offset;
125 uint32_t internal_node_offset;
126 };
127
128 static struct build_config
129 build_config(uint32_t leaf_count, const VkAccelerationStructureBuildGeometryInfoKHR *build_info)
130 {
131 struct build_config config = {0};
132
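/* Internal-node build heuristic: trivially small BVHs always use LBVH. PLOC
 * produces better trees but costs more build time, so it is used for
 * top-level builds and for bottom-level builds that neither prefer fast
 * build nor allow updates; LBVH handles the rest. Bottom-level updates take
 * the dedicated refit path instead of a rebuild.
 */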
133 if (leaf_count <= 4)
134 config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
135 else if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR)
136 config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
137 else if (!(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR) &&
138 !(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR))
139 config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
140 else
141 config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
142
143 if (build_info->mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR &&
144 build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
145 config.internal_type = INTERNAL_BUILD_TYPE_UPDATE;
146
147 if (build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
148 config.compact = true;
149
150 return config;
151 }
152
153 static void
154 get_build_layout(struct radv_device *device, uint32_t leaf_count,
155 const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
156 struct acceleration_structure_layout *accel_struct, struct scratch_layout *scratch)
157 {
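/* A binary hierarchy over N leaves has N - 1 internal nodes; clamping with
 * MAX2 keeps at least one internal node (the root) for 0- or 1-leaf builds.
 */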
158 uint32_t internal_count = MAX2(leaf_count, 2) - 1;
159
160 VkGeometryTypeKHR geometry_type = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
161
162 if (build_info->geometryCount) {
163 if (build_info->pGeometries)
164 geometry_type = build_info->pGeometries[0].geometryType;
165 else
166 geometry_type = build_info->ppGeometries[0]->geometryType;
167 }
168
169 uint32_t bvh_leaf_size;
170 switch (geometry_type) {
171 case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
172 bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
173 break;
174 case VK_GEOMETRY_TYPE_AABBS_KHR:
175 bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
176 break;
177 case VK_GEOMETRY_TYPE_INSTANCES_KHR:
178 bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
179 break;
180 default:
181 unreachable("Unknown VkGeometryTypeKHR");
182 }
183
184 if (accel_struct) {
185 uint64_t bvh_size = bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count;
186 uint32_t offset = 0;
187 offset += sizeof(struct radv_accel_struct_header);
188
189 if (device->rra_trace.accel_structs) {
190 accel_struct->geometry_info_offset = offset;
191 offset += sizeof(struct radv_accel_struct_geometry_info) * build_info->geometryCount;
192 }
193 /* Parent links have to go directly before bvh_offset, since we index them using negative
194 * offsets from there. */
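/* One 4-byte parent link is reserved per 64-byte chunk of BVH data
 * (bvh_size / 64 * 4), matching the 64-byte granularity of node ids.
 */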
195 offset += bvh_size / 64 * 4;
196
197 /* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
198 offset = ALIGN(offset, 64);
199 accel_struct->bvh_offset = offset;
200
201 /* root node */
202 offset += sizeof(struct radv_bvh_box32_node);
203
204 accel_struct->leaf_nodes_offset = offset;
205 offset += bvh_leaf_size * leaf_count;
206
207 accel_struct->internal_nodes_offset = offset;
208 /* Factor out the root node. */
209 offset += sizeof(struct radv_bvh_box32_node) * (internal_count - 1);
210
211 accel_struct->size = offset;
212 }
213
214 if (scratch) {
215 radix_sort_vk_memory_requirements_t requirements = {
216 0,
217 };
218 if (radv_device_init_accel_struct_build_state(device) == VK_SUCCESS)
219 radix_sort_vk_get_memory_requirements(device->meta_state.accel_struct_build.radix_sort, leaf_count,
220 &requirements);
221
222 uint32_t offset = 0;
223
224 uint32_t ploc_scratch_space = 0;
225 uint32_t lbvh_node_space = 0;
226
227 struct build_config config = build_config(leaf_count, build_info);
228
229 if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC)
230 ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition);
231 else
232 lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count;
233
234 scratch->header_offset = offset;
235 offset += sizeof(struct radv_ir_header);
236
237 scratch->sort_buffer_offset[0] = offset;
238 offset += requirements.keyvals_size;
239
240 scratch->sort_buffer_offset[1] = offset;
241 offset += requirements.keyvals_size;
242
243 scratch->sort_internal_offset = offset;
244 /* The radix sort's internal data is no longer needed once PLOC/LBVH run,
245 * so save space by aliasing these allocations. */
246 scratch->ploc_prefix_sum_partition_offset = offset;
247 scratch->lbvh_node_offset = offset;
248 offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space);
249
250 scratch->ir_offset = offset;
251 offset += sizeof(struct radv_ir_node) * leaf_count;
252
253 scratch->internal_node_offset = offset;
254 offset += sizeof(struct radv_ir_box_node) * internal_count;
255
256 scratch->size = offset;
257
258 if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR) {
259 uint32_t update_offset = 0;
260
261 update_offset += sizeof(radv_aabb) * leaf_count;
262 scratch->internal_ready_count_offset = update_offset;
263
264 update_offset += sizeof(uint32_t) * internal_count;
265 scratch->update_size = update_offset;
266 } else {
267 scratch->update_size = offset;
268 }
269 }
270 }
271
272 VKAPI_ATTR void VKAPI_CALL
273 radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
274 const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
275 const uint32_t *pMaxPrimitiveCounts,
276 VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
277 {
278 RADV_FROM_HANDLE(radv_device, device, _device);
279
280 STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
281 STATIC_ASSERT(sizeof(struct radv_bvh_aabb_node) == 64);
282 STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
283 STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
284 STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);
285
286 uint32_t leaf_count = 0;
287 for (uint32_t i = 0; i < pBuildInfo->geometryCount; i++)
288 leaf_count += pMaxPrimitiveCounts[i];
289
290 struct acceleration_structure_layout accel_struct;
291 struct scratch_layout scratch;
292 get_build_layout(device, leaf_count, pBuildInfo, &accel_struct, &scratch);
293
294 pSizeInfo->accelerationStructureSize = accel_struct.size;
295 pSizeInfo->updateScratchSize = scratch.update_size;
296 pSizeInfo->buildScratchSize = scratch.size;
297 }
298
299 VKAPI_ATTR VkResult VKAPI_CALL
300 radv_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount,
301 const VkAccelerationStructureKHR *pAccelerationStructures,
302 VkQueryType queryType, size_t dataSize, void *pData, size_t stride)
303 {
304 unreachable("Unimplemented");
305 return VK_ERROR_FEATURE_NOT_PRESENT;
306 }
307
308 VKAPI_ATTR VkResult VKAPI_CALL
309 radv_BuildAccelerationStructuresKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount,
310 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
311 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
312 {
313 unreachable("Unimplemented");
314 return VK_ERROR_FEATURE_NOT_PRESENT;
315 }
316
317 VKAPI_ATTR VkResult VKAPI_CALL
318 radv_CopyAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
319 const VkCopyAccelerationStructureInfoKHR *pInfo)
320 {
321 unreachable("Unimplemented");
322 return VK_ERROR_FEATURE_NOT_PRESENT;
323 }
324
325 void
326 radv_device_finish_accel_struct_build_state(struct radv_device *device)
327 {
328 VkDevice _device = radv_device_to_handle(device);
329 struct radv_meta_state *state = &device->meta_state;
330 struct vk_device_dispatch_table *dispatch = &device->vk.dispatch_table;
331
332 dispatch->DestroyPipeline(_device, state->accel_struct_build.copy_pipeline, &state->alloc);
333 dispatch->DestroyPipeline(_device, state->accel_struct_build.ploc_pipeline, &state->alloc);
334 dispatch->DestroyPipeline(_device, state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc);
335 dispatch->DestroyPipeline(_device, state->accel_struct_build.lbvh_main_pipeline, &state->alloc);
336 dispatch->DestroyPipeline(_device, state->accel_struct_build.leaf_pipeline, &state->alloc);
337 dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_pipeline, &state->alloc);
338 dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_compact_pipeline, &state->alloc);
339 dispatch->DestroyPipeline(_device, state->accel_struct_build.header_pipeline, &state->alloc);
340 dispatch->DestroyPipeline(_device, state->accel_struct_build.morton_pipeline, &state->alloc);
341 dispatch->DestroyPipeline(_device, state->accel_struct_build.update_pipeline, &state->alloc);
342 radv_DestroyPipelineLayout(_device, state->accel_struct_build.copy_p_layout, &state->alloc);
343 radv_DestroyPipelineLayout(_device, state->accel_struct_build.ploc_p_layout, &state->alloc);
344 radv_DestroyPipelineLayout(_device, state->accel_struct_build.lbvh_generate_ir_p_layout, &state->alloc);
345 radv_DestroyPipelineLayout(_device, state->accel_struct_build.lbvh_main_p_layout, &state->alloc);
346 radv_DestroyPipelineLayout(_device, state->accel_struct_build.leaf_p_layout, &state->alloc);
347 radv_DestroyPipelineLayout(_device, state->accel_struct_build.encode_p_layout, &state->alloc);
348 radv_DestroyPipelineLayout(_device, state->accel_struct_build.header_p_layout, &state->alloc);
349 radv_DestroyPipelineLayout(_device, state->accel_struct_build.morton_p_layout, &state->alloc);
350 radv_DestroyPipelineLayout(_device, state->accel_struct_build.update_p_layout, &state->alloc);
351
352 if (state->accel_struct_build.radix_sort)
353 radix_sort_vk_destroy(state->accel_struct_build.radix_sort, _device, &state->alloc);
354
355 radv_DestroyBuffer(_device, state->accel_struct_build.null.buffer, &state->alloc);
356 radv_FreeMemory(_device, state->accel_struct_build.null.memory, &state->alloc);
357 vk_common_DestroyAccelerationStructureKHR(_device, state->accel_struct_build.null.accel_struct, &state->alloc);
358 }
359
360 static VkResult
361 create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint32_t spv_size,
362 unsigned push_constant_size, VkPipeline *pipeline, VkPipelineLayout *layout)
363 {
364 if (*pipeline)
365 return VK_SUCCESS;
366
367 VkDevice _device = radv_device_to_handle(device);
368
369 const VkPipelineLayoutCreateInfo pl_create_info = {
370 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
371 .setLayoutCount = 0,
372 .pushConstantRangeCount = 1,
373 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size},
374 };
375
376 VkShaderModuleCreateInfo module_info = {
377 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
378 .pNext = NULL,
379 .flags = 0,
380 .codeSize = spv_size,
381 .pCode = spv,
382 };
383
384 VkShaderModule module;
385 VkResult result =
386 device->vk.dispatch_table.CreateShaderModule(_device, &module_info, &device->meta_state.alloc, &module);
387 if (result != VK_SUCCESS)
388 return result;
389
390 if (!*layout) {
391 result = radv_CreatePipelineLayout(_device, &pl_create_info, &device->meta_state.alloc, layout);
392 if (result != VK_SUCCESS)
393 goto cleanup;
394 }
395
396 VkPipelineShaderStageCreateInfo shader_stage = {
397 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
398 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
399 .module = module,
400 .pName = "main",
401 .pSpecializationInfo = NULL,
402 };
403
404 VkComputePipelineCreateInfo pipeline_info = {
405 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
406 .stage = shader_stage,
407 .flags = 0,
408 .layout = *layout,
409 };
410
411 result = device->vk.dispatch_table.CreateComputePipelines(_device, device->meta_state.cache, 1, &pipeline_info,
412 &device->meta_state.alloc, pipeline);
413
414 cleanup:
415 device->vk.dispatch_table.DestroyShaderModule(_device, module, &device->meta_state.alloc);
416 return result;
417 }
418
419 VkResult
420 radv_device_init_null_accel_struct(struct radv_device *device)
421 {
422 if (device->physical_device->memory_properties.memoryTypeCount == 0)
423 return VK_SUCCESS; /* Exit in the case of null winsys. */
424
425 VkDevice _device = radv_device_to_handle(device);
426
427 uint32_t bvh_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
428 uint32_t size = bvh_offset + sizeof(struct radv_bvh_box32_node);
429
430 VkResult result;
431
432 VkBuffer buffer = VK_NULL_HANDLE;
433 VkDeviceMemory memory = VK_NULL_HANDLE;
434 VkAccelerationStructureKHR accel_struct = VK_NULL_HANDLE;
435
436 VkBufferCreateInfo buffer_create_info = {
437 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
438 .pNext =
439 &(VkBufferUsageFlags2CreateInfoKHR){
440 .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
441 .usage = VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
442 },
443 .size = size,
444 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
445 };
446
447 result = radv_CreateBuffer(_device, &buffer_create_info, &device->meta_state.alloc, &buffer);
448 if (result != VK_SUCCESS)
449 return result;
450
451 VkBufferMemoryRequirementsInfo2 info = {
452 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
453 .buffer = buffer,
454 };
455 VkMemoryRequirements2 mem_req = {
456 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
457 };
458 vk_common_GetBufferMemoryRequirements2(_device, &info, &mem_req);
459
460 VkMemoryAllocateInfo alloc_info = {
461 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
462 .allocationSize = mem_req.memoryRequirements.size,
463 .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
464 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
465 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
466 };
467
468 result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);
469 if (result != VK_SUCCESS)
470 return result;
471
472 VkBindBufferMemoryInfo bind_info = {
473 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
474 .buffer = buffer,
475 .memory = memory,
476 };
477
478 result = radv_BindBufferMemory2(_device, 1, &bind_info);
479 if (result != VK_SUCCESS)
480 return result;
481
482 void *data;
483 result = vk_common_MapMemory(_device, memory, 0, size, 0, &data);
484 if (result != VK_SUCCESS)
485 return result;
486
487 struct radv_accel_struct_header header = {
488 .bvh_offset = bvh_offset,
489 };
490 memcpy(data, &header, sizeof(struct radv_accel_struct_header));
491
492 struct radv_bvh_box32_node root = {
493 .children =
494 {
495 RADV_BVH_INVALID_NODE,
496 RADV_BVH_INVALID_NODE,
497 RADV_BVH_INVALID_NODE,
498 RADV_BVH_INVALID_NODE,
499 },
500 };
501
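/* NaN bounds make every ray-box comparison fail, so traversal of the null
 * acceleration structure can never report an intersection.
 */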
502 for (uint32_t child = 0; child < 4; child++) {
503 root.coords[child] = (radv_aabb){
504 .min.x = NAN,
505 .min.y = NAN,
506 .min.z = NAN,
507 .max.x = NAN,
508 .max.y = NAN,
509 .max.z = NAN,
510 };
511 }
512
513 memcpy((uint8_t *)data + bvh_offset, &root, sizeof(struct radv_bvh_box32_node));
514
515 vk_common_UnmapMemory(_device, memory);
516
517 VkAccelerationStructureCreateInfoKHR create_info = {
518 .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
519 .buffer = buffer,
520 .size = size,
521 .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
522 };
523
524 result = vk_common_CreateAccelerationStructureKHR(_device, &create_info, &device->meta_state.alloc, &accel_struct);
525 if (result != VK_SUCCESS)
526 return result;
527
528 device->meta_state.accel_struct_build.null.buffer = buffer;
529 device->meta_state.accel_struct_build.null.memory = memory;
530 device->meta_state.accel_struct_build.null.accel_struct = accel_struct;
531
532 return VK_SUCCESS;
533 }
534
535 VkResult
536 radv_device_init_accel_struct_build_state(struct radv_device *device)
537 {
538 VkResult result = VK_SUCCESS;
539 mtx_lock(&device->meta_state.mtx);
540
541 if (device->meta_state.accel_struct_build.radix_sort)
542 goto exit;
543
544 if (device->instance->drirc.force_active_accel_struct_leaves)
545 result = create_build_pipeline_spv(device, leaf_always_active_spv, sizeof(leaf_always_active_spv),
546 sizeof(struct leaf_args), &device->meta_state.accel_struct_build.leaf_pipeline,
547 &device->meta_state.accel_struct_build.leaf_p_layout);
548 else
549 result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
550 &device->meta_state.accel_struct_build.leaf_pipeline,
551 &device->meta_state.accel_struct_build.leaf_p_layout);
552 if (result != VK_SUCCESS)
553 goto exit;
554
555 result = create_build_pipeline_spv(device, lbvh_main_spv, sizeof(lbvh_main_spv), sizeof(struct lbvh_main_args),
556 &device->meta_state.accel_struct_build.lbvh_main_pipeline,
557 &device->meta_state.accel_struct_build.lbvh_main_p_layout);
558 if (result != VK_SUCCESS)
559 goto exit;
560
561 result = create_build_pipeline_spv(device, lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv),
562 sizeof(struct lbvh_generate_ir_args),
563 &device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline,
564 &device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout);
565 if (result != VK_SUCCESS)
566 goto exit;
567
568 result = create_build_pipeline_spv(device, ploc_spv, sizeof(ploc_spv), sizeof(struct ploc_args),
569 &device->meta_state.accel_struct_build.ploc_pipeline,
570 &device->meta_state.accel_struct_build.ploc_p_layout);
571 if (result != VK_SUCCESS)
572 goto exit;
573
574 result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
575 &device->meta_state.accel_struct_build.encode_pipeline,
576 &device->meta_state.accel_struct_build.encode_p_layout);
577 if (result != VK_SUCCESS)
578 goto exit;
579
580 result =
581 create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
582 &device->meta_state.accel_struct_build.encode_compact_pipeline,
583 &device->meta_state.accel_struct_build.encode_p_layout);
584 if (result != VK_SUCCESS)
585 goto exit;
586
587 result = create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args),
588 &device->meta_state.accel_struct_build.header_pipeline,
589 &device->meta_state.accel_struct_build.header_p_layout);
590 if (result != VK_SUCCESS)
591 goto exit;
592
593 result = create_build_pipeline_spv(device, morton_spv, sizeof(morton_spv), sizeof(struct morton_args),
594 &device->meta_state.accel_struct_build.morton_pipeline,
595 &device->meta_state.accel_struct_build.morton_p_layout);
596 if (result != VK_SUCCESS)
597 goto exit;
598
599 result = create_build_pipeline_spv(device, update_spv, sizeof(update_spv), sizeof(struct update_args),
600 &device->meta_state.accel_struct_build.update_pipeline,
601 &device->meta_state.accel_struct_build.update_p_layout);
602 if (result != VK_SUCCESS)
603 goto exit;
604
605 device->meta_state.accel_struct_build.radix_sort =
606 radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache);
607 exit:
608 mtx_unlock(&device->meta_state.mtx);
609 return result;
610 }
611
612 static VkResult
613 radv_device_init_accel_struct_copy_state(struct radv_device *device)
614 {
615 mtx_lock(&device->meta_state.mtx);
616
617 VkResult result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
618 &device->meta_state.accel_struct_build.copy_pipeline,
619 &device->meta_state.accel_struct_build.copy_p_layout);
620
621 mtx_unlock(&device->meta_state.mtx);
622 return result;
623 }
624
625 struct bvh_state {
626 uint32_t node_count;
627 uint32_t scratch_offset;
628
629 uint32_t leaf_node_count;
630 uint32_t internal_node_count;
631 uint32_t leaf_node_size;
632
633 struct acceleration_structure_layout accel_struct;
634 struct scratch_layout scratch;
635 struct build_config config;
636
637 /* Radix sort state */
638 uint32_t scatter_blocks;
639 uint32_t count_ru_scatter;
640 uint32_t histo_blocks;
641 uint32_t count_ru_histo;
642 struct rs_push_scatter push_scatter;
643 };
644
645 struct radv_bvh_batch_state {
646 bool any_compact;
647 bool any_non_compact;
648 bool any_ploc;
649 bool any_lbvh;
650 bool any_update;
651 };
652
653 static uint32_t
654 pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
655 {
656 uint32_t geometry_id_and_flags = geometry_id;
657 if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
658 geometry_id_and_flags |= RADV_GEOMETRY_OPAQUE;
659
660 return geometry_id_and_flags;
661 }
662
663 static struct radv_bvh_geometry_data
664 fill_geometry_data(VkAccelerationStructureTypeKHR type, struct bvh_state *bvh_state, uint32_t geom_index,
665 const VkAccelerationStructureGeometryKHR *geometry,
666 const VkAccelerationStructureBuildRangeInfoKHR *build_range_info)
667 {
668 struct radv_bvh_geometry_data data = {
669 .first_id = bvh_state->node_count,
670 .geometry_id = pack_geometry_id_and_flags(geom_index, geometry->flags),
671 .geometry_type = geometry->geometryType,
672 };
673
674 switch (geometry->geometryType) {
675 case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
676 assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
677
678 data.data = geometry->geometry.triangles.vertexData.deviceAddress +
679 build_range_info->firstVertex * geometry->geometry.triangles.vertexStride;
680 data.indices = geometry->geometry.triangles.indexData.deviceAddress;
681
682 if (geometry->geometry.triangles.indexType == VK_INDEX_TYPE_NONE_KHR)
683 data.data += build_range_info->primitiveOffset;
684 else
685 data.indices += build_range_info->primitiveOffset;
686
687 data.transform = geometry->geometry.triangles.transformData.deviceAddress;
688 if (data.transform)
689 data.transform += build_range_info->transformOffset;
690
691 data.stride = geometry->geometry.triangles.vertexStride;
692 data.vertex_format = geometry->geometry.triangles.vertexFormat;
693 data.index_format = geometry->geometry.triangles.indexType;
694 break;
695 case VK_GEOMETRY_TYPE_AABBS_KHR:
696 assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
697
698 data.data = geometry->geometry.aabbs.data.deviceAddress + build_range_info->primitiveOffset;
699 data.stride = geometry->geometry.aabbs.stride;
700 break;
701 case VK_GEOMETRY_TYPE_INSTANCES_KHR:
702 assert(type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
703
704 data.data = geometry->geometry.instances.data.deviceAddress + build_range_info->primitiveOffset;
705
706 if (geometry->geometry.instances.arrayOfPointers)
707 data.stride = 8;
708 else
709 data.stride = sizeof(VkAccelerationStructureInstanceKHR);
710 break;
711 default:
712 unreachable("Unknown geometryType");
713 }
714
715 return data;
716 }
717
718 static void
719 build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount,
720 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
721 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states,
722 enum radv_cmd_flush_bits flush_bits)
723 {
724 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
725
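/* One dispatch per geometry: each invocation turns one primitive into an IR
 * leaf node in scratch; the first sort buffer receives the corresponding
 * leaf ids for the Morton/sort passes that follow.
 */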
726 radv_write_user_event_marker(cmd_buffer, UserEventPush, "leaves");
727
728 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
729 commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);
730
731 for (uint32_t i = 0; i < infoCount; ++i) {
732 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
733 continue;
734
735 RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
736
737 struct leaf_args leaf_consts = {
738 .ir = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
739 .bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.leaf_nodes_offset,
740 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
741 .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
742 };
743
744 for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
745 const VkAccelerationStructureGeometryKHR *geom =
746 pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
747
748 const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
749
750 leaf_consts.geom_data = fill_geometry_data(pInfos[i].type, &bvh_states[i], j, geom, build_range_info);
751
752 vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
753 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(leaf_consts), &leaf_consts);
754 radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
755
756 bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
757 bvh_states[i].node_count += build_range_info->primitiveCount;
758 }
759 }
760
761 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
762
763 cmd_buffer->state.flush_bits |= flush_bits;
764 }
765
766 static void
767 morton_generate(VkCommandBuffer commandBuffer, uint32_t infoCount,
768 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
769 enum radv_cmd_flush_bits flush_bits)
770 {
771 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
772
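/* One invocation per leaf computes the Morton code used as the sort key,
 * based on the bounds accumulated in the IR header.
 */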
773 radv_write_user_event_marker(cmd_buffer, UserEventPush, "morton");
774
775 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
776 commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.morton_pipeline);
777
778 for (uint32_t i = 0; i < infoCount; ++i) {
779 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
780 continue;
781 const struct morton_args consts = {
782 .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
783 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
784 .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
785 };
786
787 vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.morton_p_layout,
788 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
789 radv_unaligned_dispatch(cmd_buffer, bvh_states[i].node_count, 1, 1);
790 }
791
792 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
793
794 cmd_buffer->state.flush_bits |= flush_bits;
795 }
796
797 static void
798 morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
799 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
800 enum radv_cmd_flush_bits flush_bits)
801 {
802 /* Copyright 2019 The Fuchsia Authors. */
803 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
804
805 radv_write_user_event_marker(cmd_buffer, UserEventPush, "sort");
806
807 radix_sort_vk_t *rs = cmd_buffer->device->meta_state.accel_struct_build.radix_sort;
808
809 /*
810 * OVERVIEW
811 *
812 * 1. Pad the keyvals in `scatter_even`.
813 * 2. Zero the `histograms` and `partitions`.
814 * --- BARRIER ---
815 * 3. HISTOGRAM is dispatched before PREFIX.
816 * --- BARRIER ---
817 * 4. PREFIX is dispatched before the first SCATTER.
818 * --- BARRIER ---
819 * 5. One or more SCATTER dispatches.
820 *
821 * Note that the `partitions` buffer can be zeroed anytime before the first
822 * scatter.
823 */
824
825 /* How many passes? */
826 uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
827 uint32_t keyval_bits = keyval_bytes * 8;
828 uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
829 uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
830
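/* Worked example, assuming 64-bit keyvals (keyval_dwords == 2) and an 8-bit
 * radix (RS_RADIX_LOG2 == 8): key_bits = min(24, 64) = 24, so passes = 3 and
 * only the three key bytes holding Morton bits are sorted.
 *
 * The scatters ping-pong between the two sort buffers, so after an odd number
 * of passes the sorted pairs end up in sort_buffer_offset[1]; scratch_offset
 * remembers which buffer later stages should read from.
 */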
831 for (uint32_t i = 0; i < infoCount; ++i) {
832 if (bvh_states[i].node_count)
833 bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
834 else
835 bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
836 }
837
838 /*
839 * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
840 *
841 * Pad fractional blocks with max-valued keyvals.
842 *
843 * Zero the histograms and partitions buffer.
844 *
845 * This assumes the partitions follow the histograms.
846 */
847
848 /* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
849
850 /* How many scatter blocks? */
851 uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
852 uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
853
854 /*
855 * How many histogram blocks?
856 *
857 * Note that it's OK to have more max-valued digits counted by the histogram
858 * than sorted by the scatters because the sort is stable.
859 */
860 uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
861 uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
862
863 uint32_t pass_idx = (keyval_bytes - passes);
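/* Start at the first keyval byte that can contain key bits; the lower bytes
 * are never used as sort keys and are skipped entirely.
 */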
864
865 for (uint32_t i = 0; i < infoCount; ++i) {
866 if (!bvh_states[i].node_count)
867 continue;
868 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
869 continue;
870
871 uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
872 uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
873
874 bvh_states[i].scatter_blocks = (bvh_states[i].node_count + scatter_block_kvs - 1) / scatter_block_kvs;
875 bvh_states[i].count_ru_scatter = bvh_states[i].scatter_blocks * scatter_block_kvs;
876
877 bvh_states[i].histo_blocks = (bvh_states[i].count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
878 bvh_states[i].count_ru_histo = bvh_states[i].histo_blocks * histo_block_kvs;
879
880 /* Fill with max values */
881 if (bvh_states[i].count_ru_histo > bvh_states[i].node_count) {
882 radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + bvh_states[i].node_count * keyval_bytes,
883 (bvh_states[i].count_ru_histo - bvh_states[i].node_count) * keyval_bytes, 0xFFFFFFFF);
884 }
885
886 /*
887 * Zero histograms and invalidate partitions.
888 *
889 * Note that the partition invalidation only needs to be performed once
890 * because the even/odd scatter dispatches rely on the previous pass to
891 * leave the partitions in an invalid state.
892 *
893 * Note that the last workgroup doesn't read/write a partition so it doesn't
894 * need to be initialized.
895 */
896 uint32_t histo_partition_count = passes + bvh_states[i].scatter_blocks - 1;
897
898 uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
899
900 radv_fill_buffer(cmd_buffer, NULL, NULL, internal_addr + rs->internal.histograms.offset + fill_base,
901 histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)), 0);
902 }
903
904 /*
905 * Pipeline: HISTOGRAM
906 *
907 * TODO(allanmac): All subgroups should try to process approximately the same
908 * number of blocks in order to minimize tail effects. This was implemented
909 * and reverted but should be reimplemented and benchmarked later.
910 */
911 vk_barrier_transfer_w_to_compute_r(commandBuffer);
912
913 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
914 rs->pipelines.named.histogram);
915
916 for (uint32_t i = 0; i < infoCount; ++i) {
917 if (!bvh_states[i].node_count)
918 continue;
919 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
920 continue;
921
922 uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
923 uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
924
925 /* Dispatch histogram */
926 struct rs_push_histogram push_histogram = {
927 .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
928 .devaddr_keyvals = keyvals_even_addr,
929 .passes = passes,
930 };
931
932 vk_common_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
933 sizeof(push_histogram), &push_histogram);
934
935 vk_common_CmdDispatch(commandBuffer, bvh_states[i].histo_blocks, 1, 1);
936 }
937
938 /*
939 * Pipeline: PREFIX
940 *
941 * Launch one workgroup per pass.
942 */
943 vk_barrier_compute_w_to_compute_r(commandBuffer);
944
945 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
946 rs->pipelines.named.prefix);
947
948 for (uint32_t i = 0; i < infoCount; ++i) {
949 if (!bvh_states[i].node_count)
950 continue;
951 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
952 continue;
953
954 uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
955
956 struct rs_push_prefix push_prefix = {
957 .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
958 };
959
960 vk_common_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
961 sizeof(push_prefix), &push_prefix);
962
963 vk_common_CmdDispatch(commandBuffer, passes, 1, 1);
964 }
965
966 /* Pipeline: SCATTER */
967 vk_barrier_compute_w_to_compute_r(commandBuffer);
968
969 uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
970
971 for (uint32_t i = 0; i < infoCount; i++) {
972 uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
973 uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
974 uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
975
976 bvh_states[i].push_scatter = (struct rs_push_scatter){
977 .devaddr_keyvals_even = keyvals_even_addr,
978 .devaddr_keyvals_odd = keyvals_odd_addr,
979 .devaddr_partitions = internal_addr + rs->internal.partitions.offset,
980 .devaddr_histograms = internal_addr + rs->internal.histograms.offset + histogram_offset,
981 };
982 }
983
984 bool is_even = true;
985
986 while (true) {
987 uint32_t pass_dword = pass_idx / 4;
988
989 /* Bind new pipeline */
990 VkPipeline p =
991 is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
992 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
993
994 /* Update push constants that changed */
995 VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
996 : rs->pipeline_layouts.named.scatter[pass_dword].odd;
997
998 for (uint32_t i = 0; i < infoCount; i++) {
999 if (!bvh_states[i].node_count)
1000 continue;
1001 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1002 continue;
1003
1004 bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
1005
1006 vk_common_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct rs_push_scatter),
1007 &bvh_states[i].push_scatter);
1008
1009 vk_common_CmdDispatch(commandBuffer, bvh_states[i].scatter_blocks, 1, 1);
1010
1011 bvh_states[i].push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
1012 }
1013
1014 /* Continue? */
1015 if (++pass_idx >= keyval_bytes)
1016 break;
1017
1018 vk_barrier_compute_w_to_compute_r(commandBuffer);
1019
1020 is_even ^= true;
1021 }
1022
1023 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1024
1025 cmd_buffer->state.flush_bits |= flush_bits;
1026 }
1027
1028 static void
1029 lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
1030 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1031 enum radv_cmd_flush_bits flush_bits)
1032 {
1033 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1034
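/* LBVH builds the tree in two phases: lbvh_main derives the internal-node
 * topology from the Morton-sorted leaf order, then lbvh_generate_ir emits IR
 * box nodes with bounds for those internal nodes.
 */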
1035 radv_write_user_event_marker(cmd_buffer, UserEventPush, "lbvh");
1036
1037 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1038 commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1039 cmd_buffer->device->meta_state.accel_struct_build.lbvh_main_pipeline);
1040
1041 for (uint32_t i = 0; i < infoCount; ++i) {
1042 if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
1043 continue;
1044
1045 uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
1046 uint32_t internal_node_count = MAX2(bvh_states[i].node_count, 2) - 1;
1047
1048 const struct lbvh_main_args consts = {
1049 .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1050 .src_ids = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
1051 .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
1052 .id_count = bvh_states[i].node_count,
1053 .internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1054 };
1055
1056 vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.lbvh_main_p_layout,
1057 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1058 radv_unaligned_dispatch(cmd_buffer, internal_node_count, 1, 1);
1059 bvh_states[i].node_count = internal_node_count;
1060 bvh_states[i].internal_node_count = internal_node_count;
1061 }
1062
1063 cmd_buffer->state.flush_bits |= flush_bits;
1064
1065 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1066 commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1067 cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline);
1068
1069 for (uint32_t i = 0; i < infoCount; ++i) {
1070 if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
1071 continue;
1072
1073 const struct lbvh_generate_ir_args consts = {
1074 .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1075 .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
1076 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1077 .internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1078 };
1079
1080 vk_common_CmdPushConstants(commandBuffer,
1081 cmd_buffer->device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout,
1082 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1083 radv_unaligned_dispatch(cmd_buffer, bvh_states[i].internal_node_count, 1, 1);
1084 }
1085
1086 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1087 }
1088
1089 static void
1090 ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
1091 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
1092 {
1093 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1094
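/* PLOC (parallel locally-ordered clustering) merges neighbouring clusters of
 * the sorted leaves into internal nodes over multiple iterations; the two id
 * buffers ping-pong between iterations and the partition buffer backs the
 * shader's prefix scans.
 */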
1095 radv_write_user_event_marker(cmd_buffer, UserEventPush, "ploc");
1096
1097 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1098 commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.ploc_pipeline);
1099
1100 for (uint32_t i = 0; i < infoCount; ++i) {
1101 if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
1102 continue;
1103
1104 uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
1105 uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
1106 ? bvh_states[i].scratch.sort_buffer_offset[1]
1107 : bvh_states[i].scratch.sort_buffer_offset[0];
1108
1109 const struct ploc_args consts = {
1110 .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1111 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1112 .ids_0 = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
1113 .ids_1 = pInfos[i].scratchData.deviceAddress + dst_scratch_offset,
1114 .prefix_scan_partitions =
1115 pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ploc_prefix_sum_partition_offset,
1116 .internal_node_offset = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1117 };
1118
1119 vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.ploc_p_layout,
1120 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1121 vk_common_CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1);
1122 }
1123
1124 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1125 }
1126
1127 static void
1128 encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
1129 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, bool compact)
1130 {
1131 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1132
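/* Encoding converts the intermediate BVH from scratch memory into the final
 * node format in the destination buffer. The dispatch size is read from
 * ir_internal_node_count in the IR header, which the internal-node build
 * wrote on the GPU.
 */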
1133 radv_write_user_event_marker(cmd_buffer, UserEventPush, "encode");
1134
1135 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1136 commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1137 compact ? cmd_buffer->device->meta_state.accel_struct_build.encode_compact_pipeline
1138 : cmd_buffer->device->meta_state.accel_struct_build.encode_pipeline);
1139
1140 for (uint32_t i = 0; i < infoCount; ++i) {
1141 if (compact != bvh_states[i].config.compact)
1142 continue;
1143 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1144 continue;
1145
1146 RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1147
1148 VkGeometryTypeKHR geometry_type = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
1149
1150 /* If the geometry count is 0, the size does not matter
1151 * because it will be multiplied by 0.
1152 */
1153 if (pInfos[i].geometryCount)
1154 geometry_type =
1155 pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType : pInfos[i].ppGeometries[0]->geometryType;
1156
1157 if (bvh_states[i].config.compact) {
1158 uint32_t dst_offset = bvh_states[i].accel_struct.internal_nodes_offset - bvh_states[i].accel_struct.bvh_offset;
1159 radv_update_buffer_cp(cmd_buffer,
1160 pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
1161 offsetof(struct radv_ir_header, dst_node_offset),
1162 &dst_offset, sizeof(uint32_t));
1163 }
1164
1165 const struct encode_args args = {
1166 .intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1167 .output_bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.bvh_offset,
1168 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1169 .output_bvh_offset = bvh_states[i].accel_struct.bvh_offset,
1170 .leaf_node_count = bvh_states[i].leaf_node_count,
1171 .geometry_type = geometry_type,
1172 };
1173 vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.encode_p_layout,
1174 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
1175
1176 struct radv_dispatch_info dispatch = {
1177 .unaligned = true,
1178 .ordered = true,
1179 .va = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
1180 offsetof(struct radv_ir_header, ir_internal_node_count),
1181 };
1182
1183 radv_compute_dispatch(cmd_buffer, &dispatch);
1184 }
1185 /* This is the final access to the leaf nodes, no need to flush */
1186
1187 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1188 }
1189
1190 static void
1191 init_header(VkCommandBuffer commandBuffer, uint32_t infoCount,
1192 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1193 struct radv_bvh_batch_state *batch_state)
1194 {
1195 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1196
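/* For compacted builds the final sizes depend on GPU results, so the header
 * shader below fills in the size-related fields and the CPU only writes the
 * fields starting at geometry_count. Non-compacted builds write everything
 * from compacted_size onwards directly with radv_update_buffer_cp.
 */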
1197 if (batch_state->any_compact) {
1198 radv_write_user_event_marker(cmd_buffer, UserEventPush, "header");
1199
1200 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1201 commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1202 cmd_buffer->device->meta_state.accel_struct_build.header_pipeline);
1203 }
1204
1205 for (uint32_t i = 0; i < infoCount; ++i) {
1206 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1207 continue;
1208 RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1209 size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
1210
1211 uint64_t instance_count =
1212 pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? bvh_states[i].leaf_node_count : 0;
1213
1214 if (bvh_states[i].config.compact) {
1215 base = offsetof(struct radv_accel_struct_header, geometry_count);
1216
1217 struct header_args args = {
1218 .src = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1219 .dst = vk_acceleration_structure_get_va(accel_struct),
1220 .bvh_offset = bvh_states[i].accel_struct.bvh_offset,
1221 .instance_count = instance_count,
1222 };
1223
1224 vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.header_p_layout,
1225 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
1226
1227 radv_unaligned_dispatch(cmd_buffer, 1, 1, 1);
1228 }
1229
1230 struct radv_accel_struct_header header;
1231
1232 header.instance_offset = bvh_states[i].accel_struct.bvh_offset + sizeof(struct radv_bvh_box32_node);
1233 header.instance_count = instance_count;
1234 header.compacted_size = bvh_states[i].accel_struct.size;
1235
1236 header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
1237 header.copy_dispatch_size[1] = 1;
1238 header.copy_dispatch_size[2] = 1;
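/* Dispatch size for the copy shader: one workgroup per 16 * 64 = 1024 bytes
 * of compacted data.
 */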
1239
1240 header.serialization_size =
1241 header.compacted_size +
1242 align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128);
1243
1244 header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) -
1245 sizeof(uint64_t) * header.instance_count;
1246
1247 header.build_flags = pInfos[i].flags;
1248 header.geometry_count = pInfos[i].geometryCount;
1249
1250 radv_update_buffer_cp(cmd_buffer, vk_acceleration_structure_get_va(accel_struct) + base,
1251 (const char *)&header + base, sizeof(header) - base);
1252 }
1253
1254 if (batch_state->any_compact)
1255 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1256 }
1257
1258 static void
1259 init_geometry_infos(VkCommandBuffer commandBuffer, uint32_t infoCount,
1260 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1261 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
1262 {
1263 for (uint32_t i = 0; i < infoCount; ++i) {
1264 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1265 continue;
1266 RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1267
1268 uint64_t geometry_infos_size = pInfos[i].geometryCount * sizeof(struct radv_accel_struct_geometry_info);
1269
1270 struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size);
1271 if (!geometry_infos)
1272 continue;
1273
1274 for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
1275 const VkAccelerationStructureGeometryKHR *geometry =
1276 pInfos[i].pGeometries ? pInfos[i].pGeometries + j : pInfos[i].ppGeometries[j];
1277 geometry_infos[j].type = geometry->geometryType;
1278 geometry_infos[j].flags = geometry->flags;
1279 geometry_infos[j].primitive_count = ppBuildRangeInfos[i][j].primitiveCount;
1280 }
1281
1282 radv_CmdUpdateBuffer(commandBuffer, accel_struct->buffer,
1283 accel_struct->offset + bvh_states[i].accel_struct.geometry_info_offset, geometry_infos_size,
1284 geometry_infos);
1285
1286 free(geometry_infos);
1287 }
1288 }
1289
1290 static void
1291 update(VkCommandBuffer commandBuffer, uint32_t infoCount, const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
1292 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states)
1293 {
1294 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1295
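/* Updates refit the BVH in place: each leaf is rebuilt from the new geometry
 * data, new leaf bounds are written to scratch, and internal_ready_count lets
 * the shader propagate bounds to a parent once all of its children have been
 * processed.
 */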
1296 radv_write_user_event_marker(cmd_buffer, UserEventPush, "update");
1297
1298 cmd_buffer->device->vk.dispatch_table.CmdBindPipeline(
1299 commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.update_pipeline);
1300
1301 for (uint32_t i = 0; i < infoCount; ++i) {
1302 if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE)
1303 continue;
1304
1305 uint32_t leaf_node_count = 0;
1306 for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
1307 leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
1308 }
1309
1310 VK_FROM_HANDLE(vk_acceleration_structure, src_bvh, pInfos[i].srcAccelerationStructure);
1311 VK_FROM_HANDLE(vk_acceleration_structure, dst_bvh, pInfos[i].dstAccelerationStructure);
1312 struct update_args update_consts = {
1313 .src = vk_acceleration_structure_get_va(src_bvh),
1314 .dst = vk_acceleration_structure_get_va(dst_bvh),
1315 .leaf_bounds = pInfos[i].scratchData.deviceAddress,
1316 .internal_ready_count =
1317 pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
1318 .leaf_node_count = leaf_node_count,
1319 };
1320
1321 for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
1322 const VkAccelerationStructureGeometryKHR *geom =
1323 pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
1324
1325 const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
1326
1327 update_consts.geom_data = fill_geometry_data(pInfos[i].type, &bvh_states[i], j, geom, build_range_info);
1328
1329 vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.update_p_layout,
1330 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(update_consts), &update_consts);
1331 radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
1332
1333 bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
1334 bvh_states[i].node_count += build_range_info->primitiveCount;
1335 }
1336 }
1337
1338 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1339 }
1340
1341 VKAPI_ATTR void VKAPI_CALL
1342 radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
1343 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
1344 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
1345 {
1346 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1347 struct radv_meta_saved_state saved_state;
1348
1349 VkResult result = radv_device_init_accel_struct_build_state(cmd_buffer->device);
1350 if (result != VK_SUCCESS) {
1351 vk_command_buffer_set_error(&cmd_buffer->vk, result);
1352 return;
1353 }
1354
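   /* Synchronization applied between the compute phases of the build; every phase reads
    * and writes scratch/BVH memory through shaders.
    */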
   enum radv_cmd_flush_bits flush_bits =
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL);

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
   struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));

   radv_describe_begin_accel_struct_build(cmd_buffer, infoCount);

   struct radv_bvh_batch_state batch_state = {0};

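   /* Per-build setup: compute the acceleration structure and scratch layouts, pick the
    * build configuration, and initialize scratch memory: the IR header for full builds,
    * or the internal-node ready counters (plus a header/metadata copy when the update is
    * not done in place) for updates.
    */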
   for (uint32_t i = 0; i < infoCount; ++i) {
      uint32_t leaf_node_count = 0;
      for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
         leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
      }

      get_build_layout(cmd_buffer->device, leaf_node_count, pInfos + i, &bvh_states[i].accel_struct,
                       &bvh_states[i].scratch);

      struct build_config config = build_config(leaf_node_count, pInfos + i);
      bvh_states[i].config = config;

      if (config.compact)
         batch_state.any_compact = true;
      else
         batch_state.any_non_compact = true;

      if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC) {
         batch_state.any_ploc = true;
      } else if (config.internal_type == INTERNAL_BUILD_TYPE_LBVH) {
         batch_state.any_lbvh = true;
      } else if (config.internal_type == INTERNAL_BUILD_TYPE_UPDATE) {
         batch_state.any_update = true;
      } else {
         unreachable("Unknown internal_build_type");
      }

      if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE) {
         /* The internal node count is updated in lbvh_build_internal for LBVH
          * and from the PLOC shader for PLOC. */
         struct radv_ir_header header = {
            .min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff},
            .max_bounds = {0x80000000, 0x80000000, 0x80000000},
            .dispatch_size_y = 1,
            .dispatch_size_z = 1,
            .sync_data =
               {
                  .current_phase_end_counter = TASK_INDEX_INVALID,
                  /* Will be updated by the first PLOC shader invocation */
                  .task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
               },
         };

         radv_update_buffer_cp(cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
                               &header, sizeof(header));
      } else {
         /* Prepare ready counts for internal nodes */
         radv_fill_buffer(cmd_buffer, NULL, NULL,
                          pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
                          bvh_states[i].scratch.update_size - bvh_states[i].scratch.internal_ready_count_offset, 0x0);
         if (pInfos[i].srcAccelerationStructure != pInfos[i].dstAccelerationStructure) {
            VK_FROM_HANDLE(vk_acceleration_structure, src_as, pInfos[i].srcAccelerationStructure);
            VK_FROM_HANDLE(vk_acceleration_structure, dst_as, pInfos[i].dstAccelerationStructure);

            RADV_FROM_HANDLE(radv_buffer, src_as_buffer, src_as->buffer);
            RADV_FROM_HANDLE(radv_buffer, dst_as_buffer, dst_as->buffer);

            /* Copy header/metadata */
            radv_copy_buffer(cmd_buffer, src_as_buffer->bo, dst_as_buffer->bo, src_as_buffer->offset + src_as->offset,
                             dst_as_buffer->offset + dst_as->offset, bvh_states[i].accel_struct.bvh_offset);
         }
      }
   }

   cmd_buffer->state.current_event_type = EventInternalUnknown;

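   /* Batched build phases: leaf node generation, Morton code generation and sorting,
    * internal node construction (LBVH and/or PLOC), encoding into the destination BVH
    * layout and header initialization. Update-type builds are handled separately at
    * the end.
    */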
   build_leaves(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states, flush_bits);

   morton_generate(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);

   morton_sort(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);

   cmd_buffer->state.flush_bits |= flush_bits;

   lbvh_build_internal(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);

   if (batch_state.any_ploc)
      ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states);

   cmd_buffer->state.flush_bits |= flush_bits;

   if (batch_state.any_non_compact)
      encode_nodes(commandBuffer, infoCount, pInfos, bvh_states, false);

   if (batch_state.any_compact)
      encode_nodes(commandBuffer, infoCount, pInfos, bvh_states, true);

   cmd_buffer->state.flush_bits |= flush_bits;

   init_header(commandBuffer, infoCount, pInfos, bvh_states, &batch_state);

   if (cmd_buffer->device->rra_trace.accel_structs)
      init_geometry_infos(commandBuffer, infoCount, pInfos, bvh_states, ppBuildRangeInfos);

   if (batch_state.any_update)
      update(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states);

   radv_describe_end_accel_struct_build(cmd_buffer);

   free(bvh_states);
   radv_meta_restore(&saved_state, cmd_buffer);
}

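/* Structure-to-structure copy on the GPU: the copy shader is dispatched indirectly using
 * the copy_dispatch_size stored in the source acceleration structure's header.
 */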
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   RADV_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   RADV_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = RADV_COPY_MODE_COPY,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                              cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);

   cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);

   radv_indirect_dispatch(
      cmd_buffer, src_buffer->bo,
      vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
   radv_meta_restore(&saved_state, cmd_buffer);
}

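/* Serialized acceleration structure data is only considered compatible when both the
 * driver UUID and the cache UUID in its header match this device.
 */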
VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
                                                    const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
                                                    VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   bool compat =
      memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 &&
      memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0;
   *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
                            : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                          const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   unreachable("Unimplemented");
   return VK_ERROR_FEATURE_NOT_PRESENT;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                          const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   unreachable("Unimplemented");
   return VK_ERROR_FEATURE_NOT_PRESENT;
}

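/* Deserialization (memory -> acceleration structure): the copy shader runs in
 * RADV_COPY_MODE_DESERIALIZE with a fixed dispatch size of 512 workgroups.
 */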
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);

   const struct copy_args consts = {
      .src_addr = pInfo->src.deviceAddress,
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = RADV_COPY_MODE_DESERIALIZE,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                              cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);

   vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
   radv_meta_restore(&saved_state, cmd_buffer);
}

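/* Serialization (acceleration structure -> memory): the copy shader runs in
 * RADV_COPY_MODE_SERIALIZE, dispatched indirectly from the source header, and the
 * driver/cache UUIDs are then written to the start of the destination so that
 * radv_GetDeviceAccelerationStructureCompatibilityKHR can validate them on import.
 */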
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   RADV_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
   struct radv_meta_saved_state saved_state;

   VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.copy_pipeline);

   const struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = pInfo->dst.deviceAddress,
      .mode = RADV_COPY_MODE_SERIALIZE,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                              cmd_buffer->device->meta_state.accel_struct_build.copy_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);

   cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);

   radv_indirect_dispatch(
      cmd_buffer, src_buffer->bo,
      vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
   radv_meta_restore(&saved_state, cmd_buffer);

   /* Set the header of the serialized data. */
   uint8_t header_data[2 * VK_UUID_SIZE];
   memcpy(header_data, cmd_buffer->device->physical_device->driver_uuid, VK_UUID_SIZE);
   memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid, VK_UUID_SIZE);

   radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                               const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                               const VkDeviceAddress *pIndirectDeviceAddresses,
                                               const uint32_t *pIndirectStrides,
                                               const uint32_t *const *ppMaxPrimitiveCounts)
{
   unreachable("Unimplemented");
}
