/*
 * Copyright © 2021 Bas Nieuwenhuizen
 * Copyright © 2024 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "tu_buffer.h"
#include "tu_device.h"
#include "tu_cmd_buffer.h"

#include "vk_acceleration_structure.h"
#include "tu_acceleration_structure.h"
#include "radix_sort/radix_sort_u64.h"

#include "common/freedreno_gpu_event.h"

#include "util/u_hexdump.h"

#include "bvh/tu_build_interface.h"

static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};

static const uint32_t header_spv[] = {
#include "bvh/header.spv.h"
};

static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
};

static_assert(sizeof(struct tu_instance_descriptor) == AS_RECORD_SIZE);
static_assert(sizeof(struct tu_accel_struct_header) == AS_RECORD_SIZE);
static_assert(sizeof(struct tu_internal_node) == AS_NODE_SIZE);
static_assert(sizeof(struct tu_leaf_node) == AS_NODE_SIZE);

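/* Fetch a compute pipeline built from one of the SPIR-V blobs embedded above.
 * Pipelines and their layouts are cached in the device's vk_meta object,
 * keyed by name, so repeated builds reuse the same pipeline.
 */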
static VkResult
get_pipeline_spv(struct tu_device *device,
                 const char *name, const uint32_t *spv, uint32_t spv_size,
                 unsigned push_constant_size,
                 VkPipeline *pipeline, VkPipelineLayout *layout)
{
   size_t key_size = strlen(name);

   const VkPushConstantRange pc_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = push_constant_size,
   };

   VkResult result = vk_meta_get_pipeline_layout(&device->vk,
                                                 &device->meta, NULL,
                                                 &pc_range, name, key_size,
                                                 layout);

   if (result != VK_SUCCESS)
      return result;

   VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta, name, key_size);
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline = pipeline_from_cache;
      return VK_SUCCESS;
   }

   VkShaderModuleCreateInfo module_info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
      .pNext = NULL,
      .flags = 0,
      .codeSize = spv_size,
      .pCode = spv,
   };

   VkPipelineShaderStageCreateInfo shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &module_info,
      .flags = 0,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .flags = 0,
      .stage = shader_stage,
      .layout = *layout,
   };

   return vk_meta_create_compute_pipeline(&device->vk, &device->meta, &pipeline_info,
                                          name, key_size, pipeline);
}

struct bvh_layout {
   uint64_t bvh_offset;
   uint64_t size;
};

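/* Memory layout of the final acceleration structure:
 *
 *    tu_accel_struct_header
 *    tu_instance_descriptor  x leaf_count              (TLAS only)
 *    uint32_t parent link    x (internal + leaf)
 *    ...padding to a 64-byte boundary...                -- bvh_offset
 *    tu_internal_node        x internal_count
 *    tu_leaf_node            x leaf_count
 */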
static void
get_bvh_layout(VkGeometryTypeKHR geometry_type,
               uint32_t leaf_count,
               struct bvh_layout *layout)
{
   uint32_t internal_count = MAX2(leaf_count, 2) - 1;

   uint64_t offset = sizeof(struct tu_accel_struct_header);

   /* Instance descriptors, one per instance. */
   if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      offset += leaf_count * sizeof(struct tu_instance_descriptor);
   }

   /* Parent links, which have to go directly before bvh_offset as we index
    * them using negative offsets from there.
    */
   offset += (internal_count + leaf_count) * sizeof(uint32_t);

   /* The BVH, and hence bvh_offset, needs 64 byte alignment for RT nodes. */
   offset = ALIGN(offset, 64);
   layout->bvh_offset = offset;

   offset += internal_count * sizeof(struct tu_internal_node) +
             leaf_count * sizeof(struct tu_leaf_node);

   layout->size = offset;
}

static VkDeviceSize
get_bvh_size(VkDevice device,
             const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
             uint32_t leaf_count)
{
   struct bvh_layout layout;
   get_bvh_layout(vk_get_as_geometry_type(pBuildInfo), leaf_count, &layout);
   return layout.size;
}

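/* There is only one variant of the encode pipeline, so the key is always 0. */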
static uint32_t
encode_key(VkAccelerationStructureTypeKHR type,
           VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return 0;
}

static VkResult
encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result =
      get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv),
                       sizeof(encode_args), &pipeline, &layout);

   if (result != VK_SUCCESS)
      return result;

   tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   return VK_SUCCESS;
}

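/* Run the encode shader: it consumes the intermediate BVH produced by the
 * common vk_acceleration_structure build code and writes the hardware node
 * layout into the destination AS, starting at bvh_offset. The dispatch size
 * is taken from the internal-node count in the IR header. We also record the
 * destination AS size, keyed by its VA, in device->accel_struct_ranges.
 */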
static void
encode(VkCommandBuffer commandBuffer,
       const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
       const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
       VkDeviceAddress intermediate_as_addr,
       VkDeviceAddress intermediate_header_addr,
       uint32_t leaf_count,
       uint32_t key,
       struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;
   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv),
                    sizeof(encode_args), &pipeline, &layout);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   const struct encode_args args = {
      .intermediate_bvh = intermediate_as_addr,
      .output_bvh = vk_acceleration_structure_get_va(dst) + bvh_layout.bvh_offset,
      .header = intermediate_header_addr,
      .output_bvh_offset = bvh_layout.bvh_offset,
      .leaf_node_count = leaf_count,
      .geometry_type = geometry_type,
   };
   vk_common_CmdPushConstants(commandBuffer, layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
                              &args);

   tu_dispatch_unaligned_indirect(commandBuffer,
                                  intermediate_header_addr +
                                  offsetof(struct vk_ir_header, ir_internal_node_count));

   *(VkDeviceSize *)
      util_sparse_array_get(&device->accel_struct_ranges,
                            vk_acceleration_structure_get_va(dst)) = dst->size;
}

/* Don't bother copying over the compacted size using a compute shader if
 * compaction is never going to happen.
 */
enum tu_header_key {
   HEADER_NO_DISPATCH,
   HEADER_USE_DISPATCH
};

static uint32_t
header_key(VkAccelerationStructureTypeKHR type,
           VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) ?
      HEADER_USE_DISPATCH : HEADER_NO_DISPATCH;
}

static VkResult
header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;

   if (key == HEADER_USE_DISPATCH) {
      VkPipeline pipeline;
      VkPipelineLayout layout;
      VkResult result =
         get_pipeline_spv(device, "header", header_spv, sizeof(header_spv),
                          sizeof(header_args), &pipeline, &layout);

      if (result != VK_SUCCESS)
         return result;

      static const VkMemoryBarrier mb = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
         .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
         .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
      };

      vk_common_CmdPipelineBarrier(commandBuffer,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   0, 1, &mb, 0, NULL, 0, NULL);

      tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   }

   return VK_SUCCESS;
}

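/* Write the tu_accel_struct_header of the destination AS. The CPU-known
 * fields are emitted with a CP_MEM_WRITE starting at `base`; when compaction
 * is allowed, the "header" shader is dispatched as well to fill the remaining
 * fields from the IR header on the GPU (see bvh/header.spv).
 */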
static void
header(VkCommandBuffer commandBuffer,
       const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
       const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
       VkDeviceAddress intermediate_as_addr,
       VkDeviceAddress intermediate_header_addr,
       uint32_t leaf_count,
       uint32_t key,
       struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;
   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst);

   size_t base = offsetof(struct tu_accel_struct_header, copy_dispatch_size);

   uint32_t instance_count =
      geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ? leaf_count : 0;

   if (key == HEADER_USE_DISPATCH) {
      base = offsetof(struct tu_accel_struct_header, instance_count);
      VkPipeline pipeline;
      VkPipelineLayout layout;
      get_pipeline_spv(device, "header", header_spv, sizeof(header_spv),
                       sizeof(header_args), &pipeline, &layout);

      struct header_args args = {
         .src = intermediate_header_addr,
         .dst = vk_acceleration_structure_get_va(dst),
         .bvh_offset = bvh_layout.bvh_offset,
         .instance_count = instance_count,
      };

      vk_common_CmdPushConstants(commandBuffer, layout,
                                 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
                                 &args);

      vk_common_CmdDispatch(commandBuffer, 1, 1, 1);
   }

   struct tu_accel_struct_header header = {};

   header.instance_count = instance_count;
   header.self_ptr = header_addr;
   header.compacted_size = bvh_layout.size;

   header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 128);
   header.copy_dispatch_size[1] = 1;
   header.copy_dispatch_size[2] = 1;

   header.serialization_size =
      header.compacted_size +
      sizeof(struct vk_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count;

   header.size = header.serialization_size - sizeof(struct vk_accel_struct_serialization_header) -
                 sizeof(uint64_t) * header.instance_count;

   struct tu_cs *cs = &cmdbuf->cs;

   size_t header_size = sizeof(struct tu_accel_struct_header) - base;
   assert(base % sizeof(uint32_t) == 0);
   assert(header_size % sizeof(uint32_t) == 0);
   uint32_t *header_ptr = (uint32_t *)((char *)&header + base);

   tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + header_size / sizeof(uint32_t));
   tu_cs_emit_qw(cs, header_addr + base);
   tu_cs_emit_array(cs, header_ptr, header_size / sizeof(uint32_t));
}

const struct vk_acceleration_structure_build_ops tu_as_build_ops = {
   .get_as_size = get_bvh_size,
   .get_encode_key = { encode_key, header_key },
   .encode_bind_pipeline = { encode_bind_pipeline, header_bind_pipeline },
   .encode_as = { encode, header },
};

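/* Configuration for the shared 64-bit radix sort used by the common build
 * code. keyval_dwords = 2 matches the u64 key/value pairs, and
 * subgroup_size_log2 = 7 corresponds to the 128-wide subgroups reported via
 * vk_acceleration_structure_build_args below.
 */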
struct radix_sort_vk_target_config tu_radix_sort_config = {
   .keyval_dwords = 2,
   .init = { .workgroup_size_log2 = 8, },
   .fill = { .workgroup_size_log2 = 8, .block_rows = 8 },
   .histogram = {
      .workgroup_size_log2 = 8,
      .subgroup_size_log2 = 7,
      .block_rows = 14, /* TODO tune this */
   },
   .prefix = {
      .workgroup_size_log2 = 8,
      .subgroup_size_log2 = 7,
   },
   .scatter = {
      .workgroup_size_log2 = 8,
      .subgroup_size_log2 = 7,
      .block_rows = 14, /* TODO tune this */
   },
   .nonsequential_dispatch = false,
};

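/* Lazily create the device-wide radix sort instance. Double-checked locking:
 * the mutex is only taken on the first build (or size query) that needs it.
 */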
static VkResult
init_radix_sort(struct tu_device *device)
{
   if (!device->radix_sort) {
      mtx_lock(&device->radix_sort_mutex);
      if (!device->radix_sort) {
         device->radix_sort =
            vk_create_radix_sort_u64(tu_device_to_handle(device),
                                     &device->vk.alloc,
                                     VK_NULL_HANDLE, tu_radix_sort_config);
         if (!device->radix_sort) {
            /* TODO plumb through the error here */
            mtx_unlock(&device->radix_sort_mutex);
            return VK_ERROR_OUT_OF_HOST_MEMORY;
         }
      }
      mtx_unlock(&device->radix_sort_mutex);
   }

   return VK_SUCCESS;
}

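/* The common build code and our meta pipelines bind their own compute
 * pipeline and push constants, so save and restore the application's compute
 * state around every build/copy entry point.
 */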
struct tu_saved_compute_state {
   uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   struct tu_shader *compute_shader;
};

static void
tu_save_compute_state(struct tu_cmd_buffer *cmd,
                      struct tu_saved_compute_state *state)
{
   memcpy(state->push_constants, cmd->push_constants, sizeof(cmd->push_constants));
   state->compute_shader = cmd->state.shaders[MESA_SHADER_COMPUTE];
}

static void
tu_restore_compute_state(struct tu_cmd_buffer *cmd,
                         struct tu_saved_compute_state *state)
{
   cmd->state.shaders[MESA_SHADER_COMPUTE] = state->compute_shader;
   if (state->compute_shader) {
      tu_cs_emit_state_ib(&cmd->cs, state->compute_shader->state);
   }
   memcpy(cmd->push_constants, state->push_constants, sizeof(cmd->push_constants));
   cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
}

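/* The actual build is handled by the common vk_acceleration_structure code,
 * which calls back into tu_as_build_ops for sizing and for the encode/header
 * passes; we only set up the radix sort and the build args here.
 */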
VKAPI_ATTR void VKAPI_CALL
tu_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                     const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_device *device = cmd->device;
   struct tu_saved_compute_state state;

   VkResult result = init_radix_sort(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   tu_save_compute_state(cmd, &state);

   struct vk_acceleration_structure_build_args args = {
      .subgroup_size = 128,
      .bvh_bounds_offset = offsetof(tu_accel_struct_header, aabb),
      .emit_markers = false,
      .radix_sort = device->radix_sort,
   };

   vk_cmd_build_acceleration_structures(commandBuffer,
                                        &device->vk,
                                        &device->meta,
                                        infoCount,
                                        pInfos,
                                        ppBuildRangeInfos,
                                        &args);

   tu_restore_compute_state(cmd, &state);
}

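/* Copy, serialize and deserialize all run the same "copy" shader and only
 * differ in the mode push constant. Copy and serialize dispatch indirectly
 * from copy_dispatch_size in the source AS header; deserialize uses a fixed
 * dispatch size since there is no header to read the size from yet.
 */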
VKAPI_ATTR void VKAPI_CALL
tu_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   struct tu_saved_compute_state state;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result =
      get_pipeline_spv(cmd->device, "copy", copy_spv, sizeof(copy_spv),
                       sizeof(copy_args), &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   tu_save_compute_state(cmd, &state);

   tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = TU_COPY_MODE_COPY,
   };

   vk_common_CmdPushConstants(commandBuffer, layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts),
                              &consts);

   TU_CALLX(cmd->device, tu_CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct tu_accel_struct_header, copy_dispatch_size));

   tu_restore_compute_state(cmd, &state);
}

VKAPI_ATTR void VKAPI_CALL
tu_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                           const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   struct tu_saved_compute_state state;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result =
      get_pipeline_spv(cmd->device, "copy", copy_spv, sizeof(copy_spv),
                       sizeof(copy_args), &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   tu_save_compute_state(cmd, &state);

   tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   const struct copy_args consts = {
      .src_addr = pInfo->src.deviceAddress,
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = TU_COPY_MODE_DESERIALIZE,
   };

   vk_common_CmdPushConstants(commandBuffer, layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts),
                              &consts);

   vk_common_CmdDispatch(commandBuffer, 256, 1, 1);

   tu_restore_compute_state(cmd, &state);
}

VKAPI_ATTR void VKAPI_CALL
tu_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                           const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   struct tu_saved_compute_state state;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result =
      get_pipeline_spv(cmd->device, "copy", copy_spv, sizeof(copy_spv),
                       sizeof(copy_args), &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   tu_save_compute_state(cmd, &state);

   tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   const struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = pInfo->dst.deviceAddress,
      .mode = TU_COPY_MODE_SERIALIZE,
   };

   vk_common_CmdPushConstants(commandBuffer, layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts),
                              &consts);

   TU_CALLX(cmd->device, tu_CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct tu_accel_struct_header, copy_dispatch_size));

   tu_restore_compute_state(cmd, &state);

   /* Write the header of the serialized data: the driver UUID followed by
    * the compatibility (cache) UUID.
    */
   uint32_t header_data[2 * VK_UUID_SIZE / 4];
   memcpy(header_data, cmd->device->physical_device->driver_uuid, VK_UUID_SIZE);
   memcpy(header_data + VK_UUID_SIZE / 4, cmd->device->physical_device->cache_uuid, VK_UUID_SIZE);

   struct tu_cs *cs = &cmd->cs;

   tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + ARRAY_SIZE(header_data));
   tu_cs_emit_qw(cs, pInfo->dst.deviceAddress);
   tu_cs_emit_array(cs, header_data, ARRAY_SIZE(header_data));
}

VKAPI_ATTR void VKAPI_CALL
tu_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
                                         const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
                                         const uint32_t *pMaxPrimitiveCounts,
                                         VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
{
   VK_FROM_HANDLE(tu_device, device, _device);

   init_radix_sort(device);

   struct vk_acceleration_structure_build_args args = {
      .subgroup_size = 128,
      .radix_sort = device->radix_sort,
   };

   vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts,
                         pSizeInfo, &args);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
                                                  const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
                                                  VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   bool compat =
      memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 &&
      memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0;
   *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
                            : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}

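/* Allocate and fill the shared "null" acceleration structure: a header plus a
 * single internal node with no children, used in place of a real AS when the
 * application binds a null acceleration structure. The 0xff mantissa values
 * presumably mark the child slots as empty so traversal never descends into
 * them.
 */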
VkResult
tu_init_null_accel_struct(struct tu_device *device)
{
   VkResult result = tu_bo_init_new(device, NULL,
                                    &device->null_accel_struct_bo,
                                    sizeof(tu_accel_struct_header) +
                                    sizeof(tu_internal_node),
                                    TU_BO_ALLOC_NO_FLAGS, "null AS");
   if (result != VK_SUCCESS) {
      return result;
   }

   result = tu_bo_map(device, device->null_accel_struct_bo, NULL);
   if (result != VK_SUCCESS) {
      tu_bo_finish(device, device->null_accel_struct_bo);
      return result;
   }

   struct tu_accel_struct_header header = {
      .bvh_ptr = device->null_accel_struct_bo->iova +
                 sizeof(tu_accel_struct_header),
      .self_ptr = device->null_accel_struct_bo->iova,
   };

   struct tu_internal_node node = {
      .child_count = 0,
      .type_flags = 0,
   };

   for (unsigned i = 0; i < 8; i++) {
      node.mantissas[i][0][0] = 0xff;
      node.mantissas[i][0][1] = 0xff;
      node.mantissas[i][0][2] = 0xff;
   }

   memcpy(device->null_accel_struct_bo->map, (void *)&header, sizeof(header));
   memcpy((char *)device->null_accel_struct_bo->map + sizeof(header),
          (void *)&node, sizeof(node));
   return VK_SUCCESS;
}

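/* Debug helpers for TU_DEBUG(DUMPAS): hexdump and decode an acceleration
 * structure when it is destroyed.
 */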
struct tu_node {
   uint32_t data[16];
};

static void
dump_leaf(struct tu_leaf_node *node)
{
   fprintf(stderr, "\tID: %d\n", node->id);
   fprintf(stderr, "\tgeometry ID: %d\n", node->geometry_id);
   bool aabb = node->type_flags & TU_NODE_TYPE_AABB;
   for (unsigned i = 0; i < (aabb ? 2 : 3); i++) {
      fprintf(stderr, "\t(");
      for (unsigned j = 0; j < 3; j++) {
         if (j != 0)
            fprintf(stderr, ", ");
         fprintf(stderr, "%f", node->coords[i][j]);
      }
      fprintf(stderr, ")\n");
   }
}

static void
dump_internal(struct tu_internal_node *node, uint32_t *max_child)
{
   *max_child = MAX2(*max_child, node->id + node->child_count);
   float base[3];
   unsigned exponents[3];
   for (unsigned i = 0; i < 3; i++) {
      base[i] = uif(node->bases[i] << 16);
      exponents[i] = node->exponents[i] - 134;
   }

   for (unsigned i = 0; i < node->child_count; i++) {
      fprintf(stderr, "\tchild %d\n", node->id + i);
      for (unsigned vert = 0; vert < 2; vert++) {
         fprintf(stderr, "\t\t(");
         for (unsigned coord = 0; coord < 3; coord++) {
            unsigned mantissa = node->mantissas[i][vert][coord];
            if (coord != 0)
               fprintf(stderr, ", ");
            fprintf(stderr, "%f", base[coord] + ldexp((float)mantissa,
                                                      exponents[coord]));
         }
         fprintf(stderr, ")\n");
      }
   }
}

static void
dump_as(struct vk_acceleration_structure *as)
{
   VK_FROM_HANDLE(tu_buffer, buf, as->buffer);

   struct tu_accel_struct_header *hdr =
      (struct tu_accel_struct_header *)((char *)buf->bo->map + as->offset);

   fprintf(stderr, "dumping AS at %" PRIx64 "\n", buf->iova + as->offset);
   u_hexdump(stderr, (uint8_t *)hdr, sizeof(*hdr), false);

   char *base = ((char *)buf->bo->map + (hdr->bvh_ptr - buf->iova));
   struct tu_node *node = (struct tu_node *)base;

   fprintf(stderr, "dumping nodes at %" PRIx64 "\n", hdr->bvh_ptr);

   uint32_t max_child = 1;
   for (unsigned i = 0; i < max_child; i++) {
      uint32_t *parent_ptr = (uint32_t *)(base - (4 + 4 * i));
      uint32_t parent = *parent_ptr;
      fprintf(stderr, "node %d parent %d\n", i, parent);
      u_hexdump(stderr, (uint8_t *)node, sizeof(*node), false);
      if (node->data[15] & TU_NODE_TYPE_LEAF) {
         /* TODO compressed leaves */
         dump_leaf((struct tu_leaf_node *)node);
      } else {
         dump_internal((struct tu_internal_node *)node, &max_child);
      }

      node++;
   }
}

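/* self_ptr is written on the GPU as part of the build, so if the header does
 * not point back at the AS itself, the build never executed and there is
 * nothing valid to dump.
 */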
static bool
as_finished(struct tu_device *dev, struct vk_acceleration_structure *as)
{
   VK_FROM_HANDLE(tu_buffer, buf, as->buffer);
   tu_bo_map(dev, buf->bo, NULL);

   struct tu_accel_struct_header *hdr =
      (struct tu_accel_struct_header *)((char *)buf->bo->map + as->offset);
   return hdr->self_ptr == buf->iova + as->offset;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyAccelerationStructureKHR(VkDevice _device,
                                   VkAccelerationStructureKHR accelerationStructure,
                                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   if (TU_DEBUG(DUMPAS)) {
      VK_FROM_HANDLE(vk_acceleration_structure, as, accelerationStructure);
      if (as_finished(device, as))
         dump_as(as);
   }

   vk_common_DestroyAccelerationStructureKHR(_device, accelerationStructure,
                                             pAllocator);
}
767