/*
 * Copyright © 2021 Bas Nieuwenhuizen
 * Copyright © 2024 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "tu_buffer.h"
#include "tu_device.h"
#include "tu_cmd_buffer.h"

#include "vk_acceleration_structure.h"
#include "tu_acceleration_structure.h"
#include "radix_sort/radix_sort_u64.h"

#include "common/freedreno_gpu_event.h"

#include "util/u_hexdump.h"

#include "bvh/tu_build_interface.h"

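/* Precompiled SPIR-V for the compute shaders that encode, finalize, and copy
 * acceleration structures; generated from the sources under bvh/.
 */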
static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};

static const uint32_t header_spv[] = {
#include "bvh/header.spv.h"
};

static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
};

static_assert(sizeof(struct tu_instance_descriptor) == AS_RECORD_SIZE);
static_assert(sizeof(struct tu_accel_struct_header) == AS_RECORD_SIZE);
static_assert(sizeof(struct tu_internal_node) == AS_NODE_SIZE);
static_assert(sizeof(struct tu_leaf_node) == AS_NODE_SIZE);

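/* Fetch the compute pipeline for one of the shaders above, creating it and
 * caching it in the vk_meta cache (keyed by name) on first use.
 */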
static VkResult
get_pipeline_spv(struct tu_device *device,
                 const char *name, const uint32_t *spv, uint32_t spv_size,
                 unsigned push_constant_size,
                 VkPipeline *pipeline, VkPipelineLayout *layout)
{
   size_t key_size = strlen(name);

   const VkPushConstantRange pc_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = push_constant_size,
   };

   VkResult result = vk_meta_get_pipeline_layout(&device->vk,
                                                 &device->meta, NULL,
                                                 &pc_range, name, key_size,
                                                 layout);

   if (result != VK_SUCCESS)
      return result;

   VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta, name, key_size);
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline = pipeline_from_cache;
      return VK_SUCCESS;
   }

   VkShaderModuleCreateInfo module_info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
      .pNext = NULL,
      .flags = 0,
      .codeSize = spv_size,
      .pCode = spv,
   };

   VkPipelineShaderStageCreateInfo shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &module_info,
      .flags = 0,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .flags = 0,
      .stage = shader_stage,
      .layout = *layout,
   };

   return vk_meta_create_compute_pipeline(&device->vk, &device->meta, &pipeline_info,
                                          name, key_size, pipeline);
}

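/* Memory layout of a tu acceleration structure, as computed by
 * get_bvh_layout(): the header, then (for top-level structures) one
 * tu_instance_descriptor per instance, then the parent-link array, then the
 * 64-byte-aligned node array starting at bvh_offset.
 */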
struct bvh_layout {
   uint64_t bvh_offset;
   uint64_t size;
};

static void
get_bvh_layout(VkGeometryTypeKHR geometry_type,
               uint32_t leaf_count,
               struct bvh_layout *layout)
{
   uint32_t internal_count = MAX2(leaf_count, 2) - 1;

   uint64_t offset = sizeof(struct tu_accel_struct_header);

   /* Instance descriptors, one per instance. */
   if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      offset += leaf_count * sizeof(struct tu_instance_descriptor);
   }

   /* Parent links, which have to go directly before bvh_offset as we index
    * them using negative offsets from there.
    */
   offset += (internal_count + leaf_count) * sizeof(uint32_t);

   /* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
   offset = ALIGN(offset, 64);
   layout->bvh_offset = offset;

   offset += internal_count * sizeof(struct tu_internal_node) +
      leaf_count * sizeof(struct tu_leaf_node);

   layout->size = offset;
}

VkDeviceSize get_bvh_size(VkDevice device,
                          const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
                          uint32_t leaf_count)
{
   struct bvh_layout layout;
   get_bvh_layout(vk_get_as_geometry_type(pBuildInfo), leaf_count, &layout);
   return layout.size;
}

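/* Each build phase is keyed so that different pipeline variants can be bound
 * up front; the encode phase only has a single variant.
 */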
static uint32_t
encode_key(VkAccelerationStructureTypeKHR type,
           VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return 0;
}

static VkResult
encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result =
      get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv),
                       sizeof(encode_args), &pipeline, &layout);

   if (result != VK_SUCCESS)
      return result;

   tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   return VK_SUCCESS;
}

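/* Encode the intermediate BVH produced by the common build code into the
 * hardware node format at the destination, using an unaligned indirect
 * dispatch sized by the IR internal node count.
 */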
static void
encode(VkCommandBuffer commandBuffer,
       const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
       const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
       VkDeviceAddress intermediate_as_addr,
       VkDeviceAddress intermediate_header_addr,
       uint32_t leaf_count,
       uint32_t key,
       struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;
   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   VkPipeline pipeline;
   VkPipelineLayout layout;
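   /* The pipeline was already created and bound by encode_bind_pipeline();
    * this call only re-fetches the layout for the push constants below.
    */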
   get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv),
                    sizeof(encode_args), &pipeline, &layout);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   const struct encode_args args = {
      .intermediate_bvh = intermediate_as_addr,
      .output_bvh = vk_acceleration_structure_get_va(dst) + bvh_layout.bvh_offset,
      .header = intermediate_header_addr,
      .output_bvh_offset = bvh_layout.bvh_offset,
      .leaf_node_count = leaf_count,
      .geometry_type = geometry_type,
   };
   vk_common_CmdPushConstants(commandBuffer, layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
                              &args);

   tu_dispatch_unaligned_indirect(commandBuffer,
                                  intermediate_header_addr +
                                  offsetof(struct vk_ir_header, ir_internal_node_count));

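   /* Record the size of this AS keyed by its VA, so the driver can later
    * recover the AS extent from a raw device address alone.
    */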
   *(VkDeviceSize *)
      util_sparse_array_get(&device->accel_struct_ranges,
                            vk_acceleration_structure_get_va(dst)) = dst->size;
}

/* Don't bother copying over the compacted size using a compute shader if
 * compaction is never going to happen.
 */
enum tu_header_key {
   HEADER_NO_DISPATCH,
   HEADER_USE_DISPATCH
};

static uint32_t
header_key(VkAccelerationStructureTypeKHR type,
           VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) ?
      HEADER_USE_DISPATCH : HEADER_NO_DISPATCH;
}

static VkResult
header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;

   if (key == HEADER_USE_DISPATCH) {
      VkPipeline pipeline;
      VkPipelineLayout layout;
      VkResult result =
         get_pipeline_spv(device, "header", header_spv, sizeof(header_spv),
                          sizeof(header_args), &pipeline, &layout);

      if (result != VK_SUCCESS)
         return result;

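      /* The header shader reads results written by the preceding build
       * dispatches, so order those writes before its reads.
       */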
      static const VkMemoryBarrier mb = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
         .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
         .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
      };

      vk_common_CmdPipelineBarrier(commandBuffer,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   0, 1, &mb, 0, NULL, 0, NULL);

      tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   }

   return VK_SUCCESS;
}

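/* Fill out the tu_accel_struct_header at the front of the destination AS.
 * When compaction is allowed, a single-workgroup dispatch of the header
 * shader writes the fields that depend on the GPU-computed compacted size;
 * everything the CPU already knows is then written inline with CP_MEM_WRITE.
 */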
static void
header(VkCommandBuffer commandBuffer,
       const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
       const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
       VkDeviceAddress intermediate_as_addr,
       VkDeviceAddress intermediate_header_addr,
       uint32_t leaf_count,
       uint32_t key,
       struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;
   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst);

   size_t base = offsetof(struct tu_accel_struct_header, copy_dispatch_size);

   uint32_t instance_count =
      geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ? leaf_count : 0;

   if (key == HEADER_USE_DISPATCH) {
      base = offsetof(struct tu_accel_struct_header, instance_count);
      VkPipeline pipeline;
      VkPipelineLayout layout;
      get_pipeline_spv(device, "header", header_spv, sizeof(header_spv),
                       sizeof(header_args), &pipeline, &layout);

      struct header_args args = {
         .src = intermediate_header_addr,
         .dst = vk_acceleration_structure_get_va(dst),
         .bvh_offset = bvh_layout.bvh_offset,
         .instance_count = instance_count,
      };

      vk_common_CmdPushConstants(commandBuffer, layout,
                                 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
                                 &args);

      vk_common_CmdDispatch(commandBuffer, 1, 1, 1);
   }

   struct tu_accel_struct_header header = {};

   header.instance_count = instance_count;
   header.self_ptr = header_addr;
   header.compacted_size = bvh_layout.size;

   header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 128);
   header.copy_dispatch_size[1] = 1;
   header.copy_dispatch_size[2] = 1;

   header.serialization_size =
      header.compacted_size +
      sizeof(struct vk_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count;

   header.size = header.serialization_size - sizeof(struct vk_accel_struct_serialization_header) -
                 sizeof(uint64_t) * header.instance_count;

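   /* Write the remaining, CPU-known part of the header (everything from
    * `base` onward) with an inline CP_MEM_WRITE. With compaction enabled,
    * the GPU-dependent fields before `base` were already filled by the
    * header shader dispatch above.
    */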
   struct tu_cs *cs = &cmdbuf->cs;

   size_t header_size = sizeof(struct tu_accel_struct_header) - base;
   assert(base % sizeof(uint32_t) == 0);
   assert(header_size % sizeof(uint32_t) == 0);
   uint32_t *header_ptr = (uint32_t *)((char *)&header + base);

   tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + header_size / sizeof(uint32_t));
   tu_cs_emit_qw(cs, header_addr + base);
   tu_cs_emit_array(cs, header_ptr, header_size / sizeof(uint32_t));
}

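/* Callbacks plugged into the common vk_acceleration_structure build
 * framework, which drives the generic BVH build passes.
 */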
const struct vk_acceleration_structure_build_ops tu_as_build_ops = {
   .get_as_size = get_bvh_size,
   .get_encode_key = { encode_key, header_key },
   .encode_bind_pipeline = { encode_bind_pipeline, header_bind_pipeline },
   .encode_as = { encode, header },
};

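/* Tuning parameters for the 64-bit radix sort used during builds
 * (keyval_dwords = 2, i.e. one 64-bit key/value pair per element).
 */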
struct radix_sort_vk_target_config tu_radix_sort_config = {
   .keyval_dwords = 2,
   .init = { .workgroup_size_log2 = 8, },
   .fill = { .workgroup_size_log2 = 8, .block_rows = 8 },
   .histogram = {
      .workgroup_size_log2 = 8,
      .subgroup_size_log2 = 7,
      .block_rows = 14, /* TODO tune this */
   },
   .prefix = {
      .workgroup_size_log2 = 8,
      .subgroup_size_log2 = 7,
   },
   .scatter = {
      .workgroup_size_log2 = 8,
      .subgroup_size_log2 = 7,
      .block_rows = 14, /* TODO tune this */
   },
   .nonsequential_dispatch = false,
};

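/* Lazily create the radix sort instance, with double-checked locking so
 * concurrent first builds only create it once.
 */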
static VkResult
init_radix_sort(struct tu_device *device)
{
   if (!device->radix_sort) {
      mtx_lock(&device->radix_sort_mutex);
      if (!device->radix_sort) {
         device->radix_sort =
            vk_create_radix_sort_u64(tu_device_to_handle(device),
                                     &device->vk.alloc,
                                     VK_NULL_HANDLE, tu_radix_sort_config);
         if (!device->radix_sort) {
            /* TODO plumb through the error here */
            mtx_unlock(&device->radix_sort_mutex);
            return VK_ERROR_OUT_OF_HOST_MEMORY;
         }
      }
      mtx_unlock(&device->radix_sort_mutex);
   }

   return VK_SUCCESS;
}

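/* The common build code binds its own compute pipelines and push constants,
 * so save and restore the application's compute state around it.
 */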
struct tu_saved_compute_state {
   uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   struct tu_shader *compute_shader;
};

static void
tu_save_compute_state(struct tu_cmd_buffer *cmd,
                      struct tu_saved_compute_state *state)
{
   memcpy(state->push_constants, cmd->push_constants, sizeof(cmd->push_constants));
   state->compute_shader = cmd->state.shaders[MESA_SHADER_COMPUTE];
}

static void
tu_restore_compute_state(struct tu_cmd_buffer *cmd,
                         struct tu_saved_compute_state *state)
{
   cmd->state.shaders[MESA_SHADER_COMPUTE] = state->compute_shader;
   if (state->compute_shader) {
      tu_cs_emit_state_ib(&cmd->cs, state->compute_shader->state);
   }
   memcpy(cmd->push_constants, state->push_constants, sizeof(cmd->push_constants));
   cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
}

VKAPI_ATTR void VKAPI_CALL
tu_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                     const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_device *device = cmd->device;
   struct tu_saved_compute_state state;

   VkResult result = init_radix_sort(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   tu_save_compute_state(cmd, &state);

   struct vk_acceleration_structure_build_args args = {
      .subgroup_size = 128,
      .bvh_bounds_offset = offsetof(tu_accel_struct_header, aabb),
      .emit_markers = false,
      .radix_sort = device->radix_sort,
   };

   vk_cmd_build_acceleration_structures(commandBuffer,
                                        &device->vk,
                                        &device->meta,
                                        infoCount,
                                        pInfos,
                                        ppBuildRangeInfos,
                                        &args);

   tu_restore_compute_state(cmd, &state);
}

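/* AS-to-AS copies run a compute shader whose grid size was baked into the
 * source's header (copy_dispatch_size), so the dispatch can be indirect.
 */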
VKAPI_ATTR void VKAPI_CALL
tu_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   struct tu_saved_compute_state state;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result =
      get_pipeline_spv(cmd->device, "copy", copy_spv, sizeof(copy_spv),
                       sizeof(copy_args), &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   tu_save_compute_state(cmd, &state);

   tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = TU_COPY_MODE_COPY,
   };

   vk_common_CmdPushConstants(commandBuffer, layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts),
                              &consts);

   TU_CALLX(cmd->device, tu_CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct tu_accel_struct_header, copy_dispatch_size));

   tu_restore_compute_state(cmd, &state);
}

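/* Deserialization cannot size its dispatch from the destination's header,
 * which hasn't been written yet, so the copy shader is launched with a
 * fixed-size grid instead of indirectly.
 */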
VKAPI_ATTR void VKAPI_CALL
tu_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                           const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   struct tu_saved_compute_state state;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result =
      get_pipeline_spv(cmd->device, "copy", copy_spv, sizeof(copy_spv),
                       sizeof(copy_args), &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   tu_save_compute_state(cmd, &state);

   tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   const struct copy_args consts = {
      .src_addr = pInfo->src.deviceAddress,
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = TU_COPY_MODE_DESERIALIZE,
   };

   vk_common_CmdPushConstants(commandBuffer, layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts),
                              &consts);

   vk_common_CmdDispatch(commandBuffer, 256, 1, 1);

   tu_restore_compute_state(cmd, &state);
}

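/* Serialization reuses the indirect copy dispatch; the CP then writes the
 * driver and cache UUIDs at the front of the serialized stream so it can be
 * validated on import.
 */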
VKAPI_ATTR void VKAPI_CALL
tu_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                           const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   struct tu_saved_compute_state state;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result =
      get_pipeline_spv(cmd->device, "copy", copy_spv, sizeof(copy_spv),
                       sizeof(copy_args), &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   tu_save_compute_state(cmd, &state);

   tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   const struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = pInfo->dst.deviceAddress,
      .mode = TU_COPY_MODE_SERIALIZE,
   };

   vk_common_CmdPushConstants(commandBuffer, layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts),
                              &consts);

   TU_CALLX(cmd->device, tu_CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct tu_accel_struct_header, copy_dispatch_size));

   tu_restore_compute_state(cmd, &state);

   /* Set the header of the serialized data. */
   uint32_t header_data[2 * VK_UUID_SIZE / 4];
   memcpy(header_data, cmd->device->physical_device->driver_uuid, VK_UUID_SIZE);
   memcpy(header_data + VK_UUID_SIZE / 4, cmd->device->physical_device->cache_uuid, VK_UUID_SIZE);

   struct tu_cs *cs = &cmd->cs;

   tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + ARRAY_SIZE(header_data));
   tu_cs_emit_qw(cs, pInfo->dst.deviceAddress);
   tu_cs_emit_array(cs, header_data, ARRAY_SIZE(header_data));
}

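/* Sizes come from the common build code. init_radix_sort() is called because
 * the scratch size depends on the radix sort; its result is ignored here,
 * since this entry point cannot return an error.
 */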
VKAPI_ATTR void VKAPI_CALL
tu_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
                                         const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
                                         const uint32_t *pMaxPrimitiveCounts,
                                         VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
{
   VK_FROM_HANDLE(tu_device, device, _device);

   init_radix_sort(device);

   struct vk_acceleration_structure_build_args args = {
      .subgroup_size = 128,
      .radix_sort = device->radix_sort,
   };

   vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts,
                         pSizeInfo, &args);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
                                                  const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
                                                  VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   bool compat =
      memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 &&
      memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0;
   *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
                            : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}

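/* Allocate and fill the "null" acceleration structure: a header plus a
 * single childless internal node, so tracing against a null AS descriptor is
 * well-defined and never produces a hit.
 */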
VkResult
tu_init_null_accel_struct(struct tu_device *device)
{
   VkResult result = tu_bo_init_new(device, NULL,
                                    &device->null_accel_struct_bo,
                                    sizeof(tu_accel_struct_header) +
                                    sizeof(tu_internal_node),
                                    TU_BO_ALLOC_NO_FLAGS, "null AS");
   if (result != VK_SUCCESS) {
      return result;
   }

   result = tu_bo_map(device, device->null_accel_struct_bo, NULL);
   if (result != VK_SUCCESS) {
      tu_bo_finish(device, device->null_accel_struct_bo);
      return result;
   }

   struct tu_accel_struct_header header = {
      .bvh_ptr = device->null_accel_struct_bo->iova +
         sizeof(tu_accel_struct_header),
      .self_ptr = device->null_accel_struct_bo->iova,
   };

   struct tu_internal_node node = {
      .child_count = 0,
      .type_flags = 0,
   };

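   /* Presumably vertex 0 is the box minimum: saturating its mantissas makes
    * each (empty) child box degenerate, so nothing can intersect it.
    */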
   for (unsigned i = 0; i < 8; i++) {
      node.mantissas[i][0][0] = 0xff;
      node.mantissas[i][0][1] = 0xff;
      node.mantissas[i][0][2] = 0xff;
   }

   memcpy(device->null_accel_struct_bo->map, (void *)&header, sizeof(header));
   memcpy((char *)device->null_accel_struct_bo->map + sizeof(header),
          (void *)&node, sizeof(node));
   return VK_SUCCESS;
}

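/* Debug helpers for the TU_DEBUG dumpas flag: hexdump and pretty-print the
 * nodes of a built acceleration structure when it is destroyed.
 */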
struct tu_node {
   uint32_t data[16];
};

static void
dump_leaf(struct tu_leaf_node *node)
{
   fprintf(stderr, "\tID: %d\n", node->id);
   fprintf(stderr, "\tgeometry ID: %d\n", node->geometry_id);
   bool aabb = node->type_flags & TU_NODE_TYPE_AABB;
   for (unsigned i = 0; i < (aabb ? 2 : 3); i++) {
      fprintf(stderr, "\t(");
      for (unsigned j = 0; j < 3; j++) {
         if (j != 0)
            fprintf(stderr, ", ");
         fprintf(stderr, "%f", node->coords[i][j]);
      }
      fprintf(stderr, ")\n");
   }
}

static void
dump_internal(struct tu_internal_node *node, uint32_t *max_child)
{
   *max_child = MAX2(*max_child, node->id + node->child_count);
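   /* Child bounds are stored compressed: a per-axis base reconstructed from
    * the top 16 bits of a float, 8-bit mantissas, and an exponent that
    * appears to be biased by 134.
    */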
   float base[3];
   unsigned exponents[3];
   for (unsigned i = 0; i < 3; i++) {
      base[i] = uif(node->bases[i] << 16);
      exponents[i] = node->exponents[i] - 134;
   }

   for (unsigned i = 0; i < node->child_count; i++) {
      fprintf(stderr, "\tchild %d\n", node->id + i);
      for (unsigned vert = 0; vert < 2; vert++) {
         fprintf(stderr, "\t\t(");
         for (unsigned coord = 0; coord < 3; coord++) {
            unsigned mantissa = node->mantissas[i][vert][coord];
            if (coord != 0)
               fprintf(stderr, ", ");
            fprintf(stderr, "%f", base[coord] + ldexp((float)mantissa,
                                                      exponents[coord]));
         }
         fprintf(stderr, ")\n");
      }
   }
}

static void
dump_as(struct vk_acceleration_structure *as)
{
   VK_FROM_HANDLE(tu_buffer, buf, as->buffer);

   struct tu_accel_struct_header *hdr =
      (struct tu_accel_struct_header *)((char *)buf->bo->map + as->offset);

   fprintf(stderr, "dumping AS at %" PRIx64 "\n", buf->iova + as->offset);
   u_hexdump(stderr, (uint8_t *)hdr, sizeof(*hdr), false);

   char *base = ((char *)buf->bo->map + (hdr->bvh_ptr - buf->iova));
   struct tu_node *node = (struct tu_node *)base;

   fprintf(stderr, "dumping nodes at %" PRIx64 "\n", hdr->bvh_ptr);

   uint32_t max_child = 1;
   for (unsigned i = 0; i < max_child; i++) {
      uint32_t *parent_ptr = (uint32_t *)(base - (4 + 4 * i));
      uint32_t parent = *parent_ptr;
      fprintf(stderr, "node %d parent %d\n", i, parent);
      u_hexdump(stderr, (uint8_t *)node, sizeof(*node), false);
      if (node->data[15] & TU_NODE_TYPE_LEAF) {
         /* TODO compressed leaves */
         dump_leaf((struct tu_leaf_node *)node);
      } else {
         dump_internal((struct tu_internal_node *)node, &max_child);
      }

      node++;
   }
}

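/* The header's self_ptr is only valid once the build commands have actually
 * executed, so use it to tell whether the AS is safe to dump.
 */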
static bool
as_finished(struct tu_device *dev, struct vk_acceleration_structure *as)
{
   VK_FROM_HANDLE(tu_buffer, buf, as->buffer);
   tu_bo_map(dev, buf->bo, NULL);

   struct tu_accel_struct_header *hdr =
      (struct tu_accel_struct_header *)((char *)buf->bo->map + as->offset);
   return hdr->self_ptr == buf->iova + as->offset;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyAccelerationStructureKHR(VkDevice _device,
                                   VkAccelerationStructureKHR accelerationStructure,
                                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   if (TU_DEBUG(DUMPAS)) {
      VK_FROM_HANDLE(vk_acceleration_structure, as, accelerationStructure);
      if (as_finished(device, as))
         dump_as(as);
   }

   vk_common_DestroyAccelerationStructureKHR(_device, accelerationStructure,
                                             pAllocator);
}