/* Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "anv_private.h"

#include <math.h>

#include "util/u_debug.h"
#include "util/half_float.h"
#include "util/u_atomic.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#include "ds/intel_tracepoints.h"

#include "bvh/anv_build_interface.h"
#include "vk_acceleration_structure.h"
#include "radix_sort/radix_sort_u64.h"
#include "radix_sort/common/vk/barrier.h"

#include "vk_common_entrypoints.h"
#include "genX_mi_builder.h"

#if GFX_VERx10 >= 125

/* Ids used to track BVH dumps */
static uint32_t blas_id = 0;
static uint32_t tlas_id = 0;

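/* Debug-marker plumbing: the common build framework reports each build step
 * through these begin/end callbacks, which we map onto intel_tracepoints
 * events. Begin/end pairs are assumed to nest, so a small stack of step ids
 * in cmd_buffer->state.rt.debug_markers is enough to match an end marker
 * with its begin marker.
 */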
static void
begin_debug_marker(VkCommandBuffer commandBuffer,
                   enum vk_acceleration_structure_build_step step,
                   const char *format, ...)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   assert(cmd_buffer->state.rt.debug_marker_count <
          ARRAY_SIZE(cmd_buffer->state.rt.debug_markers));
   cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count++] =
      step;
   switch (step) {
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
      trace_intel_begin_as_build(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
      trace_intel_begin_as_build_leaves(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
      trace_intel_begin_as_morton_generate(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
      trace_intel_begin_as_morton_sort(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
      trace_intel_begin_as_lbvh_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
      trace_intel_begin_as_ploc_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
      trace_intel_begin_as_encode(&cmd_buffer->trace);
      break;
   default:
      unreachable("Invalid build step");
   }
}

static void
end_debug_marker(VkCommandBuffer commandBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer->state.rt.debug_marker_count--;
   switch (cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count]) {
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
      trace_intel_end_as_build(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
      trace_intel_end_as_build_leaves(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
      trace_intel_end_as_morton_generate(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
      trace_intel_end_as_morton_sort(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
      trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
      trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
      trace_intel_end_as_encode(&cmd_buffer->trace);
      break;
   default:
      unreachable("Invalid build step");
   }
}

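/* Snapshot dump_size bytes of BVH data at src into a freshly allocated,
 * host-visible BO and queue it on device->bvh_dumps for later inspection.
 * The copy is recorded into the command buffer, so the dump contents only
 * become valid once the batch has executed.
 */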
static void
add_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
             VkDeviceAddress src,
             uint64_t dump_size,
             VkGeometryTypeKHR geometry_type,
             enum bvh_dump_type dump_type)
{
   assert(dump_size % 4 == 0);

   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *bo = NULL;

   VkResult result = anv_device_alloc_bo(device, "bvh_dump", dump_size,
                                         ANV_BO_ALLOC_MAPPED |
                                         ANV_BO_ALLOC_HOST_CACHED_COHERENT, 0,
                                         &bo);
   if (result != VK_SUCCESS) {
      fprintf(stderr, "Failed to allocate bvh for dump\n");
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_bvh_dump *bvh_dump = malloc(sizeof(struct anv_bvh_dump));
   if (bvh_dump == NULL) {
      anv_device_release_bo(device, bo);
      vk_command_buffer_set_error(&cmd_buffer->vk,
                                  VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   bvh_dump->bo = bo;
   bvh_dump->bvh_id = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ?
                           tlas_id : blas_id;
   bvh_dump->dump_size = dump_size;
   bvh_dump->geometry_type = geometry_type;
   bvh_dump->dump_type = dump_type;

   struct anv_address dst_addr = { .bo = bvh_dump->bo, .offset = 0 };
   struct anv_address src_addr = anv_address_from_u64(src);
   anv_cmd_copy_addr(cmd_buffer, src_addr, dst_addr, bvh_dump->dump_size);

   pthread_mutex_lock(&device->mutex);
   list_addtail(&bvh_dump->link, &device->bvh_dumps);
   pthread_mutex_unlock(&device->mutex);
}

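/* Record whichever dumps the INTEL_DEBUG bvh flags request for this build:
 * the final anv-format BVH, the intermediate IR header and/or the
 * intermediate IR BVH. Also advances the matching TLAS/BLAS dump id.
 */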
static void
debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
                            VkDeviceAddress header_addr,
                            uint64_t bvh_anv_size,
                            VkDeviceAddress intermediate_header_addr,
                            VkDeviceAddress intermediate_as_addr,
                            uint32_t leaf_count,
                            VkGeometryTypeKHR geometry_type)
{
   if (INTEL_DEBUG(DEBUG_BVH_BLAS) &&
       geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type,
                   BVH_ANV);
   }

   if (INTEL_DEBUG(DEBUG_BVH_TLAS) &&
       geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type,
                   BVH_ANV);
   }

   if (INTEL_DEBUG(DEBUG_BVH_BLAS_IR_HDR) &&
       geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      add_bvh_dump(cmd_buffer, intermediate_header_addr,
                   sizeof(struct vk_ir_header), geometry_type, BVH_IR_HDR);
   }

   if (INTEL_DEBUG(DEBUG_BVH_TLAS_IR_HDR) &&
       geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      add_bvh_dump(cmd_buffer, intermediate_header_addr,
                   sizeof(struct vk_ir_header), geometry_type, BVH_IR_HDR);
   }

   uint32_t internal_node_count = MAX2(leaf_count, 2) - 1;
   uint64_t internal_node_total_size = sizeof(struct vk_ir_box_node) *
                                       internal_node_count;

   if (INTEL_DEBUG(DEBUG_BVH_BLAS_IR_AS) &&
       geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      uint64_t leaf_total_size;

      switch (geometry_type) {
      case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
         leaf_total_size = sizeof(struct vk_ir_triangle_node) * leaf_count;
         break;
      case VK_GEOMETRY_TYPE_AABBS_KHR:
         leaf_total_size = sizeof(struct vk_ir_aabb_node) * leaf_count;
         break;
      default:
         unreachable("invalid geometry type");
      }

      add_bvh_dump(cmd_buffer, intermediate_as_addr, internal_node_total_size +
                   leaf_total_size, geometry_type, BVH_IR_AS);
   }

   if (INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS) &&
       geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      uint64_t leaf_total_size = sizeof(struct vk_ir_instance_node) *
                                 leaf_count;
      add_bvh_dump(cmd_buffer, intermediate_as_addr, internal_node_total_size +
                   leaf_total_size, geometry_type, BVH_IR_AS);
   }

   if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      tlas_id++;
   } else {
      blas_id++;
   }
}

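/* Precompiled SPIR-V blobs for the driver-internal compute shaders
 * (encode.comp, header.comp, copy.comp), embedded at build time.
 */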
static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};

static const uint32_t header_spv[] = {
#include "bvh/header.spv.h"
};

static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
};

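/* Fetch (or build and cache) the compute pipeline for one of the embedded
 * SPIR-V shaders via the vk_meta pipeline cache, keyed on the shader name.
 * Also returns the matching layout, which consists of a single compute push
 * constant range of push_constant_size bytes.
 */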
static VkResult
get_pipeline_spv(struct anv_device *device,
                 const char *name, const uint32_t *spv, uint32_t spv_size,
                 unsigned push_constant_size, VkPipeline *pipeline,
                 VkPipelineLayout *layout)
{
   size_t key_size = strlen(name);

   const VkPushConstantRange pc_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = push_constant_size,
   };

   VkResult result = vk_meta_get_pipeline_layout(&device->vk,
                                                 &device->meta_device, NULL,
                                                 &pc_range, name, key_size,
                                                 layout);

   if (result != VK_SUCCESS)
      return result;

   VkPipeline pipeline_from_cache =
      vk_meta_lookup_pipeline(&device->meta_device, name, key_size);
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline = pipeline_from_cache;
      return VK_SUCCESS;
   }

   VkShaderModuleCreateInfo module_info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
      .pNext = NULL,
      .flags = 0,
      .codeSize = spv_size,
      .pCode = spv,
   };

   VkPipelineShaderStageCreateInfo shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &module_info,
      .flags = 0,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .flags = 0,
      .stage = shader_stage,
      .layout = *layout,
   };

   return vk_meta_create_compute_pipeline(&device->vk, &device->meta_device,
                                          &pipeline_info, name, key_size, pipeline);
}

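/* Compute the layout of the final acceleration structure:
 *
 *    header | (TLAS only: leaf address table) | internal nodes | leaf nodes
 *
 * A worked example (sizes come from the ANV_RT_* constants): a TLAS with
 * leaf_count = 4 places the 64-byte-aligned bvh_offset after the header plus
 * a 4 * sizeof(uint64_t) address table, followed by MAX2(4, 2) - 1 = 3
 * internal nodes and 4 instance leaves.
 */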
static void
get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count,
               struct bvh_layout *layout)
{
   uint32_t internal_count = MAX2(leaf_count, 2) - 1;

   uint64_t offset = ANV_RT_BVH_HEADER_SIZE;

   /* For a TLAS, we store the addresses of the anv_instance_leaf structs
    * right after the header, for quick access in copy.comp.
    */
   if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      offset += leaf_count * sizeof(uint64_t);
   }
   /* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
   offset = ALIGN(offset, 64);

   /* This is where internal_nodes/leaves start to be encoded */
   layout->bvh_offset = offset;

   offset += internal_count * ANV_RT_INTERNAL_NODE_SIZE;

   switch (geometry_type) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
      /* Currently we encode one triangle within one quad leaf */
      offset += leaf_count * ANV_RT_QUAD_LEAF_SIZE;
      break;
   case VK_GEOMETRY_TYPE_AABBS_KHR:
      offset += leaf_count * ANV_RT_PROCEDURAL_LEAF_SIZE;
      break;
   case VK_GEOMETRY_TYPE_INSTANCES_KHR:
      offset += leaf_count * ANV_RT_INSTANCE_LEAF_SIZE;
      break;
   default:
      unreachable("Unknown VkGeometryTypeKHR");
   }

   layout->size = offset;
}

static VkDeviceSize
anv_get_as_size(VkDevice device,
                const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
                uint32_t leaf_count)
{
   struct bvh_layout layout;
   get_bvh_layout(vk_get_as_geometry_type(pBuildInfo), leaf_count, &layout);
   return layout.size;
}

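/* There is only one variant of the encode pipeline, so the encode key is
 * always 0.
 */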
static uint32_t
anv_get_encode_key(VkAccelerationStructureTypeKHR type,
                   VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return 0;
}

static VkResult
anv_encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(device, "encode", encode_spv,
                                      sizeof(encode_spv),
                                      sizeof(struct encode_args), &pipeline,
                                      &layout);
   if (result != VK_SUCCESS)
      return result;

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   return VK_SUCCESS;
}

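/* Encode step: converts the intermediate (vk_ir_*) BVH produced by the
 * common build passes into the hardware layout described by
 * get_bvh_layout(). The dispatch is indirect, sized on the GPU from the
 * ir_internal_node_count the earlier passes wrote into the IR header.
 */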
static void
anv_encode_as(VkCommandBuffer commandBuffer,
              const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
              const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
              VkDeviceAddress intermediate_as_addr,
              VkDeviceAddress intermediate_header_addr, uint32_t leaf_count,
              uint32_t key,
              struct vk_acceleration_structure *dst)
{
   if (INTEL_DEBUG(DEBUG_BVH_NO_BUILD))
      return;

   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv),
                    sizeof(struct encode_args), &pipeline, &layout);

   STATIC_ASSERT(sizeof(struct anv_accel_struct_header) == ANV_RT_BVH_HEADER_SIZE);
   STATIC_ASSERT(sizeof(struct anv_instance_leaf) == ANV_RT_INSTANCE_LEAF_SIZE);
   STATIC_ASSERT(sizeof(struct anv_quad_leaf_node) == ANV_RT_QUAD_LEAF_SIZE);
   STATIC_ASSERT(sizeof(struct anv_procedural_leaf_node) == ANV_RT_PROCEDURAL_LEAF_SIZE);
   STATIC_ASSERT(sizeof(struct anv_internal_node) == ANV_RT_INTERNAL_NODE_SIZE);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   const struct encode_args args = {
      .intermediate_bvh = intermediate_as_addr,
      .output_bvh = vk_acceleration_structure_get_va(dst) +
                    bvh_layout.bvh_offset,
      .header = intermediate_header_addr,
      .output_bvh_offset = bvh_layout.bvh_offset,
      .leaf_node_count = leaf_count,
      .geometry_type = geometry_type,
   };

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(args),
      .pValues = &args,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   struct anv_address indirect_addr =
      anv_address_from_u64(intermediate_header_addr +
                           offsetof(struct vk_ir_header, ir_internal_node_count));
   anv_genX(cmd_buffer->device->info, cmd_buffer_dispatch_indirect)
      (cmd_buffer, indirect_addr, true /* is_unaligned_size_x */);
}

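/* Header key 1 selects the GPU path (header.comp) for builds with
 * ALLOW_COMPACTION set, where the compacted size is only known after the
 * encode pass; key 0 selects the path that uploads a header precomputed on
 * the CPU.
 */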
static uint32_t
anv_get_header_key(VkAccelerationStructureTypeKHR type,
                   VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) ?
           1 : 0;
}

static VkResult
anv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   if (key == 1) {
      VkPipeline pipeline;
      VkPipelineLayout layout;
      VkResult result = get_pipeline_spv(cmd_buffer->device, "header",
                                         header_spv, sizeof(header_spv),
                                         sizeof(struct header_args), &pipeline,
                                         &layout);
      if (result != VK_SUCCESS)
         return result;

      anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                          pipeline);
   }

   return VK_SUCCESS;
}

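/* Final build step: populate the anv_accel_struct_header at the start of
 * the destination acceleration structure, then record any BVH dumps
 * requested through INTEL_DEBUG.
 */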
static void
anv_init_header(VkCommandBuffer commandBuffer,
                const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
                const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
                VkDeviceAddress intermediate_as_addr,
                VkDeviceAddress intermediate_header_addr, uint32_t leaf_count,
                uint32_t key,
                struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst);

   UNUSED size_t base = offsetof(struct anv_accel_struct_header,
                                 copy_dispatch_size);

   uint32_t instance_count = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ?
                             leaf_count : 0;

   if (key == 1) {
      /* Add a barrier to ensure the writes from encode.comp are ready to be
       * read by header.comp.
       */
      vk_barrier_compute_w_to_compute_r(commandBuffer);

      /* VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR is set, so we
       * want to populate header.compacted_size with the compacted size, which
       * needs to be calculated by using ir_header.dst_node_offset, which we'll
       * access in the header.comp.
       */
      base = offsetof(struct anv_accel_struct_header, instance_count);

      VkPipeline pipeline;
      VkPipelineLayout layout;
      get_pipeline_spv(device, "header", header_spv, sizeof(header_spv),
                       sizeof(struct header_args), &pipeline, &layout);

      struct header_args args = {
         .src = intermediate_header_addr,
         .dst = vk_acceleration_structure_get_va(dst),
         .bvh_offset = bvh_layout.bvh_offset,
         .instance_count = instance_count,
      };

      VkPushConstantsInfoKHR push_info = {
         .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
         .layout = layout,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .offset = 0,
         .size = sizeof(args),
         .pValues = &args,
      };

      anv_CmdPushConstants2KHR(commandBuffer, &push_info);
      vk_common_CmdDispatch(commandBuffer, 1, 1, 1);
   } else {
      vk_barrier_compute_w_to_host_r(commandBuffer);

      struct anv_accel_struct_header header = {};

      header.instance_count = instance_count;
      header.self_ptr = header_addr;
      header.compacted_size = bvh_layout.size;

      /* 128 is local_size_x in the copy.comp shader, 8 is the number of
       * bytes copied by each iteration of that shader's loop.
       */
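      /* Illustrative example (numbers assumed): a compacted_size of 64 KiB
       * yields DIV_ROUND_UP(65536, 8 * 128) = 64 workgroups in X.
       */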
      header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size,
                                                  8 * 128);
      header.copy_dispatch_size[1] = 1;
      header.copy_dispatch_size[2] = 1;

      header.serialization_size =
         header.compacted_size +
         sizeof(struct vk_accel_struct_serialization_header) +
         sizeof(uint64_t) * header.instance_count;

      header.size = header.compacted_size;

      size_t header_size = sizeof(struct anv_accel_struct_header) - base;
      assert(base % sizeof(uint32_t) == 0);
      assert(header_size % sizeof(uint32_t) == 0);
      uint32_t *header_ptr = (uint32_t *)((char *)&header + base);

      struct anv_address addr = anv_address_from_u64(header_addr + base);
      anv_cmd_buffer_update_addr(cmd_buffer, addr, 0, header_size,
                                 header_ptr, false);
   }

   if (INTEL_DEBUG(DEBUG_BVH_ANY)) {
      genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
                                   cmd_buffer->state.current_pipeline,
                                   ANV_PIPE_END_OF_PIPE_SYNC_BIT |
                                   ANV_PIPE_DATA_CACHE_FLUSH_BIT |
                                   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                                   ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT);
      debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout.size,
                                  intermediate_header_addr, intermediate_as_addr,
                                  leaf_count, geometry_type);
   }
}

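/* Hooks consumed by the common vk_acceleration_structure build framework.
 * The two-entry arrays pair the encode pass (slot 0) with the header pass
 * (slot 1).
 */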
static const struct vk_acceleration_structure_build_ops anv_build_ops = {
   .begin_debug_marker = begin_debug_marker,
   .end_debug_marker = end_debug_marker,
   .get_as_size = anv_get_as_size,
   .get_encode_key = { anv_get_encode_key, anv_get_header_key },
   .encode_bind_pipeline = { anv_encode_bind_pipeline,
                             anv_init_header_bind_pipeline },
   .encode_as = { anv_encode_as, anv_init_header },
};

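/* Lazily initialize the device-level state shared by all acceleration
 * structure builds: the radix-sort instance used for the Morton-code sort
 * and the argument block handed to the common build code. Guarded by
 * accel_struct_build.mutex, so only the first caller does the work.
 */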
static VkResult
anv_device_init_accel_struct_build_state(struct anv_device *device)
{
   VkResult result = VK_SUCCESS;
   simple_mtx_lock(&device->accel_struct_build.mutex);

   if (device->accel_struct_build.radix_sort)
      goto exit;

   const struct radix_sort_vk_target_config radix_sort_config = {
      .keyval_dwords = 2,
      .init = { .workgroup_size_log2 = 8, },
      .fill = { .workgroup_size_log2 = 8, .block_rows = 8 },
      .histogram = {
         .workgroup_size_log2 = 8,
         .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
         .block_rows = 14,
      },
      .prefix = {
         .workgroup_size_log2 = 8,
         .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
      },
      .scatter = {
         .workgroup_size_log2 = 8,
         .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
         .block_rows = 14,
      },
   };

   device->accel_struct_build.radix_sort =
      vk_create_radix_sort_u64(anv_device_to_handle(device),
                               &device->vk.alloc,
                               VK_NULL_HANDLE, radix_sort_config);

   device->vk.as_build_ops = &anv_build_ops;
   device->vk.write_buffer_cp = anv_cmd_write_buffer_cp;
   device->vk.flush_buffer_write_cp = anv_cmd_flush_buffer_write_cp;
   device->vk.cmd_dispatch_unaligned = anv_cmd_dispatch_unaligned;
   device->vk.cmd_fill_buffer_addr = anv_cmd_fill_buffer_addr;

   device->accel_struct_build.build_args =
      (struct vk_acceleration_structure_build_args) {
         .emit_markers = u_trace_enabled(&device->ds.trace_context),
         .subgroup_size = device->info->ver >= 20 ? 16 : 8,
         .radix_sort = device->accel_struct_build.radix_sort,
         /* See struct anv_accel_struct_header from anv_bvh.h:
          *
          * the root pointer starts at offset 0 and the bounding box starts
          * at offset 8.
          */
         .bvh_bounds_offset = 8,
   };

exit:
   simple_mtx_unlock(&device->accel_struct_build.mutex);
   return result;
}

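/* The entrypoints below mostly delegate to the common
 * vk_acceleration_structure code after making sure the build state above is
 * initialized.
 */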
void
genX(GetAccelerationStructureBuildSizesKHR)(
    VkDevice                                    _device,
    VkAccelerationStructureBuildTypeKHR         buildType,
    const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
    const uint32_t*                             pMaxPrimitiveCounts,
    VkAccelerationStructureBuildSizesInfoKHR*   pSizeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   if (anv_device_init_accel_struct_build_state(device) != VK_SUCCESS)
      return;

   vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts,
                         pSizeInfo, &device->accel_struct_build.build_args);
}

void
genX(GetDeviceAccelerationStructureCompatibilityKHR)(
    VkDevice                                    _device,
    const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
    VkAccelerationStructureCompatibilityKHR*    pCompatibility)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct vk_accel_struct_serialization_header* ser_header =
      (struct vk_accel_struct_serialization_header*)(pVersionInfo->pVersionData);

   if (memcmp(ser_header->accel_struct_compat,
              device->physical->rt_uuid,
              sizeof(device->physical->rt_uuid)) == 0) {
      *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR;
   } else {
      *pCompatibility =
         VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
   }
}

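/* The common build code binds its own compute pipelines, descriptors and
 * push constants, so the application's compute state is saved and restored
 * around it.
 */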
void
genX(CmdBuildAccelerationStructuresKHR)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   struct anv_device *device = cmd_buffer->device;

   VkResult result = anv_device_init_accel_struct_build_state(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   vk_cmd_build_acceleration_structures(commandBuffer, &device->vk,
                                        &device->meta_device, infoCount,
                                        pInfos, ppBuildRangeInfos,
                                        &device->accel_struct_build.build_args);

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);
}

void
genX(CmdBuildAccelerationStructuresIndirectKHR)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkDeviceAddress*                      pIndirectDeviceAddresses,
    const uint32_t*                             pIndirectStrides,
    const uint32_t* const*                      ppMaxPrimitiveCounts)
{
   unreachable("Unimplemented");
}

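/* GPU copy, serialize and deserialize are all implemented by copy.comp;
 * only the mode and the addresses pushed as constants differ. For copy and
 * serialize, the workgroup count was precomputed into
 * header.copy_dispatch_size when the source structure was built, so the
 * dispatch can be issued indirectly from the source BVH itself.
 */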
void
genX(CmdCopyAccelerationStructureKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkCopyAccelerationStructureInfoKHR*   pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);

   trace_intel_begin_as_copy(&cmd_buffer->trace);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv,
                                      sizeof(copy_spv), sizeof(struct copy_args),
                                      &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = ANV_COPY_MODE_COPY,
   };

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(consts),
      .pValues = &consts,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   /* L1/L2 cache flushes should have been dealt with by pipeline barriers.
    * Unfortunately some platforms require an L3 flush because the CS
    * (reading the dispatch parameters) is not L3 coherent.
    */
   if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
      anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_DATA_CACHE_FLUSH_BIT,
                                "bvh size read for dispatch");
   }

   anv_genX(cmd_buffer->device->info, CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct anv_accel_struct_header,
                             copy_dispatch_size));

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);

   trace_intel_end_as_copy(&cmd_buffer->trace);
}

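/* Serialization embeds the driver and RT UUIDs (see copy_args below) so
 * that GetDeviceAccelerationStructureCompatibilityKHR can later decide
 * whether a serialized blob is compatible with this driver.
 */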
void
genX(CmdCopyAccelerationStructureToMemoryKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   struct anv_device *device = cmd_buffer->device;

   trace_intel_begin_as_copy(&cmd_buffer->trace);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(device, "copy", copy_spv,
                                      sizeof(copy_spv),
                                      sizeof(struct copy_args), &pipeline,
                                      &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = pInfo->dst.deviceAddress,
      .mode = ANV_COPY_MODE_SERIALIZE,
   };

   memcpy(consts.driver_uuid, device->physical->driver_uuid, VK_UUID_SIZE);
   memcpy(consts.accel_struct_compat, device->physical->rt_uuid, VK_UUID_SIZE);

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(consts),
      .pValues = &consts,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   /* L1/L2 cache flushes should have been dealt with by pipeline barriers.
    * Unfortunately some platforms require an L3 flush because the CS
    * (reading the dispatch parameters) is not L3 coherent.
    */
   if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_DATA_CACHE_FLUSH_BIT,
                                "bvh size read for dispatch");
   }

   anv_genX(device->info, CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct anv_accel_struct_header,
                             copy_dispatch_size));

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);

   trace_intel_end_as_copy(&cmd_buffer->trace);
}

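/* For deserialization no GPU-visible dispatch size is available up front,
 * so a fixed dispatch of 512 workgroups is used; copy.comp is presumably
 * written to loop until the whole structure is copied.
 */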
void
genX(CmdCopyMemoryToAccelerationStructureKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);

   trace_intel_begin_as_copy(&cmd_buffer->trace);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv,
                                      sizeof(copy_spv),
                                      sizeof(struct copy_args), &pipeline,
                                      &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   const struct copy_args consts = {
      .src_addr = pInfo->src.deviceAddress,
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = ANV_COPY_MODE_DESERIALIZE,
   };

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(consts),
      .pValues = &consts,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
   anv_cmd_buffer_restore_state(cmd_buffer, &saved);

   trace_intel_end_as_copy(&cmd_buffer->trace);
}

/* TODO: Host commands */

VkResult
genX(BuildAccelerationStructuresKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    uint32_t                                    infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    const VkCopyAccelerationStructureInfoKHR*   pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureToMemoryKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyMemoryToAccelerationStructureKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(WriteAccelerationStructuresPropertiesKHR)(
    VkDevice                                    _device,
    uint32_t                                    accelerationStructureCount,
    const VkAccelerationStructureKHR*           pAccelerationStructures,
    VkQueryType                                 queryType,
    size_t                                      dataSize,
    void*                                       pData,
    size_t                                      stride)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

void
genX(DestroyAccelerationStructureKHR)(
    VkDevice                                    _device,
    VkAccelerationStructureKHR                  accelerationStructure,
    const VkAllocationCallbacks*                pAllocator)
{
   vk_common_DestroyAccelerationStructureKHR(_device, accelerationStructure,
                                             pAllocator);
}
#endif