/* Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "anv_private.h"

#include <math.h>

#include "util/u_debug.h"
#include "util/half_float.h"
#include "util/u_atomic.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#include "ds/intel_tracepoints.h"

#include "bvh/anv_build_interface.h"
#include "vk_acceleration_structure.h"
#include "radix_sort/radix_sort_u64.h"
#include "radix_sort/common/vk/barrier.h"

#include "vk_common_entrypoints.h"
#include "genX_mi_builder.h"

#if GFX_VERx10 >= 125

/* IDs used to track BVH dumps */
static uint32_t blas_id = 0;
static uint32_t tlas_id = 0;

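/* Debug-marker hooks invoked by the common vk acceleration structure build
 * code around each build step.  They map the generic build steps onto the
 * intel_tracepoints begin/end pairs so the steps show up in u_trace captures.
 */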
static void
begin_debug_marker(VkCommandBuffer commandBuffer,
                   enum vk_acceleration_structure_build_step step,
                   const char *format, ...)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   assert(cmd_buffer->state.rt.debug_marker_count <
          ARRAY_SIZE(cmd_buffer->state.rt.debug_markers));
   cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count++] =
      step;
   switch (step) {
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
      trace_intel_begin_as_build(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
      trace_intel_begin_as_build_leaves(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
      trace_intel_begin_as_morton_generate(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
      trace_intel_begin_as_morton_sort(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
      trace_intel_begin_as_lbvh_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
      trace_intel_begin_as_ploc_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
      trace_intel_begin_as_encode(&cmd_buffer->trace);
      break;
   default:
      unreachable("Invalid build step");
   }
}

static void
end_debug_marker(VkCommandBuffer commandBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer->state.rt.debug_marker_count--;
   switch (cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count]) {
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
      trace_intel_end_as_build(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
      trace_intel_end_as_build_leaves(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
      trace_intel_end_as_morton_generate(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
      trace_intel_end_as_morton_sort(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
      trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
      trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
      trace_intel_end_as_encode(&cmd_buffer->trace);
      break;
   default:
      unreachable("Invalid build step");
   }
}

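/* Allocate a host-mapped BO and record a GPU copy of the given range into
 * it, then queue it on device->bvh_dumps (under the device mutex) so the
 * data can be inspected once the command buffer has executed.  Only used
 * with the INTEL_DEBUG BVH dump options.
 */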
static void
add_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
             VkDeviceAddress src,
             uint64_t dump_size,
             VkGeometryTypeKHR geometry_type,
             enum bvh_dump_type dump_type)
{
   assert(dump_size % 4 == 0);

   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *bo = NULL;

   VkResult result = anv_device_alloc_bo(device, "bvh_dump", dump_size,
                                         ANV_BO_ALLOC_MAPPED |
                                         ANV_BO_ALLOC_HOST_CACHED_COHERENT, 0,
                                         &bo);
   if (result != VK_SUCCESS) {
      printf("Failed to allocate bvh for dump\n");
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_bvh_dump *bvh_dump = malloc(sizeof(struct anv_bvh_dump));

   bvh_dump->bo = bo;
   bvh_dump->bvh_id = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ?
                      tlas_id : blas_id;
   bvh_dump->dump_size = dump_size;
   bvh_dump->geometry_type = geometry_type;
   bvh_dump->dump_type = dump_type;

   struct anv_address dst_addr = { .bo = bvh_dump->bo, .offset = 0 };
   struct anv_address src_addr = anv_address_from_u64(src);
   anv_cmd_copy_addr(cmd_buffer, src_addr, dst_addr, bvh_dump->dump_size);

   pthread_mutex_lock(&device->mutex);
   list_addtail(&bvh_dump->link, &device->bvh_dumps);
   pthread_mutex_unlock(&device->mutex);
}

static void
debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
                            VkDeviceAddress header_addr,
                            uint64_t bvh_anv_size,
                            VkDeviceAddress intermediate_header_addr,
                            VkDeviceAddress intermediate_as_addr,
                            uint32_t leaf_count,
                            VkGeometryTypeKHR geometry_type)
{
   if (INTEL_DEBUG(DEBUG_BVH_BLAS) &&
       geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type,
                   BVH_ANV);
   }

   if (INTEL_DEBUG(DEBUG_BVH_TLAS) &&
       geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type,
                   BVH_ANV);
   }

   if (INTEL_DEBUG(DEBUG_BVH_BLAS_IR_HDR) &&
       geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      add_bvh_dump(cmd_buffer, intermediate_header_addr,
                   sizeof(struct vk_ir_header), geometry_type, BVH_IR_HDR);
   }

   if (INTEL_DEBUG(DEBUG_BVH_TLAS_IR_HDR) &&
       geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      add_bvh_dump(cmd_buffer, intermediate_header_addr,
                   sizeof(struct vk_ir_header), geometry_type, BVH_IR_HDR);
   }

   uint32_t internal_node_count = MAX2(leaf_count, 2) - 1;
   uint64_t internal_node_total_size = sizeof(struct vk_ir_box_node) *
                                       internal_node_count;

   if (INTEL_DEBUG(DEBUG_BVH_BLAS_IR_AS) &&
       geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      uint64_t leaf_total_size;

      switch (geometry_type) {
      case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
         leaf_total_size = sizeof(struct vk_ir_triangle_node) * leaf_count;
         break;
      case VK_GEOMETRY_TYPE_AABBS_KHR:
         leaf_total_size = sizeof(struct vk_ir_aabb_node) * leaf_count;
         break;
      default:
         unreachable("invalid geometry type");
      }

      add_bvh_dump(cmd_buffer, intermediate_as_addr, internal_node_total_size +
                   leaf_total_size, geometry_type, BVH_IR_AS);
   }

   if (INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS) &&
       geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      uint64_t leaf_total_size = sizeof(struct vk_ir_instance_node) *
                                 leaf_count;
      add_bvh_dump(cmd_buffer, intermediate_as_addr, internal_node_total_size +
                   leaf_total_size, geometry_type, BVH_IR_AS);
   }

   if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      tlas_id++;
   } else {
      blas_id++;
   }
}

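/* Precompiled SPIR-V for the BVH build shaders under bvh/ (encode, header
 * and copy); the .spv.h headers are generated from those shaders at build
 * time.
 */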
static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};

static const uint32_t header_spv[] = {
#include "bvh/header.spv.h"
};

static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
};

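/* Look up a compute pipeline (and its layout) in the vk_meta cache by name,
 * creating it from the given SPIR-V blob on a cache miss.  All of the BVH
 * shaders only take push constants, so the layout is a single
 * VK_SHADER_STAGE_COMPUTE_BIT push-constant range.
 *
 * Typical usage, as done in anv_encode_as() below:
 *
 *    VkPipeline pipeline;
 *    VkPipelineLayout layout;
 *    VkResult result = get_pipeline_spv(device, "encode", encode_spv,
 *                                       sizeof(encode_spv),
 *                                       sizeof(struct encode_args),
 *                                       &pipeline, &layout);
 */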
static VkResult
get_pipeline_spv(struct anv_device *device,
                 const char *name, const uint32_t *spv, uint32_t spv_size,
                 unsigned push_constant_size, VkPipeline *pipeline,
                 VkPipelineLayout *layout)
{
   size_t key_size = strlen(name);

   const VkPushConstantRange pc_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = push_constant_size,
   };

   VkResult result = vk_meta_get_pipeline_layout(&device->vk,
                                                 &device->meta_device, NULL,
                                                 &pc_range, name, key_size,
                                                 layout);

   if (result != VK_SUCCESS)
      return result;

   VkPipeline pipeline_from_cache =
      vk_meta_lookup_pipeline(&device->meta_device, name, key_size);
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline = pipeline_from_cache;
      return VK_SUCCESS;
   }

   VkShaderModuleCreateInfo module_info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
      .pNext = NULL,
      .flags = 0,
      .codeSize = spv_size,
      .pCode = spv,
   };

   VkPipelineShaderStageCreateInfo shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &module_info,
      .flags = 0,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .flags = 0,
      .stage = shader_stage,
      .layout = *layout,
   };

   return vk_meta_create_compute_pipeline(&device->vk, &device->meta_device,
                                          &pipeline_info, name, key_size, pipeline);
}

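/* Compute the layout of the final anv acceleration structure:
 *
 *    [anv_accel_struct_header]
 *    [instance-leaf address table]   (TLAS only, one uint64_t per instance)
 *    [64-byte aligned BVH data: internal nodes, then leaf nodes]
 */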
static void
get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count,
               struct bvh_layout *layout)
{
   uint32_t internal_count = MAX2(leaf_count, 2) - 1;

   uint64_t offset = ANV_RT_BVH_HEADER_SIZE;

   /* For a TLAS, we store the addresses of the anv_instance_leaf structures
    * right after the header, for quick access in copy.comp.
    */
   if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      offset += leaf_count * sizeof(uint64_t);
   }
   /* The BVH, and hence bvh_offset, needs 64-byte alignment for RT nodes. */
   offset = ALIGN(offset, 64);

   /* This is where internal nodes/leaves start to be encoded */
   layout->bvh_offset = offset;

   offset += internal_count * ANV_RT_INTERNAL_NODE_SIZE;

   switch (geometry_type) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
      /* Currently we encode one triangle within one quad leaf */
      offset += leaf_count * ANV_RT_QUAD_LEAF_SIZE;
      break;
   case VK_GEOMETRY_TYPE_AABBS_KHR:
      offset += leaf_count * ANV_RT_PROCEDURAL_LEAF_SIZE;
      break;
   case VK_GEOMETRY_TYPE_INSTANCES_KHR:
      offset += leaf_count * ANV_RT_INSTANCE_LEAF_SIZE;
      break;
   default:
      unreachable("Unknown VkGeometryTypeKHR");
   }

   layout->size = offset;
}

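/* As an illustration of the layout math above (not used by the code): a
 * triangle BLAS with leaf_count == 4 has internal_count == 3, so
 *
 *    bvh_offset = ALIGN(ANV_RT_BVH_HEADER_SIZE, 64)
 *    size       = bvh_offset + 3 * ANV_RT_INTERNAL_NODE_SIZE
 *                            + 4 * ANV_RT_QUAD_LEAF_SIZE
 *
 * while a TLAS additionally reserves leaf_count * sizeof(uint64_t) for the
 * instance-leaf address table before the 64-byte alignment is applied.
 */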
static VkDeviceSize
anv_get_as_size(VkDevice device,
                const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
                uint32_t leaf_count)
{
   struct bvh_layout layout;
   get_bvh_layout(vk_get_as_geometry_type(pBuildInfo), leaf_count, &layout);
   return layout.size;
}

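/* The encode pass only has a single pipeline variant, so its key is always
 * 0.  The header pass has its own key, see anv_get_header_key() below.
 */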
static uint32_t
anv_get_encode_key(VkAccelerationStructureTypeKHR type,
                   VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return 0;
}

static VkResult
anv_encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(device, "encode", encode_spv,
                                      sizeof(encode_spv),
                                      sizeof(struct encode_args), &pipeline,
                                      &layout);
   if (result != VK_SUCCESS)
      return result;

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   return VK_SUCCESS;
}

static void
anv_encode_as(VkCommandBuffer commandBuffer,
              const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
              const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
              VkDeviceAddress intermediate_as_addr,
              VkDeviceAddress intermediate_header_addr, uint32_t leaf_count,
              uint32_t key,
              struct vk_acceleration_structure *dst)
{
   if (INTEL_DEBUG(DEBUG_BVH_NO_BUILD))
      return;

   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv),
                    sizeof(struct encode_args), &pipeline, &layout);

   STATIC_ASSERT(sizeof(struct anv_accel_struct_header) == ANV_RT_BVH_HEADER_SIZE);
   STATIC_ASSERT(sizeof(struct anv_instance_leaf) == ANV_RT_INSTANCE_LEAF_SIZE);
   STATIC_ASSERT(sizeof(struct anv_quad_leaf_node) == ANV_RT_QUAD_LEAF_SIZE);
   STATIC_ASSERT(sizeof(struct anv_procedural_leaf_node) == ANV_RT_PROCEDURAL_LEAF_SIZE);
   STATIC_ASSERT(sizeof(struct anv_internal_node) == ANV_RT_INTERNAL_NODE_SIZE);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   const struct encode_args args = {
      .intermediate_bvh = intermediate_as_addr,
      .output_bvh = vk_acceleration_structure_get_va(dst) +
                    bvh_layout.bvh_offset,
      .header = intermediate_header_addr,
      .output_bvh_offset = bvh_layout.bvh_offset,
      .leaf_node_count = leaf_count,
      .geometry_type = geometry_type,
   };

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(args),
      .pValues = &args,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   struct anv_address indirect_addr =
      anv_address_from_u64(intermediate_header_addr +
                           offsetof(struct vk_ir_header, ir_internal_node_count));
   anv_genX(cmd_buffer->device->info, cmd_buffer_dispatch_indirect)
      (cmd_buffer, indirect_addr, true /* is_unaligned_size_x */);
}

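/* Key 1 selects the header.comp shader path: when
 * VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR is set, the
 * compacted size is only known on the GPU after encoding, so the header has
 * to be written by a shader.  With key 0, the header is filled with values
 * computed on the CPU at record time (see anv_init_header()).
 */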
static uint32_t
anv_get_header_key(VkAccelerationStructureTypeKHR type,
                   VkBuildAccelerationStructureFlagBitsKHR flags)
{
   return (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) ?
          1 : 0;
}

static VkResult
anv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   if (key == 1) {
      VkPipeline pipeline;
      VkPipelineLayout layout;
      VkResult result = get_pipeline_spv(cmd_buffer->device, "header",
                                         header_spv, sizeof(header_spv),
                                         sizeof(struct header_args), &pipeline,
                                         &layout);
      if (result != VK_SUCCESS)
         return result;

      anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                          pipeline);
   }

   return VK_SUCCESS;
}

static void
anv_init_header(VkCommandBuffer commandBuffer,
                const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
                const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
                VkDeviceAddress intermediate_as_addr,
                VkDeviceAddress intermediate_header_addr, uint32_t leaf_count,
                uint32_t key,
                struct vk_acceleration_structure *dst)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);

   struct bvh_layout bvh_layout;
   get_bvh_layout(geometry_type, leaf_count, &bvh_layout);

   VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst);

   UNUSED size_t base = offsetof(struct anv_accel_struct_header,
                                 copy_dispatch_size);

   uint32_t instance_count = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ?
                             leaf_count : 0;

   if (key == 1) {
      /* Add a barrier to ensure the writes from encode.comp are ready to be
       * read by header.comp.
       */
      vk_barrier_compute_w_to_compute_r(commandBuffer);

      /* VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR is set, so we
       * want to populate header.compacted_size with the compacted size, which
       * needs to be calculated by using ir_header.dst_node_offset, which we'll
       * access in header.comp.
       */
      base = offsetof(struct anv_accel_struct_header, instance_count);

      VkPipeline pipeline;
      VkPipelineLayout layout;
      get_pipeline_spv(device, "header", header_spv, sizeof(header_spv),
                       sizeof(struct header_args), &pipeline, &layout);

      struct header_args args = {
         .src = intermediate_header_addr,
         .dst = vk_acceleration_structure_get_va(dst),
         .bvh_offset = bvh_layout.bvh_offset,
         .instance_count = instance_count,
      };

      VkPushConstantsInfoKHR push_info = {
         .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
         .layout = layout,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .offset = 0,
         .size = sizeof(args),
         .pValues = &args,
      };

      anv_CmdPushConstants2KHR(commandBuffer, &push_info);
      vk_common_CmdDispatch(commandBuffer, 1, 1, 1);
   } else {
      vk_barrier_compute_w_to_host_r(commandBuffer);

      struct anv_accel_struct_header header = {};

      header.instance_count = instance_count;
      header.self_ptr = header_addr;
      header.compacted_size = bvh_layout.size;

      /* 128 is local_size_x in the copy.comp shader, 8 is the amount of data
       * copied by each iteration of that shader's loop.
       */
      header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size,
                                                  8 * 128);
      header.copy_dispatch_size[1] = 1;
      header.copy_dispatch_size[2] = 1;

      header.serialization_size =
         header.compacted_size +
         sizeof(struct vk_accel_struct_serialization_header) +
         sizeof(uint64_t) * header.instance_count;

      header.size = header.compacted_size;

      size_t header_size = sizeof(struct anv_accel_struct_header) - base;
      assert(base % sizeof(uint32_t) == 0);
      assert(header_size % sizeof(uint32_t) == 0);
      uint32_t *header_ptr = (uint32_t *)((char *)&header + base);

      struct anv_address addr = anv_address_from_u64(header_addr + base);
      anv_cmd_buffer_update_addr(cmd_buffer, addr, 0, header_size,
                                 header_ptr, false);
   }

   if (INTEL_DEBUG(DEBUG_BVH_ANY)) {
      genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
                                   cmd_buffer->state.current_pipeline,
                                   ANV_PIPE_END_OF_PIPE_SYNC_BIT |
                                   ANV_PIPE_DATA_CACHE_FLUSH_BIT |
                                   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                                   ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT);
      debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout.size,
                                  intermediate_header_addr, intermediate_as_addr,
                                  leaf_count, geometry_type);
   }
}

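/* Build ops plugged into the common vk_acceleration_structure build
 * framework.  Index [0] of each encode array is the encode pass and
 * index [1] is the header pass; both run after the common code has built
 * the intermediate BVH.
 */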
static const struct vk_acceleration_structure_build_ops anv_build_ops = {
   .begin_debug_marker = begin_debug_marker,
   .end_debug_marker = end_debug_marker,
   .get_as_size = anv_get_as_size,
   .get_encode_key = { anv_get_encode_key, anv_get_header_key },
   .encode_bind_pipeline = { anv_encode_bind_pipeline,
                             anv_init_header_bind_pipeline },
   .encode_as = { anv_encode_as, anv_init_header },
};

static VkResult
anv_device_init_accel_struct_build_state(struct anv_device *device)
{
   VkResult result = VK_SUCCESS;
   simple_mtx_lock(&device->accel_struct_build.mutex);

   if (device->accel_struct_build.radix_sort)
      goto exit;

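   /* Configuration for the radix_sort_vk sorter used for the Morton-code
    * sort: 2-dword keyvals, with subgroup_size_log2 of 4 (SIMD16) on Xe2+
    * (ver >= 20) and 3 (SIMD8) on earlier platforms.
    */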
   const struct radix_sort_vk_target_config radix_sort_config = {
      .keyval_dwords = 2,
      .init = { .workgroup_size_log2 = 8, },
      .fill = { .workgroup_size_log2 = 8, .block_rows = 8 },
      .histogram = {
         .workgroup_size_log2 = 8,
         .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
         .block_rows = 14,
      },
      .prefix = {
         .workgroup_size_log2 = 8,
         .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
      },
      .scatter = {
         .workgroup_size_log2 = 8,
         .subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
         .block_rows = 14,
      },
   };

   device->accel_struct_build.radix_sort =
      vk_create_radix_sort_u64(anv_device_to_handle(device),
                               &device->vk.alloc,
                               VK_NULL_HANDLE, radix_sort_config);

   device->vk.as_build_ops = &anv_build_ops;
   device->vk.write_buffer_cp = anv_cmd_write_buffer_cp;
   device->vk.flush_buffer_write_cp = anv_cmd_flush_buffer_write_cp;
   device->vk.cmd_dispatch_unaligned = anv_cmd_dispatch_unaligned;
   device->vk.cmd_fill_buffer_addr = anv_cmd_fill_buffer_addr;

   device->accel_struct_build.build_args =
      (struct vk_acceleration_structure_build_args) {
         .emit_markers = u_trace_enabled(&device->ds.trace_context),
         .subgroup_size = device->info->ver >= 20 ? 16 : 8,
         .radix_sort = device->accel_struct_build.radix_sort,
         /* See struct anv_accel_struct_header in anv_bvh.h:
          * the root pointer starts at offset 0 and the bounding box starts
          * at offset 8.
          */
         .bvh_bounds_offset = 8,
      };

exit:
   simple_mtx_unlock(&device->accel_struct_build.mutex);
   return result;
}

void
genX(GetAccelerationStructureBuildSizesKHR)(
   VkDevice _device,
   VkAccelerationStructureBuildTypeKHR buildType,
   const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
   const uint32_t* pMaxPrimitiveCounts,
   VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   if (anv_device_init_accel_struct_build_state(device) != VK_SUCCESS)
      return;

   vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts,
                         pSizeInfo, &device->accel_struct_build.build_args);
}

void
genX(GetDeviceAccelerationStructureCompatibilityKHR)(
   VkDevice _device,
   const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
   VkAccelerationStructureCompatibilityKHR* pCompatibility)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct vk_accel_struct_serialization_header* ser_header =
      (struct vk_accel_struct_serialization_header*)(pVersionInfo->pVersionData);

   if (memcmp(ser_header->accel_struct_compat,
              device->physical->rt_uuid,
              sizeof(device->physical->rt_uuid)) == 0) {
      *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR;
   } else {
      *pCompatibility =
         VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
   }
}

void
genX(CmdBuildAccelerationStructuresKHR)(
   VkCommandBuffer commandBuffer,
   uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   struct anv_device *device = cmd_buffer->device;

   VkResult result = anv_device_init_accel_struct_build_state(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   vk_cmd_build_acceleration_structures(commandBuffer, &device->vk,
                                        &device->meta_device, infoCount,
                                        pInfos, ppBuildRangeInfos,
                                        &device->accel_struct_build.build_args);

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);
}

void
genX(CmdBuildAccelerationStructuresIndirectKHR)(
   VkCommandBuffer commandBuffer,
   uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
   const VkDeviceAddress* pIndirectDeviceAddresses,
   const uint32_t* pIndirectStrides,
   const uint32_t* const* ppMaxPrimitiveCounts)
{
   unreachable("Unimplemented");
}

void
genX(CmdCopyAccelerationStructureKHR)(
   VkCommandBuffer commandBuffer,
   const VkCopyAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);

   trace_intel_begin_as_copy(&cmd_buffer->trace);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv,
                                      sizeof(copy_spv), sizeof(struct copy_args),
                                      &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = ANV_COPY_MODE_COPY,
   };

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(consts),
      .pValues = &consts,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   /* L1/L2 cache flushes should have been dealt with by pipeline barriers.
    * Unfortunately some platforms require an L3 flush because the CS (reading
    * the dispatch parameters) is not L3 coherent.
    */
   if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
      anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_DATA_CACHE_FLUSH_BIT,
                                "bvh size read for dispatch");
   }

   anv_genX(cmd_buffer->device->info, CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct anv_accel_struct_header,
                             copy_dispatch_size));

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);

   trace_intel_end_as_copy(&cmd_buffer->trace);
}

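/* For serialization, the driver UUID and RT UUID are passed to copy.comp
 * through the push constants so they end up in the serialized blob;
 * GetDeviceAccelerationStructureCompatibilityKHR() above checks the RT UUID
 * when such a blob is imported again.
 */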
void
genX(CmdCopyAccelerationStructureToMemoryKHR)(
   VkCommandBuffer commandBuffer,
   const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
   struct anv_device *device = cmd_buffer->device;

   trace_intel_begin_as_copy(&cmd_buffer->trace);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(device, "copy", copy_spv,
                                      sizeof(copy_spv),
                                      sizeof(struct copy_args), &pipeline,
                                      &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   struct copy_args consts = {
      .src_addr = vk_acceleration_structure_get_va(src),
      .dst_addr = pInfo->dst.deviceAddress,
      .mode = ANV_COPY_MODE_SERIALIZE,
   };

   memcpy(consts.driver_uuid, device->physical->driver_uuid, VK_UUID_SIZE);
   memcpy(consts.accel_struct_compat, device->physical->rt_uuid, VK_UUID_SIZE);

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(consts),
      .pValues = &consts,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   /* L1/L2 cache flushes should have been dealt with by pipeline barriers.
    * Unfortunately some platforms require an L3 flush because the CS (reading
    * the dispatch parameters) is not L3 coherent.
    */
   if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_DATA_CACHE_FLUSH_BIT,
                                "bvh size read for dispatch");
   }

   anv_genX(device->info, CmdDispatchIndirect)(
      commandBuffer, src->buffer,
      src->offset + offsetof(struct anv_accel_struct_header,
                             copy_dispatch_size));

   anv_cmd_buffer_restore_state(cmd_buffer, &saved);

   trace_intel_end_as_copy(&cmd_buffer->trace);
}

void
genX(CmdCopyMemoryToAccelerationStructureKHR)(
   VkCommandBuffer commandBuffer,
   const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);

   trace_intel_begin_as_copy(&cmd_buffer->trace);

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv,
                                      sizeof(copy_spv),
                                      sizeof(struct copy_args), &pipeline,
                                      &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   struct anv_cmd_saved_state saved;
   anv_cmd_buffer_save_state(cmd_buffer,
                             ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
                             ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
                             ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);

   anv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                       pipeline);

   const struct copy_args consts = {
      .src_addr = pInfo->src.deviceAddress,
      .dst_addr = vk_acceleration_structure_get_va(dst),
      .mode = ANV_COPY_MODE_DESERIALIZE,
   };

   VkPushConstantsInfoKHR push_info = {
      .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
      .layout = layout,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = sizeof(consts),
      .pValues = &consts,
   };

   anv_CmdPushConstants2KHR(commandBuffer, &push_info);

   vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
   anv_cmd_buffer_restore_state(cmd_buffer, &saved);

   trace_intel_end_as_copy(&cmd_buffer->trace);
}

/* TODO: Host commands */

VkResult
genX(BuildAccelerationStructuresKHR)(
   VkDevice _device,
   VkDeferredOperationKHR deferredOperation,
   uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureKHR)(
   VkDevice _device,
   VkDeferredOperationKHR deferredOperation,
   const VkCopyAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureToMemoryKHR)(
   VkDevice _device,
   VkDeferredOperationKHR deferredOperation,
   const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyMemoryToAccelerationStructureKHR)(
   VkDevice _device,
   VkDeferredOperationKHR deferredOperation,
   const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(WriteAccelerationStructuresPropertiesKHR)(
   VkDevice _device,
   uint32_t accelerationStructureCount,
   const VkAccelerationStructureKHR* pAccelerationStructures,
   VkQueryType queryType,
   size_t dataSize,
   void* pData,
   size_t stride)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

void
genX(DestroyAccelerationStructureKHR)(
   VkDevice _device,
   VkAccelerationStructureKHR accelerationStructure,
   const VkAllocationCallbacks* pAllocator)
{
   vk_common_DestroyAccelerationStructureKHR(_device, accelerationStructure,
                                             pAllocator);
}
#endif