/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include <math.h>

#include "util/u_debug.h"
#include "util/half_float.h"
#include "util/u_atomic.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#include "ds/intel_tracepoints.h"

#if GFX_VERx10 == 125
#include "grl/grl_structs.h"

/* Wait for the previous dispatches to finish and flush their data port
 * writes.
 */
#define ANV_GRL_FLUSH_FLAGS (ANV_PIPE_END_OF_PIPE_SYNC_BIT | \
                             ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
                             ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT)

static const VkAccelerationStructureGeometryKHR *
get_geometry(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo,
             uint32_t index)
{
   return pInfo->pGeometries ? &pInfo->pGeometries[index] :
                               pInfo->ppGeometries[index];
}

static size_t align_transient_size(size_t bytes)
{
   return align_uintptr(bytes, 64);
}

static size_t align_private_size(size_t bytes)
{
   return align_uintptr(bytes, 64);
}

static size_t get_scheduler_size(size_t num_builds)
{
   size_t scheduler_size = sizeof(union SchedulerUnion);
   /* add more memory for qnode creation stage if needed */
   if (num_builds > QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM) {
      scheduler_size += (num_builds - QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM) * 2 *
                        sizeof(struct QNodeGlobalRootBufferEntry);
   }

   return align_private_size(scheduler_size);
}

static size_t
get_batched_binnedsah_transient_mem_size(size_t num_builds)
{
   if (num_builds == 0)
      return 0;
   return num_builds * (sizeof(struct SAHBuildBuffersInfo) + sizeof(gpuva_t));
}

static size_t
get_batched_binnedsah_private_mem_size(size_t num_builds)
{
   if (num_builds == 0)
      return 0;

   size_t globals_size = align_private_size(num_builds * sizeof(struct SAHBuildGlobals));
   return globals_size + get_scheduler_size(num_builds);
}

static uint32_t
estimate_qbvh6_nodes(const uint32_t N)
{
   const uint32_t W = 6;
   const uint32_t N0 = N / 2 + N % 2; // lowest level with 2 leaves per QBVH6 node
   const uint32_t N1 = N0 / W + (N0 % W ? 1 : 0); // filled level
   const uint32_t N2 = N0 / W + (N1 % W ? 1 : 0); // filled level
   const uint32_t N3 = N0 / W + (N2 % W ? 1 : 0); // filled level
   const uint32_t N4 = N3; // overestimate remaining nodes
   return N0 + N1 + N2 + N3 + N4;
}
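
/* Illustrative example (editorial note, not used by the code): for N = 100
 * input primitives the formula above gives N0 = 50 and N1 = N2 = N3 = N4 = 9,
 * i.e. an estimate of 86 QBVH6 nodes.
 */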

/* Estimates the worst case number of QBVH6 nodes for a top-down BVH
 * build that guarantees to produce subtrees with N >= K primitives
 * from which a single QBVH6 node is created.
 */
static uint32_t
estimate_qbvh6_nodes_minK(const uint32_t N, uint32_t K)
{
   const uint32_t N0 = N / K + (N % K ? 1 : 0); // lowest level of nodes with K leaves minimally
   return N0 + estimate_qbvh6_nodes(N0);
}

static size_t
estimate_qbvh6_fatleafs(const size_t P)
{
   return P;
}

static size_t
estimate_qbvh6_nodes_worstcase(const size_t P)
{
   const size_t F = estimate_qbvh6_fatleafs(P);

   // worst case is each inner node having 5 fat-leaf children:
   // the number of inner nodes is F/5 and the number of fat-leaves is F
   return F + ceil(F/5.0);
}
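
/* Illustrative only: with P = 100 fat leaves the worst case above is
 * 100 + ceil(100 / 5) = 120 nodes.
 */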

#define sizeof_PrimRef        32
#define sizeof_HwInstanceLeaf (GENX(RT_BVH_INSTANCE_LEAF_length) * 4)
#define sizeof_InternalNode   (GENX(RT_BVH_INTERNAL_NODE_length) * 4)
#define sizeof_Procedural     (GENX(RT_BVH_PROCEDURAL_LEAF_length) * 4)
#define sizeof_Quad           (GENX(RT_BVH_QUAD_LEAF_length) * 4)

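/* Derive the primitive counts and the byte layout of the destination
 * acceleration structure (internal nodes, leaf data, instance descriptors,
 * geometry metadata and optional back pointers) for one build info.
 */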
static struct MKSizeEstimate
get_gpu_size_estimate(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo,
                      const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos,
                      const uint32_t *pMaxPrimitiveCounts)
{
   uint32_t num_triangles = 0, num_aabbs = 0, num_instances = 0;
   for (unsigned g = 0; g < pInfo->geometryCount; g++) {
      const VkAccelerationStructureGeometryKHR *pGeometry =
         get_geometry(pInfo, g);
      uint32_t prim_count = pBuildRangeInfos != NULL ?
         pBuildRangeInfos[g].primitiveCount : pMaxPrimitiveCounts[g];

      switch (pGeometry->geometryType) {
      case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
         num_triangles += prim_count;
         break;
      case VK_GEOMETRY_TYPE_AABBS_KHR:
         num_aabbs += prim_count;
         break;
      case VK_GEOMETRY_TYPE_INSTANCES_KHR:
         num_instances += prim_count;
         break;
      default:
         unreachable("Unsupported geometry type");
      }
   }
   const uint32_t num_primitives = num_triangles + num_aabbs + num_instances;

   struct MKSizeEstimate est = {};

   uint64_t size = sizeof(BVHBase);
   size = align64(size, 64);

   /* Must immediately follow BVHBase because we use fixed offset to nodes. */
   est.node_data_start = size;

   switch (pInfo->type) {
   case VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR: {
      assert(num_triangles == 0 && num_aabbs == 0);

      est.numPrimitives = num_instances;
      est.numPrimitivesToSplit = 0;
      est.numBuildPrimitives = est.numPrimitives + est.numPrimitivesToSplit;

      est.min_primitives = est.numPrimitives;
      est.max_primitives = est.numPrimitives + est.numPrimitivesToSplit;

      unsigned int sizeInnerNodes =
         (unsigned int) estimate_qbvh6_nodes_worstcase(est.numBuildPrimitives) *
         sizeof_InternalNode;
      if (sizeInnerNodes == 0)
         sizeInnerNodes = sizeof_InternalNode;

      est.max_inner_nodes = sizeInnerNodes / sizeof_InternalNode;

      size += sizeInnerNodes;
      STATIC_ASSERT(sizeof_InternalNode % 64 == 0);

      est.leaf_data_start = size;
      size += est.numBuildPrimitives * sizeof_HwInstanceLeaf;
      STATIC_ASSERT(sizeof_HwInstanceLeaf % 64 == 0);

      est.leaf_data_size = est.numBuildPrimitives * sizeof_HwInstanceLeaf;

      break;
   }

   case VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR: {
      assert(num_instances == 0);

      /* RT: TODO */
      const float split_factor = 0.0f;
      uint32_t num_prims_to_split = 0;
      if (false)
         num_prims_to_split = num_triangles + (double)split_factor;

      const uint32_t num_build_triangles = num_triangles + num_prims_to_split;
      const uint32_t num_build_primitives = num_build_triangles + num_aabbs;

      est.numPrimitives = num_primitives;
      est.numTriangles = num_triangles;
      est.numProcedurals = num_aabbs;
      est.numMeshes = pInfo->geometryCount;
      est.numBuildPrimitives = num_build_primitives;
      est.numPrimitivesToSplit = num_prims_to_split;
      est.max_instance_leafs = 0;

      est.min_primitives = (size_t)(num_build_triangles * 0.5f + num_aabbs);
      est.max_primitives = num_build_triangles + num_aabbs;

      size_t nodeBytes = 0;
      nodeBytes += estimate_qbvh6_nodes_worstcase(num_build_triangles) * sizeof_InternalNode;
      nodeBytes += estimate_qbvh6_nodes_worstcase(num_aabbs) * sizeof_InternalNode;
      if (nodeBytes == 0) // for case with 0 primitives
         nodeBytes = sizeof_InternalNode;
      nodeBytes = MAX2(nodeBytes, 8 * (size_t)num_build_primitives); // for primref_index0/1 buffers

      est.max_inner_nodes = nodeBytes / sizeof_InternalNode;

      size += nodeBytes;
      STATIC_ASSERT(sizeof_InternalNode % 64 == 0);

      est.leaf_data_start = size;
      size += num_build_triangles * sizeof_Quad;
      STATIC_ASSERT(sizeof_Quad % 64 == 0);

      est.procedural_data_start = size;
      size += num_aabbs * sizeof_Procedural;
      STATIC_ASSERT(sizeof_Procedural % 64 == 0);

      est.leaf_data_size = num_build_triangles * sizeof_Quad +
                           num_aabbs * sizeof_Procedural;

      if (num_build_primitives == 0)
         size += MAX2(sizeof_Quad, sizeof_Procedural);
      break;
   }

   default:
      unreachable("Unsupported acceleration structure type");
   }

   size = align64(size, 64);
   est.instance_descs_start = size;
   size += sizeof(struct InstanceDesc) * num_instances;

   est.geo_meta_data_start = size;
   size += sizeof(struct GeoMetaData) * pInfo->geometryCount;
   size = align64(size, 64);

   assert(size == align64(size, 64));
   est.back_pointer_start = size;

   const bool alloc_backpointers = false; /* RT TODO */
   if (alloc_backpointers) {
      size += est.max_inner_nodes * sizeof(uint32_t);
      size = align64(size, 64);
   }

   assert(size < UINT32_MAX);
   est.sizeTotal = align64(size, 64);

   return est;
}

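/* Layout of the per-build scratch buffer provided by the application
 * through VkAccelerationStructureBuildGeometryInfoKHR::scratchData.
 */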
struct scratch_layout {
   gpuva_t base;
   uint32_t total_size;

   gpuva_t primrefs;
   gpuva_t globals;
   gpuva_t leaf_index_buffers;
   uint32_t leaf_index_buffer_stride;

   /* new_sah */
   gpuva_t qnode_buffer;
   gpuva_t bvh2_buffer;
};

static size_t
get_bvh2_size(uint32_t num_primitives)
{
   if (num_primitives == 0)
      return 0;
   return sizeof(struct BVH2) +
          (2 * num_primitives - 1) * sizeof(struct BVH2Node);
}
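
/* A binary tree over N leaves has at most N - 1 inner nodes, so the BVH2
 * node array above is sized for 2 * N - 1 entries.
 */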

static struct scratch_layout
get_gpu_scratch_layout(struct anv_address base,
                       struct MKSizeEstimate est,
                       enum anv_rt_bvh_build_method build_method)
{
   struct scratch_layout scratch = {
      .base = anv_address_physical(base),
   };
   gpuva_t current = anv_address_physical(base);

   scratch.globals = current;
   current += sizeof(struct Globals);

   scratch.primrefs = intel_canonical_address(current);
   current += est.numBuildPrimitives * sizeof_PrimRef;

   scratch.leaf_index_buffers = intel_canonical_address(current);
   current += est.numBuildPrimitives * sizeof(uint32_t) * 2;
   scratch.leaf_index_buffer_stride = sizeof(uint32_t);

   switch (build_method) {
   case ANV_BVH_BUILD_METHOD_TRIVIAL:
      break;

   case ANV_BVH_BUILD_METHOD_NEW_SAH: {
      size_t bvh2_size = get_bvh2_size(est.numBuildPrimitives);
      if (est.leaf_data_size < bvh2_size) {
         scratch.bvh2_buffer = intel_canonical_address(current);
         current += bvh2_size;
      }

      scratch.qnode_buffer = intel_canonical_address(current);
      current += 2 * sizeof(dword) * est.max_inner_nodes;
      break;
   }

   default:
      unreachable("invalid build");
   }

   assert((current - scratch.base) < UINT32_MAX);
   scratch.total_size = current - scratch.base;

   return scratch;
}

static void
anv_get_gpu_acceleration_structure_size(
   UNUSED struct anv_device *device,
   VkAccelerationStructureBuildTypeKHR buildType,
   const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
   const uint32_t* pMaxPrimitiveCounts,
   VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo)
{
   struct MKSizeEstimate est = get_gpu_size_estimate(pBuildInfo, NULL,
                                                     pMaxPrimitiveCounts);
   struct scratch_layout scratch = get_gpu_scratch_layout(ANV_NULL_ADDRESS, est,
                                                          device->bvh_build_method);

   pSizeInfo->accelerationStructureSize = est.sizeTotal;
   pSizeInfo->buildScratchSize = scratch.total_size;
   pSizeInfo->updateScratchSize = scratch.total_size; /* TODO */
}

void
genX(GetAccelerationStructureBuildSizesKHR)(
   VkDevice _device,
   VkAccelerationStructureBuildTypeKHR buildType,
   const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
   const uint32_t* pMaxPrimitiveCounts,
   VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   assert(pSizeInfo->sType ==
          VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR);

   VkAccelerationStructureBuildSizesInfoKHR gpu_size_info;
   anv_get_gpu_acceleration_structure_size(device, buildType, pBuildInfo,
                                           pMaxPrimitiveCounts,
                                           &gpu_size_info);

   pSizeInfo->accelerationStructureSize =
      gpu_size_info.accelerationStructureSize;
   pSizeInfo->buildScratchSize = gpu_size_info.buildScratchSize;
   pSizeInfo->updateScratchSize = gpu_size_info.updateScratchSize;
}

void
genX(GetDeviceAccelerationStructureCompatibilityKHR)(
   VkDevice _device,
   const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
   VkAccelerationStructureCompatibilityKHR* pCompatibility)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (memcmp(pVersionInfo->pVersionData,
              device->physical->rt_uuid,
              sizeof(device->physical->rt_uuid)) == 0) {
      *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR;
   } else {
      *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
   }
}

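/* The helpers below translate Vulkan geometry flags, index types and vertex
 * formats into their GRL kernel equivalents.
 */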
static inline uint8_t
vk_to_grl_GeometryFlags(VkGeometryFlagsKHR flags)
{
   uint8_t grl_flags = GEOMETRY_FLAG_NONE;
   unsigned mask = flags;
   while (mask) {
      int i = u_bit_scan(&mask);
      switch ((VkGeometryFlagBitsKHR)(1u << i)) {
      case VK_GEOMETRY_OPAQUE_BIT_KHR:
         grl_flags |= GEOMETRY_FLAG_OPAQUE;
         break;
      case VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR:
         grl_flags |= GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION;
         break;
      default:
         unreachable("Unsupported acceleration structure build flag");
      }
   }
   return grl_flags;
}

static inline IndexFormat
vk_to_grl_IndexFormat(VkIndexType type)
{
   switch (type) {
   case VK_INDEX_TYPE_NONE_KHR:  return INDEX_FORMAT_NONE;
   case VK_INDEX_TYPE_UINT8_KHR: unreachable("No UINT8 support yet");
   case VK_INDEX_TYPE_UINT16:    return INDEX_FORMAT_R16_UINT;
   case VK_INDEX_TYPE_UINT32:    return INDEX_FORMAT_R32_UINT;
   default:
      unreachable("Unsupported index type");
   }
}

static inline VertexFormat
vk_to_grl_VertexFormat(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R32G32_SFLOAT:       return VERTEX_FORMAT_R32G32_FLOAT;
   case VK_FORMAT_R32G32B32_SFLOAT:    return VERTEX_FORMAT_R32G32B32_FLOAT;
   case VK_FORMAT_R16G16_SFLOAT:       return VERTEX_FORMAT_R16G16_FLOAT;
   case VK_FORMAT_R16G16B16A16_SFLOAT: return VERTEX_FORMAT_R16G16B16A16_FLOAT;
   case VK_FORMAT_R16G16_SNORM:        return VERTEX_FORMAT_R16G16_SNORM;
   case VK_FORMAT_R16G16B16A16_SNORM:  return VERTEX_FORMAT_R16G16B16A16_SNORM;
   case VK_FORMAT_R16G16B16A16_UNORM:  return VERTEX_FORMAT_R16G16B16A16_UNORM;
   case VK_FORMAT_R16G16_UNORM:        return VERTEX_FORMAT_R16G16_UNORM;
   /* case VK_FORMAT_R10G10B10A2_UNORM: return VERTEX_FORMAT_R10G10B10A2_UNORM; */
   case VK_FORMAT_R8G8B8A8_UNORM:      return VERTEX_FORMAT_R8G8B8A8_UNORM;
   case VK_FORMAT_R8G8_UNORM:          return VERTEX_FORMAT_R8G8_UNORM;
   case VK_FORMAT_R8G8B8A8_SNORM:      return VERTEX_FORMAT_R8G8B8A8_SNORM;
   case VK_FORMAT_R8G8_SNORM:          return VERTEX_FORMAT_R8G8_SNORM;
   default:
      unreachable("Unsupported vertex format");
   }
}

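/* Convert one VkAccelerationStructureGeometryKHR plus its build-range
 * parameters into a GRL Geo descriptor, applying the transform, primitive
 * and first-vertex offsets to the device addresses.
 */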
static struct Geo
vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry,
              uint32_t prim_count,
              uint32_t transform_offset,
              uint32_t primitive_offset,
              uint32_t first_vertex)
{
   struct Geo geo = {
      .Flags = vk_to_grl_GeometryFlags(pGeometry->flags),
   };

   switch (pGeometry->geometryType) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR: {
      const VkAccelerationStructureGeometryTrianglesDataKHR *vk_tri =
         &pGeometry->geometry.triangles;

      geo.Type = GEOMETRY_TYPE_TRIANGLES;

      geo.Desc.Triangles.pTransformBuffer =
         vk_tri->transformData.deviceAddress;
      geo.Desc.Triangles.pIndexBuffer =
         vk_tri->indexData.deviceAddress;
      geo.Desc.Triangles.pVertexBuffer =
         vk_tri->vertexData.deviceAddress;
      geo.Desc.Triangles.VertexBufferByteStride = vk_tri->vertexStride;

      if (geo.Desc.Triangles.pTransformBuffer)
         geo.Desc.Triangles.pTransformBuffer += transform_offset;

      if (vk_tri->indexType == VK_INDEX_TYPE_NONE_KHR) {
         geo.Desc.Triangles.IndexCount = 0;
         geo.Desc.Triangles.VertexCount = prim_count * 3;
         geo.Desc.Triangles.IndexFormat = INDEX_FORMAT_NONE;
         geo.Desc.Triangles.pVertexBuffer += primitive_offset;
      } else {
         geo.Desc.Triangles.IndexCount = prim_count * 3;
         geo.Desc.Triangles.VertexCount = vk_tri->maxVertex;
         geo.Desc.Triangles.IndexFormat =
            vk_to_grl_IndexFormat(vk_tri->indexType);
         geo.Desc.Triangles.pIndexBuffer += primitive_offset;
      }

      geo.Desc.Triangles.VertexFormat =
         vk_to_grl_VertexFormat(vk_tri->vertexFormat);
      geo.Desc.Triangles.pVertexBuffer += vk_tri->vertexStride * first_vertex;
      break;
   }

   case VK_GEOMETRY_TYPE_AABBS_KHR: {
      const VkAccelerationStructureGeometryAabbsDataKHR *vk_aabbs =
         &pGeometry->geometry.aabbs;
      geo.Type = GEOMETRY_TYPE_PROCEDURAL;
      geo.Desc.Procedural.pAABBs_GPUVA =
         vk_aabbs->data.deviceAddress + primitive_offset;
      geo.Desc.Procedural.AABBByteStride = vk_aabbs->stride;
      geo.Desc.Procedural.AABBCount = prim_count;
      break;
   }

   default:
      unreachable("Invalid geometry type");
   }

   return geo;
}

#include "grl/grl_metakernel_copy.h"
#include "grl/grl_metakernel_misc.h"
#include "grl/grl_metakernel_build_primref.h"
#include "grl/grl_metakernel_new_sah_builder.h"
#include "grl/grl_metakernel_build_leaf.h"

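/* Per-VkAccelerationStructureBuildGeometryInfoKHR bookkeeping gathered up
 * front so the GRL metakernels for every build in the batch can be
 * dispatched round by round.
 */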
struct build_state {
   enum anv_rt_bvh_build_method build_method;

   struct MKSizeEstimate estimate;
   struct scratch_layout scratch;
   struct MKBuilderState state;

   struct anv_address bvh_addr;

   size_t geom_size_prefix_sum_buffer;
   size_t transient_size;

   uint32_t leaf_type;
   uint32_t leaf_size;

   uint32_t num_geometries;
   uint32_t num_instances;

   uint64_t instances_addr;
   bool array_of_instances_ptr;

   const VkAccelerationStructureGeometryKHR *vk_geoms;
};

static void
get_binnedsah_scratch_buffers(struct build_state *bs,
                              uint64_t *p_qnode_buffer,
                              uint64_t *p_primref_indices,
                              uint64_t *p_bvh2)
{
   if (bs->estimate.numBuildPrimitives == 0)
   {
      *p_bvh2 = 0;
      *p_qnode_buffer = 0;
      *p_primref_indices = 0;
      return;
   }

   size_t bvh2_size = get_bvh2_size(bs->estimate.numBuildPrimitives);
   if (bs->estimate.leaf_data_size < bvh2_size) {
      assert(bs->scratch.bvh2_buffer != 0);
      *p_bvh2 = bs->scratch.bvh2_buffer;
   } else {
      *p_bvh2 = intel_canonical_address(bs->state.bvh_buffer +
                                        bs->estimate.leaf_data_start);
   }

   assert(bs->scratch.qnode_buffer != 0);
   *p_qnode_buffer = bs->scratch.qnode_buffer;

   assert(bs->scratch.leaf_index_buffers != 0);
   *p_primref_indices = bs->scratch.leaf_index_buffers;
}

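/* Copy CPU-prepared kernel arguments into a mapped command-buffer allocation
 * at the given byte offset.
 */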
static void
write_memory(struct anv_cmd_alloc alloc, size_t offset, const void *data, size_t data_len)
{
   assert((offset + data_len) < alloc.size);
   memcpy(alloc.map + offset, data, data_len);
}

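/* Batched GPU build of acceleration structures. The work is organized in
 * rounds: size/scratch estimation and transient data preparation on the CPU,
 * then the init_globals kernel, metadata/primref generation, the actual BVH
 * builders (trivial or new SAH) and finally leaf construction, with cache
 * flushes in between.
 */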
static void
cmd_build_acceleration_structures(
   struct anv_cmd_buffer *cmd_buffer,
   uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos,
   const VkDeviceAddress *pIndirectDeviceAddresses,
   const uint32_t *pIndirectStrides,
   const uint32_t *const *ppMaxPrimitiveCounts)
{
   struct anv_device *device = cmd_buffer->device;
   VK_MULTIALLOC(ma);

   struct build_state *builds;
   vk_multialloc_add(&ma, &builds, struct build_state, infoCount);

   if (!vk_multialloc_zalloc(&ma,
                             &cmd_buffer->device->vk.alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   trace_intel_begin_as_build(&cmd_buffer->trace);

   /* TODO: Indirect */
   assert(ppBuildRangeInfos != NULL);

   size_t transient_mem_init_globals_size = 0;
   size_t transient_mem_init_globals_offset = 0;

   size_t transient_total = 0;

   size_t private_mem_total = 0;

   size_t num_trivial_builds = 0;
   size_t num_new_sah_builds = 0;

   /* Prepare a bunch of data for the kernels we have to run. */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i];
      struct anv_address scratch_addr =
         anv_address_from_u64(pInfo->scratchData.deviceAddress);

      const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos =
         ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL;
      const uint32_t *pMaxPrimitiveCounts =
         ppMaxPrimitiveCounts ? ppMaxPrimitiveCounts[i] : NULL;

      ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel,
                      pInfo->dstAccelerationStructure);

      bs->build_method = device->bvh_build_method;

      bs->bvh_addr = anv_address_from_u64(vk_acceleration_structure_get_va(dst_accel));

      bs->estimate = get_gpu_size_estimate(pInfo, pBuildRangeInfos,
                                           pMaxPrimitiveCounts);
      bs->scratch = get_gpu_scratch_layout(scratch_addr, bs->estimate,
                                           bs->build_method);

      uint32_t leaf_size, leaf_type;

      switch (pInfo->type) {
      case VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR: {
         assert(pInfo->geometryCount == 1);

         const VkAccelerationStructureGeometryKHR *pGeometry =
            get_geometry(pInfo, 0);
         assert(pGeometry->geometryType == VK_GEOMETRY_TYPE_INSTANCES_KHR);

         const VkAccelerationStructureGeometryInstancesDataKHR *instances =
            &pGeometry->geometry.instances;

         bs->num_instances = pBuildRangeInfos[0].primitiveCount;
         bs->instances_addr = instances->data.deviceAddress;
         bs->array_of_instances_ptr = instances->arrayOfPointers;
         leaf_type = NODE_TYPE_INSTANCE;
         leaf_size = GENX(RT_BVH_INSTANCE_LEAF_length) * 4;
         break;
      }

      case VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR: {
         bs->num_geometries = pInfo->geometryCount;
         leaf_type = NODE_TYPE_QUAD;
         leaf_size = GENX(RT_BVH_QUAD_LEAF_length) * 4;
         break;
      }

      default:
         unreachable("Unsupported acceleration structure type");
      }

      size_t geom_struct_size = bs->num_geometries * sizeof(struct Geo);
      size_t geom_prefix_sum_size = align_uintptr(sizeof(uint32_t) * (bs->num_geometries + 1), 64);

      bs->transient_size = geom_prefix_sum_size + geom_struct_size;

      bs->geom_size_prefix_sum_buffer = transient_total + 0;

      bs->state = (struct MKBuilderState) {
         .geomDesc_buffer = bs->geom_size_prefix_sum_buffer +
                            geom_prefix_sum_size,
         .build_primref_buffer = bs->scratch.primrefs,
         .build_globals = bs->scratch.globals,
         .bvh_buffer = anv_address_physical(bs->bvh_addr),
         .leaf_type = leaf_type,
         .leaf_size = leaf_size,
      };

      transient_total += bs->transient_size;

      switch (device->bvh_build_method) {
      case ANV_BVH_BUILD_METHOD_TRIVIAL:
         num_trivial_builds++;
         break;
      case ANV_BVH_BUILD_METHOD_NEW_SAH:
         num_new_sah_builds++;
         break;
      default:
         unreachable("invalid BVH build method");
      }

      transient_mem_init_globals_size += sizeof(struct BatchedInitGlobalsData);
   }

   transient_total = align_transient_size(transient_total);
   transient_mem_init_globals_offset = transient_total;
   transient_total += align_transient_size(transient_mem_init_globals_size);

   size_t transient_mem_binnedsah_size = 0;
   size_t transient_mem_binnedsah_offset = 0;
   size_t private_mem_binnedsah_size = 0;
   size_t private_mem_binnedsah_offset = 0;

   transient_mem_binnedsah_size = get_batched_binnedsah_transient_mem_size(num_new_sah_builds);
   transient_mem_binnedsah_offset = transient_total;
   transient_total += align_transient_size(transient_mem_binnedsah_size);

   private_mem_binnedsah_size = get_batched_binnedsah_private_mem_size(num_new_sah_builds);
   private_mem_binnedsah_offset = private_mem_total;
   private_mem_total += align_private_size(private_mem_binnedsah_size);

   /* Allocate the required memory, unless we already have a suitable buffer. */
   struct anv_cmd_alloc private_mem_alloc;
   if (private_mem_total > cmd_buffer->state.rt.build_priv_mem_size) {
      private_mem_alloc =
         anv_cmd_buffer_alloc_space(cmd_buffer, private_mem_total, 64,
                                    false /* mapped */);
      if (anv_cmd_alloc_is_empty(private_mem_alloc)) {
         anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
         goto error;
      }

      cmd_buffer->state.rt.build_priv_mem_addr = private_mem_alloc.address;
      cmd_buffer->state.rt.build_priv_mem_size = private_mem_alloc.size;
   } else {
      private_mem_alloc = (struct anv_cmd_alloc) {
         .address = cmd_buffer->state.rt.build_priv_mem_addr,
         .map = anv_address_map(cmd_buffer->state.rt.build_priv_mem_addr),
         .size = cmd_buffer->state.rt.build_priv_mem_size,
      };
   }

   struct anv_cmd_alloc transient_mem_alloc =
      anv_cmd_buffer_alloc_space(cmd_buffer, transient_total, 64,
                                 true /* mapped */);
   if (transient_total > 0 && anv_cmd_alloc_is_empty(transient_mem_alloc)) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto error;
   }

   uint64_t private_base = anv_address_physical(private_mem_alloc.address);
   uint64_t transient_base = anv_address_physical(transient_mem_alloc.address);

   /* Prepare transient memory */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i];

      const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos =
         ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL;

      struct Geo *geos = transient_mem_alloc.map + bs->state.geomDesc_buffer;
      uint32_t *prefixes = transient_mem_alloc.map + bs->geom_size_prefix_sum_buffer;
      uint32_t prefix_sum = 0;
      for (unsigned g = 0; g < bs->num_geometries; g++) {
         const VkAccelerationStructureGeometryKHR *pGeometry = get_geometry(pInfo, g);
         uint32_t prim_count = pBuildRangeInfos[g].primitiveCount;
         geos[g] = vk_to_grl_Geo(pGeometry, prim_count,
                                 pBuildRangeInfos[g].transformOffset,
                                 pBuildRangeInfos[g].primitiveOffset,
                                 pBuildRangeInfos[g].firstVertex);

         prefixes[g] = prefix_sum;
         prefix_sum += prim_count;
      }

      prefixes[bs->num_geometries] = prefix_sum;

      bs->geom_size_prefix_sum_buffer =
         intel_canonical_address(bs->geom_size_prefix_sum_buffer +
                                 transient_base);
      bs->state.geomDesc_buffer =
         intel_canonical_address(bs->state.geomDesc_buffer +
                                 transient_base);

      struct BatchedInitGlobalsData data = {
         .p_build_globals = bs->scratch.globals,
         .p_bvh_buffer = anv_address_physical(bs->bvh_addr),

         .numPrimitives = 0,
         .numGeometries = bs->num_geometries,
         .numInstances = bs->num_instances,

         .instance_descs_start = bs->estimate.instance_descs_start,
         .geo_meta_data_start = bs->estimate.geo_meta_data_start,
         .node_data_start = bs->estimate.node_data_start,
         .leaf_data_start = bs->estimate.leaf_data_start,
         .procedural_data_start = bs->estimate.procedural_data_start,
         .back_pointer_start = bs->estimate.back_pointer_start,
         .sizeTotal = bs->estimate.sizeTotal,

         .leafType = bs->state.leaf_type,
         .leafSize = bs->state.leaf_size,
      };

      write_memory(transient_mem_alloc,
                   transient_mem_init_globals_offset + i * sizeof(data),
                   &data, sizeof(data));
   }

   if (anv_cmd_buffer_is_render_queue(cmd_buffer))
      genX(flush_pipeline_select_gpgpu)(cmd_buffer);

   /* Due to the nature of GRL and its heavy use of jumps/predication, we
    * cannot tell exactly in what order the CFE_STATE commands we insert are
    * going to be executed. So always use the largest possible size.
    */
   genX(cmd_buffer_ensure_cfe_state)(
      cmd_buffer,
      cmd_buffer->device->physical->max_grl_scratch_size);

   /* Round 1 : init_globals kernel */
   genX(grl_misc_batched_init_globals)(
      cmd_buffer,
      intel_canonical_address(transient_base +
                              transient_mem_init_globals_offset),
      infoCount);

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_GRL_FLUSH_FLAGS,
                             "building accel struct");

   /* Round 2 : Copy instance/geometry data from the application provided
    * buffers into the acceleration structures.
    */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      /* Metadata */
      if (bs->num_instances) {
         assert(bs->num_geometries == 0);

         const uint64_t copy_size = bs->num_instances * sizeof(InstanceDesc);
         /* This must be calculated in the same way as
          * groupCountForGeoMetaDataCopySize.
          */
         const uint32_t num_threads = (copy_size >> 8) + 3;

         if (bs->array_of_instances_ptr) {
            genX(grl_misc_copy_instance_ptrs)(
               cmd_buffer,
               anv_address_physical(anv_address_add(bs->bvh_addr,
                                                    bs->estimate.instance_descs_start)),
               bs->instances_addr,
               copy_size, num_threads);
         } else {
            genX(grl_misc_copy_instances)(
               cmd_buffer,
               anv_address_physical(anv_address_add(bs->bvh_addr,
                                                    bs->estimate.instance_descs_start)),
               bs->instances_addr,
               copy_size, num_threads);
         }
      }

      if (bs->num_geometries) {
         assert(bs->num_instances == 0);
         const uint64_t copy_size = bs->num_geometries * sizeof(struct GeoMetaData);

         /* This must be calculated in the same way as
          * groupCountForGeoMetaDataCopySize.
          */
         const uint32_t num_threads = (copy_size >> 6) + 1;

         genX(grl_misc_copy_geo_meta_data)(
            cmd_buffer,
            anv_address_physical(anv_address_add(bs->bvh_addr,
                                                 bs->estimate.geo_meta_data_start)),
            bs->state.geomDesc_buffer,
            copy_size,
            num_threads);
      }

      /* Primrefs */
      if (bs->num_instances) {
         if (bs->array_of_instances_ptr) {
            genX(grl_build_primref_buildPrimirefsFromInstancesArrOfPtrs)(
               cmd_buffer,
               bs->instances_addr,
               PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
               PREFIX_MK_STATE(grl_build_primref, bs->state),
               false /* allowUpdate */);
         } else {
            genX(grl_build_primref_buildPrimirefsFromInstances)(
               cmd_buffer,
               bs->instances_addr,
               PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
               PREFIX_MK_STATE(grl_build_primref, bs->state),
               false /* allowUpdate */);
         }
      }

      if (bs->num_geometries) {
         const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i];
         const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos =
            ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL;

         assert(pInfo->geometryCount == bs->num_geometries);
         for (unsigned g = 0; g < pInfo->geometryCount; g++) {
            const VkAccelerationStructureGeometryKHR *pGeometry =
               get_geometry(pInfo, g);

            switch (pGeometry->geometryType) {
            case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
               genX(grl_build_primref_primrefs_from_tris)(
                  cmd_buffer,
                  PREFIX_MK_STATE(grl_build_primref, bs->state),
                  PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
                  bs->state.geomDesc_buffer + g * sizeof(struct Geo),
                  g,
                  vk_to_grl_GeometryFlags(pGeometry->flags),
                  /* TODO: Indirect */
                  pBuildRangeInfos[g].primitiveCount);
               break;

            case VK_GEOMETRY_TYPE_AABBS_KHR:
               genX(grl_build_primref_primrefs_from_proc)(
                  cmd_buffer,
                  PREFIX_MK_STATE(grl_build_primref, bs->state),
                  PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
                  bs->state.geomDesc_buffer + g * sizeof(struct Geo),
                  g,
                  vk_to_grl_GeometryFlags(pGeometry->flags),
                  /* TODO: Indirect */
                  pBuildRangeInfos[g].primitiveCount);
               break;

            default:
               unreachable("Invalid geometry type");
            }
         }
      }
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_GRL_FLUSH_FLAGS,
                             "building accel struct");

   /* Dispatch trivial builds */
   if (num_trivial_builds) {
      for (uint32_t i = 0; i < infoCount; i++) {
         struct build_state *bs = &builds[i];

         if (bs->build_method != ANV_BVH_BUILD_METHOD_TRIVIAL)
            continue;

         genX(grl_new_sah_builder_single_pass_binsah)(
            cmd_buffer,
            bs->scratch.globals,
            bs->state.bvh_buffer,
            bs->state.build_primref_buffer,
            bs->scratch.leaf_index_buffers,
            false /* alloc_backpointers */);
      }
   }

   /* Dispatch new SAH builds */
   if (num_new_sah_builds) {
      size_t global_ptrs_offset = transient_mem_binnedsah_offset;
      size_t buffers_info_offset = transient_mem_binnedsah_offset + sizeof(gpuva_t) * num_new_sah_builds;

      size_t scheduler_offset = private_mem_binnedsah_offset;
      size_t sah_globals_offset = private_mem_binnedsah_offset + get_scheduler_size(num_new_sah_builds);

      struct SAHBuildArgsBatchable args = {
         .num_builds = infoCount,
         .p_globals_ptrs = intel_canonical_address(transient_base + global_ptrs_offset),
         .p_buffers_info = intel_canonical_address(transient_base + buffers_info_offset),
         .p_scheduler = intel_canonical_address(private_base + scheduler_offset),
         .p_sah_globals = intel_canonical_address(private_base + sah_globals_offset),
         .num_max_qnode_global_root_buffer_entries = MAX2(num_new_sah_builds, QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM),
      };

      for (uint32_t i = 0; i < infoCount; i++) {
         struct build_state *bs = &builds[i];

         if (bs->build_method != ANV_BVH_BUILD_METHOD_NEW_SAH)
            continue;

         uint64_t p_build_primref_index_buffers;
         uint64_t p_bvh2;
         uint64_t p_qnode_child_buffer;

         get_binnedsah_scratch_buffers(bs,
                                       &p_qnode_child_buffer,
                                       &p_build_primref_index_buffers,
                                       &p_bvh2);

         struct SAHBuildBuffersInfo buffers = {
            .p_primref_index_buffers = bs->scratch.leaf_index_buffers,
            .p_bvh_base = bs->state.bvh_buffer,
            .p_primrefs_buffer = bs->state.build_primref_buffer,
            .p_bvh2 = p_bvh2,
            .p_qnode_root_buffer = p_qnode_child_buffer,
            .sah_globals_flags = 0,
         };

         write_memory(transient_mem_alloc, buffers_info_offset, &buffers, sizeof(buffers));
         buffers_info_offset += sizeof(buffers);

         write_memory(transient_mem_alloc, global_ptrs_offset, &bs->state.build_globals,
                      sizeof(bs->state.build_globals));
         global_ptrs_offset += sizeof(bs->state.build_globals);
      }

      genX(grl_new_sah_builder_new_sah_build_batchable)(
         cmd_buffer, PREFIX_MK_SAH_BUILD_ARGS_BATCHABLE(grl_new_sah_builder, args));
   }

   if (num_new_sah_builds == 0)
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_GRL_FLUSH_FLAGS,
                                "building accel struct");

   /* Finally write the leaves. */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      if (bs->num_instances) {
         assert(bs->num_geometries == 0);
         if (bs->array_of_instances_ptr) {
            genX(grl_leaf_builder_buildLeafDXR_instances_pointers)(cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->instances_addr,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               bs->estimate.numBuildPrimitives);
         } else {
            genX(grl_leaf_builder_buildLeafDXR_instances)(cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->instances_addr,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               bs->estimate.numBuildPrimitives);
         }
      }

      if (bs->num_geometries) {
         assert(bs->num_instances == 0);
         const uint64_t p_numPrimitives =
            bs->state.build_globals + offsetof(struct Globals, numPrimitives);

         assert(bs->estimate.numProcedurals == 0 ||
                bs->estimate.numTriangles == 0);
         if (bs->estimate.numProcedurals) {
            genX(grl_leaf_builder_buildLeafDXR_procedurals)(
               cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               p_numPrimitives);
         } else {
            genX(grl_leaf_builder_buildLeafDXR_quads)(
               cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               p_numPrimitives,
               false /* allow_updates */);
         }
      }
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_GRL_FLUSH_FLAGS,
                             "building accel struct");

   trace_intel_end_as_build(&cmd_buffer->trace);

 error:
   vk_free(&cmd_buffer->device->vk.alloc, builds);
}

void
genX(CmdBuildAccelerationStructuresKHR)(
   VkCommandBuffer commandBuffer,
   uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   if (anv_batch_has_error(&cmd_buffer->batch))
      return;

   cmd_build_acceleration_structures(cmd_buffer, infoCount, pInfos,
                                     ppBuildRangeInfos, NULL, NULL, NULL);
}

void
genX(CmdBuildAccelerationStructuresIndirectKHR)(
   VkCommandBuffer commandBuffer,
   uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
   const VkDeviceAddress* pIndirectDeviceAddresses,
   const uint32_t* pIndirectStrides,
   const uint32_t* const* ppMaxPrimitiveCounts)
{
   unreachable("Unimplemented");
}

void
genX(CmdCopyAccelerationStructureKHR)(
   VkCommandBuffer commandBuffer,
   const VkCopyAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src);
   ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst);

   assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR ||
          pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR);

   if (pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR) {
      uint64_t src_size_addr =
         vk_acceleration_structure_get_va(src_accel) +
         offsetof(struct BVHBase, Meta.allocationSize);
      genX(grl_copy_clone_indirect)(
         cmd_buffer,
         vk_acceleration_structure_get_va(dst_accel),
         vk_acceleration_structure_get_va(src_accel),
         src_size_addr);
   } else {
      genX(grl_copy_compact)(
         cmd_buffer,
         vk_acceleration_structure_get_va(dst_accel),
         vk_acceleration_structure_get_va(src_accel));
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after copy acceleration struct");
}

void
genX(CmdCopyAccelerationStructureToMemoryKHR)(
   VkCommandBuffer commandBuffer,
   const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src);
   struct anv_device *device = cmd_buffer->device;
   uint64_t src_size_addr =
      vk_acceleration_structure_get_va(src_accel) +
      offsetof(struct BVHBase, Meta.allocationSize);

   assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR);

   genX(grl_copy_serialize_indirect)(
      cmd_buffer,
      pInfo->dst.deviceAddress,
      vk_acceleration_structure_get_va(src_accel),
      anv_address_physical(device->rt_uuid_addr),
      src_size_addr);

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after copy acceleration struct");
}

void
genX(CmdCopyMemoryToAccelerationStructureKHR)(
   VkCommandBuffer commandBuffer,
   const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst);

   assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR);

   uint64_t src_size_addr = pInfo->src.deviceAddress +
      offsetof(struct SerializationHeader, DeserializedSizeInBytes);
   genX(grl_copy_deserialize_indirect)(
      cmd_buffer,
      vk_acceleration_structure_get_va(dst_accel),
      pInfo->src.deviceAddress,
      src_size_addr);

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after copy acceleration struct");
}

/* TODO: Host commands */

VkResult
genX(BuildAccelerationStructuresKHR)(
   VkDevice _device,
   VkDeferredOperationKHR deferredOperation,
   uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureKHR)(
   VkDevice _device,
   VkDeferredOperationKHR deferredOperation,
   const VkCopyAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureToMemoryKHR)(
   VkDevice _device,
   VkDeferredOperationKHR deferredOperation,
   const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyMemoryToAccelerationStructureKHR)(
   VkDevice _device,
   VkDeferredOperationKHR deferredOperation,
   const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(WriteAccelerationStructuresPropertiesKHR)(
   VkDevice _device,
   uint32_t accelerationStructureCount,
   const VkAccelerationStructureKHR* pAccelerationStructures,
   VkQueryType queryType,
   size_t dataSize,
   void* pData,
   size_t stride)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

#endif /* GFX_VERx10 == 125 */