• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Friedrich Vock
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "bvh/bvh.h"
25 #include "util/half_float.h"
26 #include "amd_family.h"
27 #include "radv_private.h"
28 #include "vk_acceleration_structure.h"
29 #include "vk_common_entrypoints.h"
30 
31 #define RRA_MAGIC 0x204644525F444D41
32 
/* Top-level header of an RRA capture file. */
struct rra_file_header {
   uint64_t magic;   /* RRA_MAGIC — "AMD_RDF " in little-endian ASCII */
   uint32_t version;
   uint32_t unused;
   uint64_t chunk_descriptions_offset; /* absolute file offset of the chunk description table */
   uint64_t chunk_descriptions_size;   /* size of that table in bytes */
};

static_assert(sizeof(struct rra_file_header) == 32, "rra_file_header does not match RRA spec");
42 
/* Per-chunk format versions written into rra_file_chunk_description::version. */
enum rra_chunk_version {
   RADV_RRA_ASIC_API_INFO_CHUNK_VERSION = 0x1,
   RADV_RRA_RAY_HISTORY_CHUNK_VERSION = 0x2,
   RADV_RRA_ACCEL_STRUCT_CHUNK_VERSION = 0xF0005,
};
48 
/* Source-API identifiers defined by the RRA file format. */
enum rra_file_api {
   RADV_RRA_API_DX9,
   RADV_RRA_API_DX11,
   RADV_RRA_API_DX12,
   RADV_RRA_API_VULKAN,
   RADV_RRA_API_OPENGL,
   RADV_RRA_API_OPENCL,
   RADV_RRA_API_MANTLE,
   RADV_RRA_API_GENERIC,
};
59 
/*
 * Describes one chunk (header + payload) inside the RRA file.
 * The static_assert below also guards that the enum member is 4 bytes.
 */
struct rra_file_chunk_description {
   char name[16];                  /* chunk identifier; not necessarily NUL-terminated */
   uint32_t is_zstd_compressed;
   enum rra_chunk_version version;
   uint64_t header_offset;         /* absolute file offset of the chunk header */
   uint64_t header_size;
   uint64_t data_offset;           /* absolute file offset of the chunk payload */
   uint64_t data_size;
   uint64_t unused;
};

static_assert(sizeof(struct rra_file_chunk_description) == 64, "rra_file_chunk_description does not match RRA spec");
72 
/*
 * Convert a hardware BVH node id to a virtual address.
 * Equivalent to (node & ~7) << 3, sign-extended from bit 47
 * (48-bit virtual address space).
 */
static uint64_t
node_to_addr(uint64_t node)
{
   /* Strip the node-type bits held in the low three bits. */
   uint64_t addr_bits = node & ~7ull;
   /* Shift the sign bit (bit 44 of the id) up to bit 63, then shift back
    * arithmetically so it replicates into the top 16 bits. */
   int64_t sign_extended = (int64_t)(addr_bits << 19);
   return (uint64_t)(sign_extended >> 16);
}
80 
81 static void
rra_dump_header(FILE * output,uint64_t chunk_descriptions_offset,uint64_t chunk_descriptions_size)82 rra_dump_header(FILE *output, uint64_t chunk_descriptions_offset, uint64_t chunk_descriptions_size)
83 {
84    struct rra_file_header header = {
85       .magic = RRA_MAGIC,
86       .version = 3,
87       .chunk_descriptions_offset = chunk_descriptions_offset,
88       .chunk_descriptions_size = chunk_descriptions_size,
89    };
90    fwrite(&header, sizeof(header), 1, output);
91 }
92 
93 static void
rra_dump_chunk_description(uint64_t offset,uint64_t header_size,uint64_t data_size,const char * name,enum rra_chunk_version version,FILE * output)94 rra_dump_chunk_description(uint64_t offset, uint64_t header_size, uint64_t data_size, const char *name,
95                            enum rra_chunk_version version, FILE *output)
96 {
97    struct rra_file_chunk_description chunk = {
98       .version = version,
99       .header_offset = offset,
100       .header_size = header_size,
101       .data_offset = offset + header_size,
102       .data_size = data_size,
103    };
104    memcpy(chunk.name, name, strnlen(name, sizeof(chunk.name)));
105    fwrite(&chunk, sizeof(struct rra_file_chunk_description), 1, output);
106 }
107 
/* Memory-type codes used by the RRA ASIC info chunk. */
enum rra_memory_type {
   RRA_MEMORY_TYPE_UNKNOWN,
   RRA_MEMORY_TYPE_DDR,
   RRA_MEMORY_TYPE_DDR2,
   RRA_MEMORY_TYPE_DDR3,
   RRA_MEMORY_TYPE_DDR4,
   RRA_MEMORY_TYPE_DDR5,
   RRA_MEMORY_TYPE_GDDR3,
   RRA_MEMORY_TYPE_GDDR4,
   RRA_MEMORY_TYPE_GDDR5,
   RRA_MEMORY_TYPE_GDDR6,
   RRA_MEMORY_TYPE_HBM,
   RRA_MEMORY_TYPE_HBM2,
   RRA_MEMORY_TYPE_HBM3,
   RRA_MEMORY_TYPE_LPDDR4,
   RRA_MEMORY_TYPE_LPDDR5,
};
125 
#define RRA_FILE_DEVICE_NAME_MAX_SIZE 256

/*
 * Payload of the ASIC info chunk. The unused[] members are padding that
 * keeps the fields at the offsets the RRA spec expects (see static_assert).
 */
struct rra_asic_info {
   uint64_t min_shader_clk_freq; /* Hz */
   uint64_t min_mem_clk_freq;    /* Hz */
   char unused[8];
   uint64_t max_shader_clk_freq; /* Hz */
   uint64_t max_mem_clk_freq;    /* Hz */
   uint32_t device_id;
   uint32_t rev_id;
   char unused2[80];
   uint64_t vram_size; /* bytes */
   uint32_t bus_width; /* bits */
   char unused3[12];
   char device_name[RRA_FILE_DEVICE_NAME_MAX_SIZE];
   char unused4[16];
   uint32_t mem_ops_per_clk;
   uint32_t mem_type; /* enum rra_memory_type */
   char unused5[135];
   bool valid;
};

static_assert(sizeof(struct rra_asic_info) == 568, "rra_asic_info does not match RRA spec");
149 
150 static uint32_t
amdgpu_vram_type_to_rra(uint32_t type)151 amdgpu_vram_type_to_rra(uint32_t type)
152 {
153    switch (type) {
154    case AMD_VRAM_TYPE_UNKNOWN:
155       return RRA_MEMORY_TYPE_UNKNOWN;
156    case AMD_VRAM_TYPE_DDR2:
157       return RRA_MEMORY_TYPE_DDR2;
158    case AMD_VRAM_TYPE_DDR3:
159       return RRA_MEMORY_TYPE_DDR3;
160    case AMD_VRAM_TYPE_DDR4:
161       return RRA_MEMORY_TYPE_DDR4;
162    case AMD_VRAM_TYPE_DDR5:
163       return RRA_MEMORY_TYPE_DDR5;
164    case AMD_VRAM_TYPE_HBM:
165       return RRA_MEMORY_TYPE_HBM;
166    case AMD_VRAM_TYPE_GDDR3:
167       return RRA_MEMORY_TYPE_GDDR3;
168    case AMD_VRAM_TYPE_GDDR4:
169       return RRA_MEMORY_TYPE_GDDR4;
170    case AMD_VRAM_TYPE_GDDR5:
171       return RRA_MEMORY_TYPE_GDDR5;
172    case AMD_VRAM_TYPE_GDDR6:
173       return RRA_MEMORY_TYPE_GDDR6;
174    case AMD_VRAM_TYPE_LPDDR4:
175       return RRA_MEMORY_TYPE_LPDDR4;
176    case AMD_VRAM_TYPE_LPDDR5:
177       return RRA_MEMORY_TYPE_LPDDR5;
178    default:
179       unreachable("invalid vram type");
180    }
181 }
182 
183 static void
rra_dump_asic_info(const struct radeon_info * rad_info,FILE * output)184 rra_dump_asic_info(const struct radeon_info *rad_info, FILE *output)
185 {
186    struct rra_asic_info asic_info = {
187       /* All frequencies are in Hz */
188       .min_shader_clk_freq = 0,
189       .max_shader_clk_freq = rad_info->max_gpu_freq_mhz * 1000000,
190       .min_mem_clk_freq = 0,
191       .max_mem_clk_freq = rad_info->memory_freq_mhz * 1000000,
192 
193       .vram_size = (uint64_t)rad_info->vram_size_kb * 1024,
194 
195       .mem_type = amdgpu_vram_type_to_rra(rad_info->vram_type),
196       .mem_ops_per_clk = ac_memory_ops_per_clock(rad_info->vram_type),
197       .bus_width = rad_info->memory_bus_width,
198 
199       .device_id = rad_info->pci.dev,
200       .rev_id = rad_info->pci_rev_id,
201    };
202 
203    strncpy(asic_info.device_name, rad_info->marketing_name ? rad_info->marketing_name : rad_info->name,
204            RRA_FILE_DEVICE_NAME_MAX_SIZE - 1);
205 
206    fwrite(&asic_info, sizeof(struct rra_asic_info), 1, output);
207 }
208 
/* Acceleration structure level as encoded in the RRA post-build info. */
enum rra_bvh_type {
   RRA_BVH_TYPE_TLAS,
   RRA_BVH_TYPE_BLAS,
};
213 
/* Header of one acceleration structure chunk inside the RRA file. */
struct rra_accel_struct_chunk_header {
   /*
    * Declaring this as uint64_t would make the compiler insert padding to
    * satisfy alignment restrictions.
    */
   uint32_t virtual_address[2]; /* GPU VA, split into low/high dwords */
   uint32_t metadata_offset;
   uint32_t metadata_size;
   uint32_t header_offset;
   uint32_t header_size;
   enum rra_bvh_type bvh_type;
};

static_assert(sizeof(struct rra_accel_struct_chunk_header) == 28,
              "rra_accel_struct_chunk_header does not match RRA spec");
229 
/* Bit-packed build parameters stored at the start of the RRA BVH header. */
struct rra_accel_struct_post_build_info {
   uint32_t bvh_type : 1;             /* enum rra_bvh_type */
   uint32_t reserved1 : 5;
   uint32_t tri_compression_mode : 2;
   uint32_t fp16_interior_mode : 2;
   uint32_t reserved2 : 6;
   uint32_t build_flags : 16;
};

static_assert(sizeof(struct rra_accel_struct_post_build_info) == 4,
              "rra_accel_struct_post_build_info does not match RRA spec");
241 
/* RRA-format BVH header; filled by rra_fill_accel_struct_header_common(). */
struct rra_accel_struct_header {
   struct rra_accel_struct_post_build_info post_build_info;
   /*
    * Size of the internal acceleration structure metadata in the
    * proprietary drivers. Seems to always be 128.
    */
   uint32_t metadata_size;
   uint32_t file_size; /* metadata + header + internal + leaf node data */
   uint32_t primitive_count;
   uint32_t active_primitive_count;
   uint32_t unused1;
   uint32_t geometry_description_count;
   VkGeometryTypeKHR geometry_type;
   uint32_t internal_nodes_offset;
   uint32_t leaf_nodes_offset;
   uint32_t geometry_infos_offset;
   uint32_t leaf_ids_offset;
   uint32_t interior_fp32_node_count;
   uint32_t interior_fp16_node_count;
   uint32_t leaf_node_count;
   uint32_t rt_driver_interface_version;
   uint64_t unused2;
   uint32_t half_fp32_node_count;
   char unused3[44];
};

/* Offset of the root node in the dumped BVH: the header, aligned to 64 bytes. */
#define RRA_ROOT_NODE_OFFSET align(sizeof(struct rra_accel_struct_header), 64)

static_assert(sizeof(struct rra_accel_struct_header) == 120, "rra_accel_struct_header does not match RRA spec");
271 
/* Fixed-size metadata block preceding the BVH data in the RRA chunk. */
struct rra_accel_struct_metadata {
   uint64_t virtual_address;
   uint32_t byte_size;
   char unused[116];
};

static_assert(sizeof(struct rra_accel_struct_metadata) == 128, "rra_accel_struct_metadata does not match RRA spec");
279 
/* Per-geometry record written after the leaf nodes. */
struct rra_geometry_info {
   uint32_t primitive_count : 29;
   uint32_t flags : 3;
   uint32_t unknown;
   uint32_t leaf_node_list_offset; /* byte offset of this geometry's ids in the leaf id list */
};

static_assert(sizeof(struct rra_geometry_info) == 12, "rra_geometry_info does not match RRA spec");
288 
289 static struct rra_accel_struct_header
rra_fill_accel_struct_header_common(struct radv_accel_struct_header * header,size_t parent_id_table_size,size_t leaf_node_data_size,size_t internal_node_data_size,uint64_t primitive_count)290 rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header, size_t parent_id_table_size,
291                                     size_t leaf_node_data_size, size_t internal_node_data_size,
292                                     uint64_t primitive_count)
293 {
294    struct rra_accel_struct_header result = {
295       .post_build_info =
296          {
297             .build_flags = header->build_flags,
298             /* Seems to be no compression */
299             .tri_compression_mode = 0,
300          },
301       .primitive_count = primitive_count,
302       /* TODO: calculate active primitives */
303       .active_primitive_count = primitive_count,
304       .geometry_description_count = header->geometry_count,
305       .interior_fp32_node_count = internal_node_data_size / sizeof(struct radv_bvh_box32_node),
306       .leaf_node_count = primitive_count,
307    };
308 
309    result.metadata_size = sizeof(struct rra_accel_struct_metadata) + parent_id_table_size;
310    result.file_size =
311       result.metadata_size + sizeof(struct rra_accel_struct_header) + internal_node_data_size + leaf_node_data_size;
312 
313    result.internal_nodes_offset = sizeof(struct rra_accel_struct_metadata);
314    result.leaf_nodes_offset = result.internal_nodes_offset + internal_node_data_size;
315    result.geometry_infos_offset = result.leaf_nodes_offset + leaf_node_data_size;
316    result.leaf_ids_offset = result.geometry_infos_offset;
317    if (!header->instance_count)
318       result.leaf_ids_offset += header->geometry_count * sizeof(struct rra_geometry_info);
319 
320    return result;
321 }
322 
/* RRA-format interior node with full-precision child bounds. */
struct rra_box32_node {
   uint32_t children[4];
   float coords[4][2][3]; /* per child: min corner, max corner */
   uint32_t reserved[4];
};

/* RRA-format interior node with half-precision child bounds. */
struct rra_box16_node {
   uint32_t children[4];
   float16_t coords[4][2][3];
};
333 
/*
 * RRA files contain this struct in place of hardware
 * instance nodes. They're named "instance desc" internally.
 */
struct rra_instance_node {
   float wto_matrix[12]; /* world-to-object, 3x4 row-major */
   uint32_t custom_instance_id : 24;
   uint32_t mask : 8;
   uint32_t sbt_offset : 24;
   uint32_t instance_flags : 8;
   uint64_t blas_va : 54; /* BLAS address (including metadata) >> 3 */
   uint64_t hw_instance_flags : 10;
   uint32_t instance_id;
   uint32_t unused1;
   uint32_t blas_metadata_size;
   uint32_t unused2;
   float otw_matrix[12]; /* object-to-world, 3x4 row-major */
};

static_assert(sizeof(struct rra_instance_node) == 128, "rra_instance_node does not match RRA spec!");
354 
/*
 * Format RRA uses for aabb nodes
 */
struct rra_aabb_node {
   float aabb[2][3]; /* min corner, max corner */
   uint32_t unused1[6];
   uint32_t geometry_id : 28;
   uint32_t flags : 4;
   uint32_t primitive_id;
   uint32_t unused[2];
};

static_assert(sizeof(struct rra_aabb_node) == 64, "rra_aabb_node does not match RRA spec!");
368 
/* RRA-format triangle leaf node. */
struct rra_triangle_node {
   float coords[3][3]; /* three vertices */
   uint32_t reserved[3];
   uint32_t geometry_id : 28;
   uint32_t flags : 4;
   uint32_t triangle_id;
   uint32_t reserved2;
   uint32_t id;
};

static_assert(sizeof(struct rra_triangle_node) == 64, "rra_triangle_node does not match RRA spec!");
380 
381 static void
rra_dump_tlas_header(struct radv_accel_struct_header * header,size_t parent_id_table_size,size_t leaf_node_data_size,size_t internal_node_data_size,uint64_t primitive_count,FILE * output)382 rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size, size_t leaf_node_data_size,
383                      size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
384 {
385    struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
386       header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
387    file_header.post_build_info.bvh_type = RRA_BVH_TYPE_TLAS;
388    file_header.geometry_type = VK_GEOMETRY_TYPE_INSTANCES_KHR;
389 
390    fwrite(&file_header, sizeof(struct rra_accel_struct_header), 1, output);
391 }
392 
393 static void
rra_dump_blas_header(struct radv_accel_struct_header * header,size_t parent_id_table_size,struct radv_accel_struct_geometry_info * geometry_infos,size_t leaf_node_data_size,size_t internal_node_data_size,uint64_t primitive_count,FILE * output)394 rra_dump_blas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size,
395                      struct radv_accel_struct_geometry_info *geometry_infos, size_t leaf_node_data_size,
396                      size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
397 {
398    struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
399       header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
400    file_header.post_build_info.bvh_type = RRA_BVH_TYPE_BLAS;
401    file_header.geometry_type = header->geometry_count ? geometry_infos->type : VK_GEOMETRY_TYPE_TRIANGLES_KHR;
402 
403    fwrite(&file_header, sizeof(struct rra_accel_struct_header), 1, output);
404 }
405 
406 static uint32_t
rra_parent_table_index_from_offset(uint32_t offset,uint32_t parent_table_size)407 rra_parent_table_index_from_offset(uint32_t offset, uint32_t parent_table_size)
408 {
409    uint32_t max_parent_table_index = parent_table_size / sizeof(uint32_t) - 1;
410    return max_parent_table_index - (offset - RRA_ROOT_NODE_OFFSET) / 64;
411 }
412 
/* Tracks whether validation failed and where, for error reporting. */
struct rra_validation_context {
   bool failed;       /* set on the first reported failure */
   char location[31]; /* human-readable position, e.g. "box32 node (offset=...)" */
};
417 
rra_validation_fail(struct rra_validation_context * ctx,const char * message,...)418 static void PRINTFLIKE(2, 3) rra_validation_fail(struct rra_validation_context *ctx, const char *message, ...)
419 {
420    if (!ctx->failed) {
421       fprintf(stderr, "radv: rra: Validation failed at %s:\n", ctx->location);
422       ctx->failed = true;
423    }
424 
425    fprintf(stderr, "   ");
426 
427    va_list list;
428    va_start(list, message);
429    vfprintf(stderr, message, list);
430    va_end(list);
431 
432    fprintf(stderr, "\n");
433 }
434 
435 static bool
rra_validate_header(struct radv_rra_accel_struct_data * accel_struct,const struct radv_accel_struct_header * header)436 rra_validate_header(struct radv_rra_accel_struct_data *accel_struct, const struct radv_accel_struct_header *header)
437 {
438    struct rra_validation_context ctx = {
439       .location = "header",
440    };
441 
442    if (accel_struct->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && header->instance_count > 0)
443       rra_validation_fail(&ctx, "BLAS contains instances");
444 
445    if (header->bvh_offset >= accel_struct->size)
446       rra_validation_fail(&ctx, "Invalid BVH offset %u", header->bvh_offset);
447 
448    if (header->instance_count * sizeof(struct radv_bvh_instance_node) >= accel_struct->size)
449       rra_validation_fail(&ctx, "Too many instances");
450 
451    return ctx.failed;
452 }
453 
454 static bool
is_internal_node(uint32_t type)455 is_internal_node(uint32_t type)
456 {
457    return type == radv_bvh_node_box16 || type == radv_bvh_node_box32;
458 }
459 
/* Human-readable names indexed by the 3-bit node type; triangle nodes occupy types 0-3. */
static const char *node_type_names[8] = {
   [radv_bvh_node_triangle + 0] = "triangle0",
   [radv_bvh_node_triangle + 1] = "triangle1",
   [radv_bvh_node_triangle + 2] = "triangle2",
   [radv_bvh_node_triangle + 3] = "triangle3",
   [radv_bvh_node_box16] = "box16",
   [radv_bvh_node_box32] = "box32",
   [radv_bvh_node_instance] = "instance",
   [radv_bvh_node_aabb] = "aabb",
};
470 
/*
 * Recursively validate an interior (box16/box32) node and its subtree:
 * leaf kinds must match the AS level, child offsets must be in bounds,
 * instance pointers must reference a known BLAS, and geometry ids must be
 * below geometry_count. Returns true if validation FAILED.
 */
static bool
rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node, uint32_t geometry_count,
                  uint32_t size, bool is_bottom_level)
{
   struct rra_validation_context ctx = {0};

   uint32_t cur_offset = (uint8_t *)node - data;
   snprintf(ctx.location, sizeof(ctx.location), "internal node (offset=%u)", cur_offset);

   /* The child ids are located at offset=0 for both box16 and box32 nodes. */
   uint32_t *children = node;
   for (uint32_t i = 0; i < 4; ++i) {
      /* 0xFFFFFFFF marks an unused child slot. */
      if (children[i] == 0xFFFFFFFF)
         continue;

      uint32_t type = children[i] & 7;
      uint32_t offset = (children[i] & (~7u)) << 3;

      /* A leaf's kind must match the AS level: instances only in a TLAS,
       * triangles/AABBs only in a BLAS. */
      if (!is_internal_node(type) && is_bottom_level == (type == radv_bvh_node_instance))
         rra_validation_fail(&ctx,
                             is_bottom_level ? "%s node in BLAS (child index %u)" : "%s node in TLAS (child index %u)",
                             node_type_names[type], i);

      if (offset > size) {
         rra_validation_fail(&ctx, "Invalid child offset (child index %u)", i);
         continue;
      }

      struct rra_validation_context child_ctx = {0};
      snprintf(child_ctx.location, sizeof(child_ctx.location), "%s node (offset=%u)", node_type_names[type], offset);

      if (is_internal_node(type)) {
         ctx.failed |= rra_validate_node(accel_struct_vas, data, data + offset, geometry_count, size, is_bottom_level);
      } else if (type == radv_bvh_node_instance) {
         /* The referenced BLAS must itself be a dumped acceleration structure. */
         struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
         uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
         if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va))
            rra_validation_fail(&child_ctx, "Invalid instance node pointer 0x%llx (offset: 0x%x)",
                                (unsigned long long)src->bvh_ptr, src->bvh_offset);
      } else if (type == radv_bvh_node_aabb) {
         /* NOTE(review): these geometry_id failures report against ctx rather
          * than child_ctx — confirm the inconsistency is intentional. */
         struct radv_bvh_aabb_node *src = (struct radv_bvh_aabb_node *)(data + offset);
         if ((src->geometry_id_and_flags & 0xFFFFFFF) >= geometry_count)
            rra_validation_fail(&ctx, "geometry_id >= geometry_count");
      } else {
         struct radv_bvh_triangle_node *src = (struct radv_bvh_triangle_node *)(data + offset);
         if ((src->geometry_id_and_flags & 0xFFFFFFF) >= geometry_count)
            rra_validation_fail(&ctx, "geometry_id >= geometry_count");
      }

      ctx.failed |= child_ctx.failed;
   }
   return ctx.failed;
}
524 
/* State shared across the recursive transcode from RADV BVH layout to RRA layout. */
struct rra_transcoding_context {
   const uint8_t *src;           /* source RADV BVH data */
   uint8_t *dst;                 /* destination RRA-format buffer */
   uint32_t dst_leaf_offset;     /* next write position for leaf nodes */
   uint32_t dst_internal_offset; /* next write position for box nodes */
   uint32_t *parent_id_table;    /* parent ids, indexed from the end of the table */
   uint32_t parent_id_table_size;
   uint32_t *leaf_node_ids;      /* destination leaf ids grouped per geometry */
   uint32_t *leaf_indices;       /* next free slot in leaf_node_ids, per geometry */
};
535 
536 static void
rra_transcode_triangle_node(struct rra_transcoding_context * ctx,const struct radv_bvh_triangle_node * src)537 rra_transcode_triangle_node(struct rra_transcoding_context *ctx, const struct radv_bvh_triangle_node *src)
538 {
539    struct rra_triangle_node *dst = (struct rra_triangle_node *)(ctx->dst + ctx->dst_leaf_offset);
540    ctx->dst_leaf_offset += sizeof(struct rra_triangle_node);
541 
542    for (int i = 0; i < 3; ++i)
543       for (int j = 0; j < 3; ++j)
544          dst->coords[i][j] = src->coords[i][j];
545    dst->triangle_id = src->triangle_id;
546    dst->geometry_id = src->geometry_id_and_flags & 0xfffffff;
547    dst->flags = src->geometry_id_and_flags >> 28;
548    dst->id = src->id;
549 }
550 
551 static void
rra_transcode_aabb_node(struct rra_transcoding_context * ctx,const struct radv_bvh_aabb_node * src,radv_aabb bounds)552 rra_transcode_aabb_node(struct rra_transcoding_context *ctx, const struct radv_bvh_aabb_node *src, radv_aabb bounds)
553 {
554    struct rra_aabb_node *dst = (struct rra_aabb_node *)(ctx->dst + ctx->dst_leaf_offset);
555    ctx->dst_leaf_offset += sizeof(struct rra_aabb_node);
556 
557    dst->aabb[0][0] = bounds.min.x;
558    dst->aabb[0][1] = bounds.min.y;
559    dst->aabb[0][2] = bounds.min.z;
560    dst->aabb[1][0] = bounds.max.x;
561    dst->aabb[1][1] = bounds.max.y;
562    dst->aabb[1][2] = bounds.max.z;
563 
564    dst->geometry_id = src->geometry_id_and_flags & 0xfffffff;
565    dst->flags = src->geometry_id_and_flags >> 28;
566    dst->primitive_id = src->primitive_id;
567 }
568 
569 static void
rra_transcode_instance_node(struct rra_transcoding_context * ctx,const struct radv_bvh_instance_node * src)570 rra_transcode_instance_node(struct rra_transcoding_context *ctx, const struct radv_bvh_instance_node *src)
571 {
572    uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
573 
574    struct rra_instance_node *dst = (struct rra_instance_node *)(ctx->dst + ctx->dst_leaf_offset);
575    ctx->dst_leaf_offset += sizeof(struct rra_instance_node);
576 
577    dst->custom_instance_id = src->custom_instance_and_mask & 0xffffff;
578    dst->mask = src->custom_instance_and_mask >> 24;
579    dst->sbt_offset = src->sbt_offset_and_flags & 0xffffff;
580    dst->instance_flags = src->sbt_offset_and_flags >> 24;
581    dst->blas_va = (blas_va + sizeof(struct rra_accel_struct_metadata)) >> 3;
582    dst->instance_id = src->instance_id;
583    dst->blas_metadata_size = sizeof(struct rra_accel_struct_metadata);
584 
585    memcpy(dst->wto_matrix, src->wto_matrix.values, sizeof(dst->wto_matrix));
586    memcpy(dst->otw_matrix, src->otw_matrix.values, sizeof(dst->otw_matrix));
587 }
588 
589 static uint32_t rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id,
590                                    radv_aabb bounds);
591 
/*
 * Transcode a box16 interior node and, recursively, its children.
 * The destination offset is reserved before recursing so children land
 * after this node in the internal-node section.
 */
static void
rra_transcode_box16_node(struct rra_transcoding_context *ctx, const struct radv_bvh_box16_node *src)
{
   /* Reserve this node's slot first; recursion below advances the offset further. */
   uint32_t dst_offset = ctx->dst_internal_offset;
   ctx->dst_internal_offset += sizeof(struct rra_box16_node);
   struct rra_box16_node *dst = (struct rra_box16_node *)(ctx->dst + dst_offset);

   memcpy(dst->coords, src->coords, sizeof(dst->coords));

   for (uint32_t i = 0; i < 4; ++i) {
      /* 0xffffffff marks an unused child slot. */
      if (src->children[i] == 0xffffffff) {
         dst->children[i] = 0xffffffff;
         continue;
      }

      /* Expand the half-precision child bounds; AABB leaves need them in fp32. */
      radv_aabb bounds = {
         .min =
            {
               _mesa_half_to_float(src->coords[i][0][0]),
               _mesa_half_to_float(src->coords[i][0][1]),
               _mesa_half_to_float(src->coords[i][0][2]),
            },
         .max =
            {
               _mesa_half_to_float(src->coords[i][1][0]),
               _mesa_half_to_float(src->coords[i][1][1]),
               _mesa_half_to_float(src->coords[i][1][2]),
            },
      };

      /* The parent id passed down is this node's RRA node id. */
      dst->children[i] = rra_transcode_node(ctx, radv_bvh_node_box16 | (dst_offset >> 3), src->children[i], bounds);
   }
}
625 
/*
 * Transcode a box32 interior node and, recursively, its children.
 * The destination offset is reserved before recursing so children land
 * after this node in the internal-node section.
 */
static void
rra_transcode_box32_node(struct rra_transcoding_context *ctx, const struct radv_bvh_box32_node *src)
{
   /* Reserve this node's slot first; recursion below advances the offset further. */
   uint32_t dst_offset = ctx->dst_internal_offset;
   ctx->dst_internal_offset += sizeof(struct rra_box32_node);
   struct rra_box32_node *dst = (struct rra_box32_node *)(ctx->dst + dst_offset);

   memcpy(dst->coords, src->coords, sizeof(dst->coords));

   for (uint32_t i = 0; i < 4; ++i) {
      /* NaN bounds mark an unused child slot in box32 nodes. */
      if (isnan(src->coords[i].min.x)) {
         dst->children[i] = 0xffffffff;
         continue;
      }

      /* The parent id passed down is this node's RRA node id. */
      dst->children[i] =
         rra_transcode_node(ctx, radv_bvh_node_box32 | (dst_offset >> 3), src->children[i], src->coords[i]);
   }
}
645 
646 static uint32_t
get_geometry_id(const void * node,uint32_t node_type)647 get_geometry_id(const void *node, uint32_t node_type)
648 {
649    if (node_type == radv_bvh_node_triangle) {
650       const struct radv_bvh_triangle_node *triangle = node;
651       return triangle->geometry_id_and_flags & 0xFFFFFFF;
652    }
653 
654    if (node_type == radv_bvh_node_aabb) {
655       const struct radv_bvh_aabb_node *aabb = node;
656       return aabb->geometry_id_and_flags & 0xFFFFFFF;
657    }
658 
659    return 0;
660 }
661 
/*
 * Transcode one node (and its subtree) into the RRA buffer, record its
 * parent id in the parent table, and append leaf ids to the per-geometry
 * lists. Returns the RRA node id of the transcoded node.
 */
static uint32_t
rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id, radv_aabb bounds)
{
   uint32_t node_type = src_id & 7;
   uint32_t src_offset = (src_id & (~7u)) << 3;

   uint32_t dst_offset;

   const void *src_child_node = ctx->src + src_offset;
   if (is_internal_node(node_type)) {
      /* Capture the offset before the call: the box transcoders advance it. */
      dst_offset = ctx->dst_internal_offset;
      if (node_type == radv_bvh_node_box32)
         rra_transcode_box32_node(ctx, src_child_node);
      else
         rra_transcode_box16_node(ctx, src_child_node);
   } else {
      /* Same for leaves: capture before the leaf transcoders advance it. */
      dst_offset = ctx->dst_leaf_offset;

      if (node_type == radv_bvh_node_triangle)
         rra_transcode_triangle_node(ctx, src_child_node);
      else if (node_type == radv_bvh_node_aabb)
         rra_transcode_aabb_node(ctx, src_child_node, bounds);
      else if (node_type == radv_bvh_node_instance)
         rra_transcode_instance_node(ctx, src_child_node);
   }

   uint32_t parent_id_index = rra_parent_table_index_from_offset(dst_offset, ctx->parent_id_table_size);
   ctx->parent_id_table[parent_id_index] = parent_id;

   uint32_t dst_id = node_type | (dst_offset >> 3);
   /* Leaf ids are collected per geometry, in traversal order. */
   if (!is_internal_node(node_type))
      ctx->leaf_node_ids[ctx->leaf_indices[get_geometry_id(src_child_node, node_type)]++] = dst_id;

   return dst_id;
}
697 
/* Aggregate sizes/counts gathered in a pre-pass over the source BVH. */
struct rra_bvh_info {
   uint32_t leaf_nodes_size;     /* total bytes of RRA leaf nodes */
   uint32_t internal_nodes_size; /* total bytes of RRA box nodes */
   struct rra_geometry_info *geometry_infos; /* per-geometry primitive counts */
};
703 
704 static void
rra_gather_bvh_info(const uint8_t * bvh,uint32_t node_id,struct rra_bvh_info * dst)705 rra_gather_bvh_info(const uint8_t *bvh, uint32_t node_id, struct rra_bvh_info *dst)
706 {
707    uint32_t node_type = node_id & 7;
708 
709    switch (node_type) {
710    case radv_bvh_node_box16:
711       dst->internal_nodes_size += sizeof(struct rra_box16_node);
712       break;
713    case radv_bvh_node_box32:
714       dst->internal_nodes_size += sizeof(struct rra_box32_node);
715       break;
716    case radv_bvh_node_instance:
717       dst->leaf_nodes_size += sizeof(struct rra_instance_node);
718       break;
719    case radv_bvh_node_triangle:
720       dst->leaf_nodes_size += sizeof(struct rra_triangle_node);
721       break;
722    case radv_bvh_node_aabb:
723       dst->leaf_nodes_size += sizeof(struct rra_aabb_node);
724       break;
725    default:
726       break;
727    }
728 
729    const void *node = bvh + ((node_id & (~7u)) << 3);
730    if (is_internal_node(node_type)) {
731       /* The child ids are located at offset=0 for both box16 and box32 nodes. */
732       const uint32_t *children = node;
733       for (uint32_t i = 0; i < 4; i++)
734          if (children[i] != 0xffffffff)
735             rra_gather_bvh_info(bvh, children[i], dst);
736    } else {
737       dst->geometry_infos[get_geometry_id(node, node_type)].primitive_count++;
738    }
739 }
740 
741 static VkResult
rra_dump_acceleration_structure(struct radv_rra_accel_struct_data * accel_struct,uint8_t * data,struct hash_table_u64 * accel_struct_vas,bool should_validate,FILE * output)742 rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, uint8_t *data,
743                                 struct hash_table_u64 *accel_struct_vas, bool should_validate, FILE *output)
744 {
745    struct radv_accel_struct_header *header = (struct radv_accel_struct_header *)data;
746 
747    bool is_tlas = header->instance_count > 0;
748 
749    uint64_t geometry_infos_offset = sizeof(struct radv_accel_struct_header);
750 
751    /* convert root node id to offset */
752    uint32_t src_root_offset = (RADV_BVH_ROOT_NODE & ~7) << 3;
753 
754    if (should_validate) {
755       if (rra_validate_header(accel_struct, header)) {
756          return VK_ERROR_VALIDATION_FAILED_EXT;
757       }
758       if (rra_validate_node(accel_struct_vas, data + header->bvh_offset, data + header->bvh_offset + src_root_offset,
759                             header->geometry_count, accel_struct->size, !is_tlas)) {
760          return VK_ERROR_VALIDATION_FAILED_EXT;
761       }
762    }
763 
764    VkResult result = VK_SUCCESS;
765 
766    struct rra_geometry_info *rra_geometry_infos = NULL;
767    uint32_t *leaf_indices = NULL;
768    uint32_t *node_parent_table = NULL;
769    uint32_t *leaf_node_ids = NULL;
770    uint8_t *dst_structure_data = NULL;
771 
772    rra_geometry_infos = calloc(header->geometry_count, sizeof(struct rra_geometry_info));
773    if (!rra_geometry_infos) {
774       result = VK_ERROR_OUT_OF_HOST_MEMORY;
775       goto exit;
776    }
777 
778    struct rra_bvh_info bvh_info = {
779       .geometry_infos = rra_geometry_infos,
780    };
781    rra_gather_bvh_info(data + header->bvh_offset, RADV_BVH_ROOT_NODE, &bvh_info);
782 
783    leaf_indices = calloc(header->geometry_count, sizeof(struct rra_geometry_info));
784    if (!leaf_indices) {
785       result = VK_ERROR_OUT_OF_HOST_MEMORY;
786       goto exit;
787    }
788 
789    uint64_t primitive_count = 0;
790 
791    struct radv_accel_struct_geometry_info *geometry_infos =
792       (struct radv_accel_struct_geometry_info *)(data + geometry_infos_offset);
793 
794    for (uint32_t i = 0; i < header->geometry_count; ++i) {
795       rra_geometry_infos[i].flags = geometry_infos[i].flags;
796       rra_geometry_infos[i].leaf_node_list_offset = primitive_count * sizeof(uint32_t);
797       leaf_indices[i] = primitive_count;
798       primitive_count += rra_geometry_infos[i].primitive_count;
799    }
800 
801    uint32_t node_parent_table_size =
802       ((bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size) / 64) * sizeof(uint32_t);
803 
804    node_parent_table = calloc(node_parent_table_size, 1);
805    if (!node_parent_table) {
806       result = VK_ERROR_OUT_OF_HOST_MEMORY;
807       goto exit;
808    }
809 
810    leaf_node_ids = calloc(primitive_count, sizeof(uint32_t));
811    if (!leaf_node_ids) {
812       result = VK_ERROR_OUT_OF_HOST_MEMORY;
813       goto exit;
814    }
815    dst_structure_data = calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, 1);
816    if (!dst_structure_data) {
817       result = VK_ERROR_OUT_OF_HOST_MEMORY;
818       goto exit;
819    }
820 
821    struct rra_transcoding_context ctx = {
822       .src = data + header->bvh_offset,
823       .dst = dst_structure_data,
824       .dst_leaf_offset = RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size,
825       .dst_internal_offset = RRA_ROOT_NODE_OFFSET,
826       .parent_id_table = node_parent_table,
827       .parent_id_table_size = node_parent_table_size,
828       .leaf_node_ids = leaf_node_ids,
829       .leaf_indices = leaf_indices,
830    };
831 
832    rra_transcode_node(&ctx, 0xFFFFFFFF, RADV_BVH_ROOT_NODE, header->aabb);
833 
834    struct rra_accel_struct_chunk_header chunk_header = {
835       .metadata_offset = 0,
836       /*
837        * RRA loads the part of the metadata that is used into a struct.
838        * If the size is larger than just the "used" part, the loading
839        * operation overwrites internal pointers with data from the file,
840        * likely causing a crash.
841        */
842       .metadata_size = offsetof(struct rra_accel_struct_metadata, unused),
843       .header_offset = sizeof(struct rra_accel_struct_metadata) + node_parent_table_size,
844       .header_size = sizeof(struct rra_accel_struct_header),
845       .bvh_type = is_tlas ? RRA_BVH_TYPE_TLAS : RRA_BVH_TYPE_BLAS,
846    };
847 
848    /*
849     * When associating TLASes with BLASes, acceleration structure VAs are
850     * looked up in a hashmap. But due to the way BLAS VAs are stored for
851     * each instance in the RRA file format (divided by 8, and limited to 54 bits),
852     * the top bits are masked away.
853     * In order to make sure BLASes can be found in the hashmap, we have
854     * to replicate that mask here.
855     */
856    uint64_t va = accel_struct->va & 0x1FFFFFFFFFFFFFF;
857    memcpy(chunk_header.virtual_address, &va, sizeof(uint64_t));
858 
859    struct rra_accel_struct_metadata rra_metadata = {
860       .virtual_address = va,
861       .byte_size = bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size + sizeof(struct rra_accel_struct_header),
862    };
863 
864    fwrite(&chunk_header, sizeof(struct rra_accel_struct_chunk_header), 1, output);
865    fwrite(&rra_metadata, sizeof(struct rra_accel_struct_metadata), 1, output);
866 
867    /* Write node parent id data */
868    fwrite(node_parent_table, 1, node_parent_table_size, output);
869 
870    if (is_tlas)
871       rra_dump_tlas_header(header, node_parent_table_size, bvh_info.leaf_nodes_size, bvh_info.internal_nodes_size,
872                            primitive_count, output);
873    else
874       rra_dump_blas_header(header, node_parent_table_size, geometry_infos, bvh_info.leaf_nodes_size,
875                            bvh_info.internal_nodes_size, primitive_count, output);
876 
877    /* Write acceleration structure data  */
878    fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1, bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size,
879           output);
880 
881    if (!is_tlas)
882       fwrite(rra_geometry_infos, sizeof(struct rra_geometry_info), header->geometry_count, output);
883 
884    /* Write leaf node ids */
885    uint32_t leaf_node_list_size = primitive_count * sizeof(uint32_t);
886    fwrite(leaf_node_ids, 1, leaf_node_list_size, output);
887 
888 exit:
889    free(rra_geometry_infos);
890    free(leaf_indices);
891    free(dst_structure_data);
892    free(node_parent_table);
893    free(leaf_node_ids);
894 
895    return result;
896 }
897 
898 VkResult
radv_rra_trace_init(struct radv_device * device)899 radv_rra_trace_init(struct radv_device *device)
900 {
901    device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false);
902    device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false);
903    device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL);
904    device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL);
905    simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain);
906 
907    device->rra_trace.copy_memory_index = radv_find_memory_index(
908       device->physical_device,
909       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
910 
911    util_dynarray_init(&device->rra_trace.ray_history, NULL);
912 
913    device->rra_trace.ray_history_buffer_size = debug_get_num_option("RADV_RRA_TRACE_HISTORY_SIZE", 100 * 1024 * 1024);
914    if (device->rra_trace.ray_history_buffer_size <
915        sizeof(struct radv_ray_history_header) + sizeof(struct radv_packed_end_trace_token))
916       return VK_SUCCESS;
917 
918    device->rra_trace.ray_history_resolution_scale = debug_get_num_option("RADV_RRA_TRACE_RESOLUTION_SCALE", 1);
919    device->rra_trace.ray_history_resolution_scale = MAX2(device->rra_trace.ray_history_resolution_scale, 1);
920 
921    VkBufferCreateInfo buffer_create_info = {
922       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
923       .pNext =
924          &(VkBufferUsageFlags2CreateInfoKHR){
925             .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
926             .usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR,
927          },
928       .size = device->rra_trace.ray_history_buffer_size,
929    };
930 
931    VkDevice _device = radv_device_to_handle(device);
932    VkResult result = radv_CreateBuffer(_device, &buffer_create_info, NULL, &device->rra_trace.ray_history_buffer);
933    if (result != VK_SUCCESS)
934       return result;
935 
936    VkMemoryRequirements requirements;
937    vk_common_GetBufferMemoryRequirements(_device, device->rra_trace.ray_history_buffer, &requirements);
938 
939    VkMemoryAllocateInfo alloc_info = {
940       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
941       .allocationSize = requirements.size,
942       .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
943                                                                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
944                                                                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
945    };
946 
947    result = radv_AllocateMemory(_device, &alloc_info, NULL, &device->rra_trace.ray_history_memory);
948    if (result != VK_SUCCESS)
949       return result;
950 
951    result = vk_common_MapMemory(_device, device->rra_trace.ray_history_memory, 0, VK_WHOLE_SIZE, 0,
952                                 (void **)&device->rra_trace.ray_history_data);
953    if (result != VK_SUCCESS)
954       return result;
955 
956    result = vk_common_BindBufferMemory(_device, device->rra_trace.ray_history_buffer,
957                                        device->rra_trace.ray_history_memory, 0);
958 
959    VkBufferDeviceAddressInfo addr_info = {
960       .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
961       .buffer = device->rra_trace.ray_history_buffer,
962    };
963    device->rra_trace.ray_history_addr = radv_GetBufferDeviceAddress(_device, &addr_info);
964 
965    struct radv_ray_history_header *ray_history_header = device->rra_trace.ray_history_data;
966    memset(ray_history_header, 0, sizeof(struct radv_ray_history_header));
967    ray_history_header->offset = 1;
968 
969    return result;
970 }
971 
972 void
radv_rra_trace_clear_ray_history(VkDevice _device,struct radv_rra_trace_data * data)973 radv_rra_trace_clear_ray_history(VkDevice _device, struct radv_rra_trace_data *data)
974 {
975    util_dynarray_foreach (&data->ray_history, struct radv_rra_ray_history_data *, _entry) {
976       struct radv_rra_ray_history_data *entry = *_entry;
977       free(entry);
978    }
979    util_dynarray_clear(&data->ray_history);
980 }
981 
void
radv_rra_trace_finish(VkDevice vk_device,struct radv_rra_trace_data * data)983 radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data)
{
   /* Tear down all RRA trace state: the ray history buffer/memory, the recorded
    * dispatch metadata, every tracked acceleration structure copy, and the
    * bookkeeping hash tables. Safe to call when ray history was never enabled
    * (handles are then VK_NULL_HANDLE / NULL).
    */
   radv_DestroyBuffer(vk_device, data->ray_history_buffer, NULL);

   /* Unmap before freeing; only mapped if init allocated the memory. */
   if (data->ray_history_memory)
      vk_common_UnmapMemory(vk_device, data->ray_history_memory);

   radv_FreeMemory(vk_device, data->ray_history_memory, NULL);

   radv_rra_trace_clear_ray_history(vk_device, data);
   util_dynarray_fini(&data->ray_history);

   /* Destroy per-accel-struct copies before destroying the table that owns the entries. */
   if (data->accel_structs)
      hash_table_foreach (data->accel_structs, entry)
         radv_destroy_rra_accel_struct_data(vk_device, entry->data);

   simple_mtx_destroy(&data->data_mtx);
   _mesa_hash_table_destroy(data->accel_structs, NULL);
   _mesa_hash_table_u64_destroy(data->accel_struct_vas);
}
1003 
void
radv_destroy_rra_accel_struct_data(VkDevice device,struct radv_rra_accel_struct_data * data)1005 radv_destroy_rra_accel_struct_data(VkDevice device, struct radv_rra_accel_struct_data *data)
{
   /* Release all resources owned by one tracked acceleration structure entry
    * and free the entry itself. The Vulkan destroy/free functions are no-ops
    * on VK_NULL_HANDLE, so partially-initialized entries are handled too.
    */
   radv_DestroyEvent(device, data->build_event, NULL);
   radv_DestroyBuffer(device, data->buffer, NULL);
   radv_FreeMemory(device, data->memory, NULL);
   free(data);
}
1012 
1013 static int
accel_struct_entry_cmp(const void * a,const void * b)1014 accel_struct_entry_cmp(const void *a, const void *b)
1015 {
1016    struct hash_entry *entry_a = *(struct hash_entry *const *)a;
1017    struct hash_entry *entry_b = *(struct hash_entry *const *)b;
1018    const struct radv_rra_accel_struct_data *s_a = entry_a->data;
1019    const struct radv_rra_accel_struct_data *s_b = entry_b->data;
1020 
1021    return s_a->va > s_b->va ? 1 : s_a->va < s_b->va ? -1 : 0;
1022 }
1023 
/* State for copying acceleration structure contents back to the host
 * before dumping. Unused (apart from `entries`) when
 * RADV_RRA_TRACE_COPY_AFTER_BUILD is set, since copies then already
 * exist in host-visible memory.
 */
struct rra_copy_context {
   VkDevice device;
   VkQueue queue;

   /* Command pool/buffer used to record the GPU->host copy. */
   VkCommandPool pool;
   VkCommandBuffer cmd_buffer;
   uint32_t family_index;

   /* Host-visible staging buffer, sized for the largest accel struct. */
   VkDeviceMemory memory;
   VkBuffer buffer;
   void *mapped_data;

   /* Sorted accel-struct hash entries (see accel_struct_entry_cmp). */
   struct hash_entry **entries;

   /* Lower bound for the staging buffer size. */
   uint32_t min_size;
};
1040 
static VkResult
rra_copy_context_init(struct rra_copy_context * ctx)1042 rra_copy_context_init(struct rra_copy_context *ctx)
{
   /* Create the command pool/buffer and a host-visible staging buffer large
    * enough for any tracked acceleration structure. Skipped entirely when
    * copy_after_build is set: host copies already exist in that mode.
    * On failure, everything created so far is released via the goto ladder.
    */
   RADV_FROM_HANDLE(radv_device, device, ctx->device);
   if (device->rra_trace.copy_after_build)
      return VK_SUCCESS;

   /* Size the staging buffer for the largest accel struct (at least min_size). */
   uint32_t max_size = ctx->min_size;
   uint32_t accel_struct_count = _mesa_hash_table_num_entries(device->rra_trace.accel_structs);
   for (unsigned i = 0; i < accel_struct_count; i++) {
      struct radv_rra_accel_struct_data *data = ctx->entries[i]->data;
      max_size = MAX2(max_size, data->size);
   }

   VkCommandPoolCreateInfo pool_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .queueFamilyIndex = ctx->family_index,
   };

   VkResult result = vk_common_CreateCommandPool(ctx->device, &pool_info, NULL, &ctx->pool);
   if (result != VK_SUCCESS)
      return result;

   VkCommandBufferAllocateInfo cmdbuf_alloc_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .commandPool = ctx->pool,
      .commandBufferCount = 1,
   };

   result = vk_common_AllocateCommandBuffers(ctx->device, &cmdbuf_alloc_info, &ctx->cmd_buffer);
   if (result != VK_SUCCESS)
      goto fail_pool;

   VkBufferCreateInfo buffer_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .pNext =
         &(VkBufferUsageFlags2CreateInfoKHR){
            .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
            .usage = VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR,
         },
      .size = max_size,
   };

   result = radv_CreateBuffer(ctx->device, &buffer_create_info, NULL, &ctx->buffer);
   if (result != VK_SUCCESS)
      goto fail_pool;

   VkMemoryRequirements requirements;
   vk_common_GetBufferMemoryRequirements(ctx->device, ctx->buffer, &requirements);

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = requirements.size,
      /* Host-visible/coherent/cached index picked at trace init. */
      .memoryTypeIndex = device->rra_trace.copy_memory_index,
   };

   result = radv_AllocateMemory(ctx->device, &alloc_info, NULL, &ctx->memory);
   if (result != VK_SUCCESS)
      goto fail_buffer;

   result = vk_common_MapMemory(ctx->device, ctx->memory, 0, VK_WHOLE_SIZE, 0, (void **)&ctx->mapped_data);
   if (result != VK_SUCCESS)
      goto fail_memory;

   result = vk_common_BindBufferMemory(ctx->device, ctx->buffer, ctx->memory, 0);
   if (result != VK_SUCCESS)
      goto fail_memory;

   return result;
fail_memory:
   radv_FreeMemory(ctx->device, ctx->memory, NULL);
fail_buffer:
   radv_DestroyBuffer(ctx->device, ctx->buffer, NULL);
fail_pool:
   vk_common_DestroyCommandPool(ctx->device, ctx->pool, NULL);
   return result;
}
1118 
static void
rra_copy_context_finish(struct rra_copy_context * ctx)1120 rra_copy_context_finish(struct rra_copy_context *ctx)
{
   /* Release the resources created by rra_copy_context_init. Mirrors the
    * init-time early-out: nothing was created in copy_after_build mode.
    */
   RADV_FROM_HANDLE(radv_device, device, ctx->device);
   if (device->rra_trace.copy_after_build)
      return;

   vk_common_DestroyCommandPool(ctx->device, ctx->pool, NULL);
   radv_DestroyBuffer(ctx->device, ctx->buffer, NULL);
   /* Unmap before freeing the backing memory. */
   vk_common_UnmapMemory(ctx->device, ctx->memory);
   radv_FreeMemory(ctx->device, ctx->memory, NULL);
}
1131 
1132 static void *
rra_map_accel_struct_data(struct rra_copy_context * ctx,uint32_t i)1133 rra_map_accel_struct_data(struct rra_copy_context *ctx, uint32_t i)
1134 {
1135    struct radv_rra_accel_struct_data *data = ctx->entries[i]->data;
1136    if (radv_GetEventStatus(ctx->device, data->build_event) != VK_EVENT_SET)
1137       return NULL;
1138 
1139    if (data->memory) {
1140       void *mapped_data;
1141       vk_common_MapMemory(ctx->device, data->memory, 0, VK_WHOLE_SIZE, 0, &mapped_data);
1142       return mapped_data;
1143    }
1144 
1145    const struct vk_acceleration_structure *accel_struct = ctx->entries[i]->key;
1146    VkResult result;
1147 
1148    VkCommandBufferBeginInfo begin_info = {
1149       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
1150    };
1151    result = radv_BeginCommandBuffer(ctx->cmd_buffer, &begin_info);
1152    if (result != VK_SUCCESS)
1153       return NULL;
1154 
1155    VkBufferCopy2 copy = {
1156       .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
1157       .srcOffset = accel_struct->offset,
1158       .size = accel_struct->size,
1159    };
1160 
1161    VkCopyBufferInfo2 copy_info = {
1162       .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
1163       .srcBuffer = accel_struct->buffer,
1164       .dstBuffer = ctx->buffer,
1165       .regionCount = 1,
1166       .pRegions = &copy,
1167    };
1168 
1169    radv_CmdCopyBuffer2(ctx->cmd_buffer, &copy_info);
1170 
1171    result = radv_EndCommandBuffer(ctx->cmd_buffer);
1172    if (result != VK_SUCCESS)
1173       return NULL;
1174 
1175    VkSubmitInfo submit_info = {
1176       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
1177       .commandBufferCount = 1,
1178       .pCommandBuffers = &ctx->cmd_buffer,
1179    };
1180 
1181    result = vk_common_QueueSubmit(ctx->queue, 1, &submit_info, VK_NULL_HANDLE);
1182    if (result != VK_SUCCESS)
1183       return NULL;
1184 
1185    result = vk_common_QueueWaitIdle(ctx->queue);
1186    if (result != VK_SUCCESS)
1187       return NULL;
1188 
1189    return ctx->mapped_data;
1190 }
1191 
1192 static void
rra_unmap_accel_struct_data(struct rra_copy_context * ctx,uint32_t i)1193 rra_unmap_accel_struct_data(struct rra_copy_context *ctx, uint32_t i)
1194 {
1195    struct radv_rra_accel_struct_data *data = ctx->entries[i]->data;
1196 
1197    if (data->memory)
1198       vk_common_UnmapMemory(ctx->device, data->memory);
1199 }
1200 
/* Token type ids used in the RRA ray history token stream. Each token in the
 * stream is an id token, optionally followed by a control token and a
 * type-specific payload.
 */
enum rra_ray_history_token_type {
   rra_ray_history_token_begin,
   rra_ray_history_token_tlas,
   rra_ray_history_token_blas,
   rra_ray_history_token_end,
   rra_ray_history_token_call,
   rra_ray_history_token_timestamp,
   rra_ray_history_token_ahit_status,
   rra_ray_history_token_call2,
   rra_ray_history_token_isec_status,
   rra_ray_history_token_end2,
   rra_ray_history_token_begin2,
   rra_ray_history_token_normal = 0xFFFF,
};

/* Per-token id: the ray's launch index plus a flag marking whether a control
 * token follows.
 */
struct rra_ray_history_id_token {
   uint32_t id : 30;
   uint32_t reserved : 1;
   uint32_t has_control : 1;
};
static_assert(sizeof(struct rra_ray_history_id_token) == 4, "rra_ray_history_id_token does not match RRA expectations");

/* Control token: token type, payload length in dwords, and a small inline
 * data field (used e.g. for anyhit/intersection status).
 */
struct rra_ray_history_control_token {
   uint32_t type : 16;
   uint32_t length : 8;
   uint32_t data : 8;
};
static_assert(sizeof(struct rra_ray_history_control_token) == 4,
              "rra_ray_history_control_token does not match RRA expectations");

/* Payload of a "begin" token: the traceRay call parameters. */
struct rra_ray_history_begin_token {
   uint32_t wave_id;
   uint32_t launch_ids[3];
   uint32_t accel_struct_lo;
   uint32_t accel_struct_hi;
   uint32_t ray_flags;
   uint32_t cull_mask : 8;
   uint32_t stb_offset : 4;
   uint32_t stb_stride : 4;
   uint32_t miss_index : 16;
   float origin[3];
   float tmin;
   float direction[3];
   float tmax;
};
static_assert(sizeof(struct rra_ray_history_begin_token) == 64,
              "rra_ray_history_begin_token does not match RRA expectations");

/* "begin" variant with call-site and wave identification. */
struct rra_ray_history_begin2_token {
   struct rra_ray_history_begin_token base;
   uint32_t call_instruction_id;
   uint32_t unique_wave_id;
   uint32_t parent_unique_wave_id;
};
static_assert(sizeof(struct rra_ray_history_begin2_token) == 76,
              "rra_ray_history_begin2_token does not match RRA expectations");

/* Payload of an "end" token: the final hit, if any. */
struct rra_ray_history_end_token {
   uint32_t primitive_index;
   uint32_t geometry_index;
};
static_assert(sizeof(struct rra_ray_history_end_token) == 8,
              "rra_ray_history_end_token does not match RRA expectations");

/* "end" variant extended with instance/hit-kind and traversal statistics. */
struct rra_ray_history_end2_token {
   struct rra_ray_history_end_token base;
   uint32_t instance_index : 24;
   uint32_t hit_kind : 8;
   uint32_t iteration_count;
   uint32_t candidate_instance_count;
   float t;
};
static_assert(sizeof(struct rra_ray_history_end2_token) == 24,
              "rra_ray_history_end2_token does not match RRA expectations");

struct rra_ray_history_tlas_token {
   uint64_t addr;
};
static_assert(sizeof(struct rra_ray_history_tlas_token) == 8,
              "rra_ray_history_tlas_token does not match RRA expectations");

struct rra_ray_history_blas_token {
   uint64_t addr;
};
static_assert(sizeof(struct rra_ray_history_blas_token) == 8,
              "rra_ray_history_blas_token does not match RRA expectations");

struct rra_ray_history_call_token {
   uint32_t addr[2];
};
static_assert(sizeof(struct rra_ray_history_call_token) == 8,
              "rra_ray_history_call_token does not match RRA expectations");

struct rra_ray_history_call2_token {
   struct rra_ray_history_call_token base;
   uint32_t sbt_index;
};
static_assert(sizeof(struct rra_ray_history_call2_token) == 12,
              "rra_ray_history_call2_token does not match RRA expectations");

struct rra_ray_history_isec_token {
   float t;
   uint32_t hit_kind;
};
static_assert(sizeof(struct rra_ray_history_isec_token) == 8,
              "rra_ray_history_isec_token does not match RRA expectations");

struct rra_ray_history_timestamp_token {
   uint64_t gpu_timestamp;
};
static_assert(sizeof(struct rra_ray_history_timestamp_token) == 8,
              "rra_ray_history_timestamp_token does not match RRA expectations");
1313 
1314 VkResult
radv_rra_dump_trace(VkQueue vk_queue,char * filename)1315 radv_rra_dump_trace(VkQueue vk_queue, char *filename)
1316 {
1317    RADV_FROM_HANDLE(radv_queue, queue, vk_queue);
1318    struct radv_device *device = queue->device;
1319    VkDevice vk_device = radv_device_to_handle(device);
1320 
1321    VkResult result = vk_common_DeviceWaitIdle(vk_device);
1322    if (result != VK_SUCCESS)
1323       return result;
1324 
1325    uint64_t *accel_struct_offsets = NULL;
1326    uint64_t *ray_history_offsets = NULL;
1327    struct hash_entry **hash_entries = NULL;
1328    FILE *file = NULL;
1329 
1330    uint32_t struct_count = _mesa_hash_table_num_entries(device->rra_trace.accel_structs);
1331    accel_struct_offsets = calloc(struct_count, sizeof(uint64_t));
1332    if (!accel_struct_offsets)
1333       return VK_ERROR_OUT_OF_HOST_MEMORY;
1334 
1335    uint32_t dispatch_count =
1336       util_dynarray_num_elements(&device->rra_trace.ray_history, struct radv_rra_ray_history_data *);
1337    ray_history_offsets = calloc(dispatch_count, sizeof(uint64_t));
1338    if (!ray_history_offsets) {
1339       result = VK_ERROR_OUT_OF_HOST_MEMORY;
1340       goto cleanup;
1341    }
1342 
1343    hash_entries = malloc(sizeof(*hash_entries) * struct_count);
1344    if (!hash_entries) {
1345       result = VK_ERROR_OUT_OF_HOST_MEMORY;
1346       goto cleanup;
1347    }
1348 
1349    file = fopen(filename, "w");
1350    if (!file) {
1351       result = VK_ERROR_OUT_OF_HOST_MEMORY;
1352       goto cleanup;
1353    }
1354 
1355    /*
1356     * The header contents can only be determined after all acceleration
1357     * structures have been dumped. An empty struct is written instead
1358     * to keep offsets intact.
1359     */
1360    struct rra_file_header header = {0};
1361    fwrite(&header, sizeof(struct rra_file_header), 1, file);
1362 
1363    uint64_t api_info_offset = (uint64_t)ftell(file);
1364    uint64_t api = RADV_RRA_API_VULKAN;
1365    fwrite(&api, sizeof(uint64_t), 1, file);
1366 
1367    uint64_t asic_info_offset = (uint64_t)ftell(file);
1368    rra_dump_asic_info(&device->physical_device->rad_info, file);
1369 
1370    uint64_t written_accel_struct_count = 0;
1371 
1372    struct hash_entry *last_entry = NULL;
1373    for (unsigned i = 0; (last_entry = _mesa_hash_table_next_entry(device->rra_trace.accel_structs, last_entry)); ++i)
1374       hash_entries[i] = last_entry;
1375 
1376    qsort(hash_entries, struct_count, sizeof(*hash_entries), accel_struct_entry_cmp);
1377 
1378    struct rra_copy_context copy_ctx = {
1379       .device = vk_device,
1380       .queue = vk_queue,
1381       .entries = hash_entries,
1382       .family_index = queue->vk.queue_family_index,
1383       .min_size = device->rra_trace.ray_history_buffer_size,
1384    };
1385 
1386    result = rra_copy_context_init(&copy_ctx);
1387    if (result != VK_SUCCESS)
1388       goto cleanup;
1389 
1390    for (unsigned i = 0; i < struct_count; i++) {
1391       struct radv_rra_accel_struct_data *data = hash_entries[i]->data;
1392       void *mapped_data = rra_map_accel_struct_data(&copy_ctx, i);
1393       if (!mapped_data)
1394          continue;
1395 
1396       accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file);
1397       result = rra_dump_acceleration_structure(data, mapped_data, device->rra_trace.accel_struct_vas,
1398                                                device->rra_trace.validate_as, file);
1399 
1400       rra_unmap_accel_struct_data(&copy_ctx, i);
1401 
1402       if (result == VK_SUCCESS)
1403          written_accel_struct_count++;
1404    }
1405 
1406    uint64_t ray_history_offset = (uint64_t)ftell(file);
1407 
1408    uint32_t ray_history_index = 0xFFFFFFFF;
1409    struct radv_rra_ray_history_data *ray_history = NULL;
1410 
1411    uint8_t *history = device->rra_trace.ray_history_data;
1412    struct radv_ray_history_header *history_header = (void *)history;
1413 
1414    uint32_t history_buffer_size_mb = device->rra_trace.ray_history_buffer_size / 1024 / 1024;
1415    uint32_t history_size_mb = history_header->offset / 1024 / 1024;
1416    if (history_header->offset > device->rra_trace.ray_history_buffer_size) {
1417       fprintf(stderr, "radv: rra: The ray history buffer size (%u MB) is to small. %u MB is required.\n",
1418               history_buffer_size_mb, history_size_mb);
1419    } else {
1420       fprintf(stderr, "radv: rra: Ray history buffer size = %u MB, ray history size = %u MB.\n", history_buffer_size_mb,
1421               history_size_mb);
1422    }
1423 
1424    uint32_t history_size = MIN2(history_header->offset, device->rra_trace.ray_history_buffer_size);
1425 
1426    uint32_t token_size;
1427    for (uint32_t offset = sizeof(struct radv_ray_history_header); offset < history_size; offset += token_size) {
1428       struct radv_packed_end_trace_token *src = (void *)(history + offset);
1429       token_size = src->header.hit ? sizeof(struct radv_packed_end_trace_token)
1430                                    : offsetof(struct radv_packed_end_trace_token, primitive_id);
1431 
1432       if (src->dispatch_index != ray_history_index) {
1433          ray_history_index = src->dispatch_index;
1434          assert(ray_history_index < dispatch_count);
1435          ray_history = *util_dynarray_element(&device->rra_trace.ray_history, struct radv_rra_ray_history_data *,
1436                                               ray_history_index);
1437 
1438          assert(!ray_history_offsets[ray_history_index]);
1439          ray_history_offsets[ray_history_index] = (uint64_t)ftell(file);
1440          fwrite(&ray_history->metadata, sizeof(struct radv_rra_ray_history_metadata), 1, file);
1441       }
1442 
1443       uint32_t *dispatch_size = ray_history->metadata.dispatch_size.size;
1444 
1445       uint32_t x = src->header.launch_index % dispatch_size[0];
1446       uint32_t y = (src->header.launch_index / dispatch_size[0]) % dispatch_size[1];
1447       uint32_t z = src->header.launch_index / (dispatch_size[0] * dispatch_size[1]);
1448 
1449       struct rra_ray_history_id_token begin_id = {
1450          .id = src->header.launch_index,
1451          .has_control = true,
1452       };
1453       struct rra_ray_history_control_token begin_control = {
1454          .type = rra_ray_history_token_begin,
1455          .length = sizeof(struct rra_ray_history_begin_token) / 4,
1456       };
1457       struct rra_ray_history_begin_token begin = {
1458          .wave_id = src->header.launch_index / 32,
1459          .launch_ids = {x, y, z},
1460          .accel_struct_lo = src->accel_struct_lo,
1461          .accel_struct_hi = src->accel_struct_hi & 0x1FFFFFF,
1462          .ray_flags = src->flags,
1463          .cull_mask = src->cull_mask,
1464          .stb_offset = src->sbt_offset,
1465          .stb_stride = src->sbt_stride,
1466          .miss_index = src->miss_index,
1467          .origin[0] = src->origin[0],
1468          .origin[1] = src->origin[1],
1469          .origin[2] = src->origin[2],
1470          .tmin = src->tmin,
1471          .direction[0] = src->direction[0],
1472          .direction[1] = src->direction[1],
1473          .direction[2] = src->direction[2],
1474          .tmax = src->tmax,
1475       };
1476       fwrite(&begin_id, sizeof(begin_id), 1, file);
1477       fwrite(&begin_control, sizeof(begin_control), 1, file);
1478       fwrite(&begin, sizeof(begin), 1, file);
1479 
1480       for (uint32_t i = 0; i < src->ahit_count; i++) {
1481          struct rra_ray_history_id_token ahit_status_id = {
1482             .id = src->header.launch_index,
1483             .has_control = true,
1484          };
1485          struct rra_ray_history_control_token ahit_status_control = {
1486             .type = rra_ray_history_token_ahit_status,
1487             .data = i == src->ahit_count - 1 ? 2 : 0,
1488          };
1489          fwrite(&ahit_status_id, sizeof(ahit_status_id), 1, file);
1490          fwrite(&ahit_status_control, sizeof(ahit_status_control), 1, file);
1491       }
1492 
1493       for (uint32_t i = 0; i < src->isec_count; i++) {
1494          struct rra_ray_history_id_token isec_status_id = {
1495             .id = src->header.launch_index,
1496             .has_control = true,
1497          };
1498          struct rra_ray_history_control_token isec_status_control = {
1499             .type = rra_ray_history_token_isec_status,
1500             .data = i == src->ahit_count - 1 ? 2 : 0,
1501          };
1502          fwrite(&isec_status_id, sizeof(isec_status_id), 1, file);
1503          fwrite(&isec_status_control, sizeof(isec_status_control), 1, file);
1504       }
1505 
1506       struct rra_ray_history_id_token end_id = {
1507          .id = src->header.launch_index,
1508          .has_control = true,
1509       };
1510       struct rra_ray_history_control_token end_control = {
1511          .type = rra_ray_history_token_end2,
1512          .length = sizeof(struct rra_ray_history_end2_token) / 4,
1513       };
1514       struct rra_ray_history_end2_token end = {
1515          .base.primitive_index = 0xFFFFFFFF,
1516          .base.geometry_index = 0xFFFFFFFF,
1517          .iteration_count = src->iteration_count,
1518          .candidate_instance_count = src->instance_count,
1519       };
1520 
1521       if (src->header.hit) {
1522          end.base.primitive_index = src->primitive_id;
1523          end.base.geometry_index = src->geometry_id;
1524          end.instance_index = src->instance_id;
1525          end.hit_kind = src->hit_kind;
1526          end.t = src->t;
1527       }
1528 
1529       fwrite(&end_id, sizeof(end_id), 1, file);
1530       fwrite(&end_control, sizeof(end_control), 1, file);
1531       fwrite(&end, sizeof(end), 1, file);
1532    }
1533 
1534    for (uint32_t i = 0; i < dispatch_count; i++) {
1535       if (ray_history_offsets[i])
1536          continue;
1537 
1538       ray_history = *util_dynarray_element(&device->rra_trace.ray_history, struct radv_rra_ray_history_data *, i);
1539       ray_history_offsets[i] = (uint64_t)ftell(file);
1540       fwrite(&ray_history->metadata, sizeof(struct radv_rra_ray_history_metadata), 1, file);
1541    }
1542 
1543    history_header->offset = 1;
1544 
1545    rra_copy_context_finish(&copy_ctx);
1546 
1547    uint64_t chunk_info_offset = (uint64_t)ftell(file);
1548    rra_dump_chunk_description(api_info_offset, 0, 8, "ApiInfo", RADV_RRA_ASIC_API_INFO_CHUNK_VERSION, file);
1549    rra_dump_chunk_description(asic_info_offset, 0, sizeof(struct rra_asic_info), "AsicInfo",
1550                               RADV_RRA_ASIC_API_INFO_CHUNK_VERSION, file);
1551 
1552    for (uint32_t i = 0; i < dispatch_count; i++) {
1553       uint64_t tokens_size;
1554       if (i == dispatch_count - 1)
1555          tokens_size = (uint64_t)(chunk_info_offset - ray_history_offsets[i]);
1556       else
1557          tokens_size = (uint64_t)(ray_history_offsets[i + 1] - ray_history_offsets[i]);
1558       tokens_size -= sizeof(struct radv_rra_ray_history_metadata);
1559 
1560       rra_dump_chunk_description(ray_history_offsets[i], 0, sizeof(struct radv_rra_ray_history_metadata),
1561                                  "HistoryMetadata", RADV_RRA_RAY_HISTORY_CHUNK_VERSION, file);
1562       rra_dump_chunk_description(ray_history_offsets[i] + sizeof(struct radv_rra_ray_history_metadata), 0, tokens_size,
1563                                  "HistoryTokensRaw", RADV_RRA_RAY_HISTORY_CHUNK_VERSION, file);
1564    }
1565 
1566    for (uint32_t i = 0; i < written_accel_struct_count; ++i) {
1567       uint64_t accel_struct_size;
1568       if (i == written_accel_struct_count - 1)
1569          accel_struct_size = (uint64_t)(ray_history_offset - accel_struct_offsets[i]);
1570       else
1571          accel_struct_size = (uint64_t)(accel_struct_offsets[i + 1] - accel_struct_offsets[i]);
1572 
1573       rra_dump_chunk_description(accel_struct_offsets[i], sizeof(struct rra_accel_struct_chunk_header),
1574                                  accel_struct_size, "RawAccelStruct", RADV_RRA_ACCEL_STRUCT_CHUNK_VERSION, file);
1575    }
1576 
1577    uint64_t file_end = (uint64_t)ftell(file);
1578 
1579    /* All info is available, dump header now */
1580    fseek(file, 0, SEEK_SET);
1581    rra_dump_header(file, chunk_info_offset, file_end - chunk_info_offset);
1582 
1583    result = VK_SUCCESS;
1584 cleanup:
1585    if (file)
1586       fclose(file);
1587 
1588    free(hash_entries);
1589    free(ray_history_offsets);
1590    free(accel_struct_offsets);
1591    return result;
1592 }
1593