• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/* Copyright © 2022 Bas Nieuwenhuizen
2 * Copyright © 2024 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6#version 460
7
8#extension GL_GOOGLE_include_directive : require
9
10#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
11#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
12#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
13#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
14#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
15#extension GL_EXT_scalar_block_layout : require
16#extension GL_EXT_buffer_reference : require
17#extension GL_EXT_buffer_reference2 : require
18
19layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
20
21#include "anv_build_interface.h"
22
23layout(push_constant) uniform CONSTS {
24   copy_args args;
25};
26
27// Layout of serialized data
28/**************************************|
29| vk_accel_struct_serialization_header |
30|--------------------------------------|
31| For a TLAS, all handles to the BLAS  |
32| within this TLAS.                    |
33| For a BLAS, nothing.                 |
34|--------------------------------------|
35| Driver-specific part.                |
36| For Intel, this starts with          |
37| anv_accel_struct_header as drawn     |
38| in anv_bvh.h                         |
39|**************************************/
40
41/*
42 * Explanation of BLAS handles:
43 * According to the spec of vkCmdCopyAccelerationStructureToMemoryKHR,
44 * for a TLAS, the handles of all BLAS/instances within this TLAS are
45 * tightly stored after vk_accel_struct_serialization_header, making this
46 * serialized-memory a semi-opaque object. The application might be able
47 * to swap/replace these handles with other handles. In fact this is what
48 * dEQP-VK.ray_tracing_pipeline.acceleration_structures.header_bottom_address.*
49 * is doing.
50 *
51 * Therefore, if the application updates the handles, we need to replace
52 * the old handles in anv_instance_leaf with the new ones. To access
53 * anv_instance_leaf without traversing the TLAS, pointers to these
54 * anv_instance_leaf are stored right after anv_accel_struct_header,
55 * allowing us to know where they are in the TLAS instantly.
56 *
57 * Although, the fact that the application can swap/replace new handles
58 * of BLAS without rebuilding the TLAS sounds a bit odd.
59 */
60
/*
 * Shader entry point: copies an acceleration structure in 8-byte words and,
 * when serializing or deserializing, translates between the BLAS pointers
 * held in the instance leaves and the handle array that follows
 * vk_accel_struct_serialization_header.
 *
 * args.mode selects the behavior:
 *  - ANV_COPY_MODE_COPY:        copy compacted_size bytes from args.src_addr
 *                               to args.dst_addr, no pointer patching.
 *  - ANV_COPY_MODE_SERIALIZE:   invocation 0 writes the serialization header
 *                               at args.dst_addr; the BVH is copied behind the
 *                               header and handle array, and every handle slot
 *                               is filled from the corresponding instance leaf.
 *  - ANV_COPY_MODE_DESERIALIZE: the BVH is read from behind the header and
 *                               handle array at args.src_addr, and every
 *                               instance leaf is patched with the handle found
 *                               in that array.
 *
 * Work distribution: an invocation starts at byte offset global_id * 8 and
 * advances by (total number of invocations * 8) until compacted_size is
 * reached.
 */
void
main(void)
{
   uint32_t global_id = gl_GlobalInvocationID.x;

   /* Byte offsets are 64-bit, so widen *before* scaling. Multiplying in
    * 32 bits and converting afterwards would silently wrap for a large enough
    * dispatch. gl_WorkGroupSize.x is the local_size_x declared by this shader,
    * so the stride cannot drift from the layout qualifier.
    */
   const uint64_t first_offset = uint64_t(global_id) * 8;
   const uint64_t increment =
      uint64_t(gl_NumWorkGroups.x) * uint64_t(gl_WorkGroupSize.x) * 8;

   /* Low 48 bits: the part of bvh_ptr / start_node_ptr_and_inst_flags that is
    * read and rewritten below.
    */
   const uint64_t bvh_addr_mask = 0x0000fffffffffffful;

   uint64_t copy_src_addr = args.src_addr;
   uint64_t copy_dst_addr = args.dst_addr;

   if (args.mode == ANV_COPY_MODE_DESERIALIZE) {
      /* The BVH sits behind the serialization header and the handle array. */
      copy_src_addr += SIZEOF(vk_accel_struct_serialization_header) +
                       DEREF(REF(vk_accel_struct_serialization_header)(args.src_addr)).instance_count * SIZEOF(uint64_t);
   }

   REF(anv_accel_struct_header) header = REF(anv_accel_struct_header)(copy_src_addr);

   /* Handle array right behind the serialization header. Points into the
    * source here; the SERIALIZE path redirects it to the destination.
    */
   uint64_t instance_base = args.src_addr + SIZEOF(vk_accel_struct_serialization_header);
   uint64_t instance_offset = SIZEOF(anv_accel_struct_header);

   /* We store the address of instance_leaf after bvh header */
   uint64_t instance_end = DEREF(header).instance_count * SIZEOF(uint64_t);

   if (instance_end > 0) {
      instance_end += instance_offset;
   }

   if (args.mode == ANV_COPY_MODE_SERIALIZE) {
      /* In this mode copy_src_addr == args.src_addr, so `header` already
       * refers to the source BVH header.
       */
      copy_dst_addr += SIZEOF(vk_accel_struct_serialization_header) +
                       DEREF(header).instance_count * SIZEOF(uint64_t);

      /* A single invocation fills in the serialization header. */
      if (global_id == 0) {
         REF(vk_accel_struct_serialization_header) ser_header =
            REF(vk_accel_struct_serialization_header)(args.dst_addr);
         DEREF(ser_header).serialization_size = DEREF(header).serialization_size;
         DEREF(ser_header).deserialization_size = DEREF(header).compacted_size;
         DEREF(ser_header).instance_count = DEREF(header).instance_count;

         for (uint32_t i = 0; i < VK_UUID_SIZE; i++) {
            DEREF(ser_header).driver_uuid[i] = args.driver_uuid[i];
         }

         for (uint32_t i = 0; i < VK_UUID_SIZE; i++) {
            DEREF(ser_header).accel_struct_compat[i] = args.accel_struct_compat[i];
         }
      }

      instance_base = args.dst_addr + SIZEOF(vk_accel_struct_serialization_header);
   } else if (args.mode == ANV_COPY_MODE_COPY) {
      /* Empty range: the pointer-table test in the loop never matches. */
      instance_end = 0;
   }

   uint64_t size = DEREF(header).compacted_size;
   for (uint64_t offset = first_offset; offset < size; offset += increment) {
      /* copy 8 bytes per iteration */
      DEREF(REF(uint64_t)(copy_dst_addr + offset)) =
         DEREF(REF(uint64_t)(copy_src_addr + offset));

      /* Do the adjustment inline in the same invocation that copies the data so that we don't have
       * to synchronize.
       */
      if (offset < instance_end && offset >= instance_offset &&
          (offset - instance_offset) % SIZEOF(uint64_t) == 0) {
         /* Index of this word within the instance-leaf pointer table. */
         uint64_t idx = (offset - instance_offset) / SIZEOF(uint64_t);

         if (args.mode == ANV_COPY_MODE_SERIALIZE) {
            /* Indirectly access the anv_instance_leaf, and store the blas_ptrs after ser_header */
            uint64_t instance_leaf_addr = DEREF(REF(uint64_t)(copy_src_addr + offset));
            REF(anv_instance_leaf) instance_leaf = REF(anv_instance_leaf)(instance_leaf_addr);
            uint64_t blas_ptr = DEREF(instance_leaf).part1.bvh_ptr & bvh_addr_mask;
            DEREF(INDEX(uint64_t, instance_base, idx)) = blas_ptr;
         } else { /* ANV_COPY_MODE_DESERIALIZE */
            /* Indirectly access the anv_instance_leaf, and replace the bvh_ptr with the ones after ser_header */
            uint64_t instance_leaf_addr = DEREF(REF(uint64_t)(copy_dst_addr + offset));
            REF(anv_instance_leaf) instance_leaf = REF(anv_instance_leaf)(instance_leaf_addr);
            uint64_t blas_ptr = DEREF(INDEX(uint64_t, instance_base, idx));
            DEREF(instance_leaf).part1.bvh_ptr = (blas_ptr & bvh_addr_mask);

            /* set the startNodePtr to blas_ptr + ANV_RT_BVH_HEADER_SIZE */
            uint64_t new_startNodePtr = blas_ptr + ANV_RT_BVH_HEADER_SIZE;
            /* clear the low 48 bits and set them, keeping the upper bits */
            DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags =
               (DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags & ~bvh_addr_mask) |
               (new_startNodePtr & bvh_addr_mask);
         }
      }
   }
}
148