• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright © 2024 Intel Corporation
2  * SPDX-License-Identifier: MIT
3  */
4 
5 #ifndef ANV_BVH_H
6 #define ANV_BVH_H
7 
/* This header is compiled in two contexts, selected by the VULKAN macro.
 * When VULKAN is defined (presumably the shader build -- TODO confirm against
 * the build system), only VK_UUID_SIZE is provided locally. Otherwise the
 * Vulkan C header is included and the struct tags below get typedef names, so
 * that later members such as "anv_prim_leaf_desc leaf_desc;" can be declared
 * without the "struct" keyword.
 */
#ifdef VULKAN
#define VK_UUID_SIZE 16
#else
#include <vulkan/vulkan.h>
typedef struct anv_prim_leaf_desc anv_prim_leaf_desc;
typedef struct child_data child_data;
typedef struct instance_leaf_part0 instance_leaf_part0;
typedef struct instance_leaf_part1 instance_leaf_part1;
#endif
17 
18 #include "vk_bvh.h"
19 
/* Sizes, in bytes, of the building blocks of an ANV BVH. Node sizes are
 * expressed in terms of the 64-byte block: an instance leaf takes two blocks,
 * every other node type takes one.
 */
#define ANV_RT_BLOCK_SIZE             64
#define ANV_RT_BVH_HEADER_SIZE        256
#define ANV_RT_INSTANCE_LEAF_SIZE     (2 * ANV_RT_BLOCK_SIZE)
#define ANV_RT_QUAD_LEAF_SIZE         ANV_RT_BLOCK_SIZE
#define ANV_RT_PROCEDURAL_LEAF_SIZE   ANV_RT_BLOCK_SIZE
#define ANV_RT_INTERNAL_NODE_SIZE     ANV_RT_BLOCK_SIZE
26 
/* This header is stored at the beginning of an ANV BVH, i.e. at the address
 * returned by vk_acceleration_structure_get_va(). The compiler looks for
 * specific locations defined in this header, so the order in which the members
 * are defined is important. E.g. the first qword is currently rootNodeOffset,
 * which the compiler uses to find the TLAS and provide it to the shader.
 */
struct anv_accel_struct_header {
   /* 64-bit offset from the start of this header to the location where the
    * root node resides. That is, the address of the root node can be
    * calculated as address of header + header.rootNodeOffset.
    */
   uint64_t rootNodeOffset;

   /* The bounding box that encloses this bvh. */
   vk_aabb aabb;

   /* This word contains flags that should be set in the leaf nodes for
    * instances pointing to this BLAS. ALL_NODES_{OPAQUE_NONOPAQUE} may be
    * modified by the FORCE_OPAQUE and FORCE_NON_OPAQUE instance flags.
    */
   uint32_t instance_flags;

   /* Everything after this gets either updated/copied from the CPU or written
    * by header.comp.
    */
   uint32_t copy_dispatch_size[3];

   /* Represents the number of bytes required by a compacted acceleration
    * structure (VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR)
    */
   uint64_t compacted_size;

   /* Represents the number of bytes required by a serialized acceleration
    * structure (VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR)
    */
   uint64_t serialization_size;

   /* Represents the acceleration structure size on the device timeline
    * (VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR)
    */
   uint64_t size;

   /* Represents the number of bottom level acceleration structure pointers for
    * serialization
    * (VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR)
    */
   uint64_t instance_count;

   /* NOTE(review): not documented in the original. The name suggests the
    * address of this acceleration structure itself -- confirm against the
    * code that fills in the header.
    */
   uint64_t self_ptr;

   /* NOTE(review): presumably pads the header up to ANV_RT_BVH_HEADER_SIZE
    * (256 bytes); that works out if vk_aabb is 24 bytes -- confirm against
    * vk_bvh.h.
    */
   uint32_t padding[42];
};
79 
/* Node type encodings. */

/* Mixed internal node with type per child */
#define ANV_NODE_TYPE_MIXED            0x0
/* Internal node with 6 children (64 bytes).
 * Note: shares the value 0x0 with ANV_NODE_TYPE_MIXED.
 */
#define ANV_NODE_TYPE_INTERNAL         0x0
/* Instance leaf (128 bytes, i.e. two 64-byte blocks; see
 * ANV_RT_INSTANCE_LEAF_SIZE)
 */
#define ANV_NODE_TYPE_INSTANCE         0x1
/* Procedural leaf (64 bytes) */
#define ANV_NODE_TYPE_PROCEDURAL       0x3
/* Quad leaf (64 bytes) */
#define ANV_NODE_TYPE_QUAD             0x4
/* Indicates invalid node */
#define ANV_NODE_TYPE_INVALID          0x7
92 
93 
/* Sub-type for ANV_NODE_TYPE_INTERNAL */
#define ANV_SUB_TYPE_INTERNAL6         0x0
/* Sub-type for ANV_NODE_TYPE_QUAD */
#define ANV_SUB_TYPE_QUAD              0x0
/* Sub-type for ANV_NODE_TYPE_PROCEDURAL */
#define ANV_SUB_TYPE_PROCEDURAL        0x0

/* NOTE(review): presumably the value of the "is geometry opaque" bit packed
 * into the leaf descriptor flags -- confirm against the encoder.
 */
#define ANV_GEOMETRY_FLAG_OPAQUE       0x1
102 
/* Two-dword descriptor placed at the start of the quad and procedural leaf
 * nodes. Both dwords are packed bit fields; the widths are listed per member.
 */
struct anv_prim_leaf_desc {
   /* Shader index (24-bits) used for shader record calculations
    * Geometry mask (8-bits) used for ray masking
    */
   uint32_t shader_index_and_geom_mask;

   /* The geometry index (24-bits) specifies the n'th geometry of the scene
    * Geometry sub-type (4-bits)
    * Reserved bit (1-bit)
    * Disable opacity culling (1-bit)
    * Is geometry opaque (1-bit)
    * Reserved bit (1-bit)
    */
   uint32_t geometry_id_and_flags;
};
118 
/* Quad leaf node (see ANV_RT_QUAD_LEAF_SIZE): a leaf descriptor followed by
 * the primitive ids of a first and a second triangle and the vertex data.
 */
struct anv_quad_leaf_node {
   anv_prim_leaf_desc leaf_desc;

   /* primID of first triangle */
   uint32_t prim_index0;

   /* offset of primID of second triangle (16-bits)
    * index of first vertex of second triangle (2-bits)
    * index of second vertex of second triangle (2-bits)
    * index of third vertex of second triangle (2-bits)
    * last quad in BVH leaf (1-bit)
    * Reserved (9-bits)
    */
   uint32_t prim_index1_delta;

   /* NOTE(review): presumably the four quad vertices, three float
    * coordinates each -- confirm against the encoder.
    */
   float v[4][3];
};
135 
/* Procedural leaf node (see ANV_RT_PROCEDURAL_LEAF_SIZE): a leaf descriptor
 * followed by software-defined data.
 */
struct anv_procedural_leaf_node {
   anv_prim_leaf_desc leaf_desc;

   /* The remaining part is software defined; hardware does not have to access
    * it. DW1 is laid out as the following bit fields:
    * uint32_t numPrimitives : 4;      // number of stored primitives
    * uint32_t pad           : 32-4-N;
    * uint32_t last          : N;      // bit vector with a last bit per primitive
    */
   uint32_t DW1;

   /* NOTE(review): presumably the primitive indices of the stored
    * primitives -- confirm against the encoder.
    */
   uint32_t primIndex[13];
};
147 
/* Per-child byte stored in anv_internal_node.data[]. */
struct child_data {
   /* blockIncr (2-bits) size of child in 64 byte blocks
    * startPrim (4-bits) start primitive in fat leaf mode or child type in
    *                    mixed mode
    * padding (2-bits)
    */
   uint8_t block_incr_and_start_prim;
};
155 
/* Internal BVH node (see ANV_RT_INTERNAL_NODE_SIZE) with six child slots.
 * Child bounds are stored as 8-bit values per axis, quantized on a grid whose
 * origin is `lower` and whose per-axis size is given by the exp_* members.
 */
struct anv_internal_node {
   /* world space origin of quantization grid */
   float lower[3];

   /* offset to its children, measured in 64B blocks. */
   uint32_t child_block_offset;

   /* The type of this internal node. If the node_type is ANV_NODE_TYPE_MIXED,
    * it means that its children have more than one type, and each type is
    * stored in the startPrim of child_data. Otherwise, this internal node is
    * called a fat leaf, where all children have the same
    * ANV_NODE_TYPE_{INSTANCE, PROCEDURAL, QUAD} as this node_type.
    */
   uint8_t node_type;

   uint8_t reserved;

   /* 2^exp_x is the size of the grid in x dimension */
   int8_t exp_x;

   /* 2^exp_y is the size of the grid in y dimension */
   int8_t exp_y;

   /* 2^exp_z is the size of the grid in z dimension */
   int8_t exp_z;

   /* mask used for ray filtering */
   uint8_t node_mask;

   /* one packed child_data byte per child slot */
   child_data data[6];

   /* the quantized lower bounds in x-dimension */
   uint8_t lower_x[6];

   /* the quantized upper bounds in x-dimension */
   uint8_t upper_x[6];

   /* the quantized lower bounds in y-dimension */
   uint8_t lower_y[6];

   /* the quantized upper bounds in y-dimension */
   uint8_t upper_y[6];

   /* the quantized lower bounds in z-dimension */
   uint8_t lower_z[6];

   /* the quantized upper bounds in z-dimension */
   uint8_t upper_z[6];
};
205 
/* Instance flag bits. NOTE(review): presumably these are the values packed
 * into the instFlags field of an instance leaf -- confirm against the encoder.
 */
#define ANV_INSTANCE_FLAG_NONE                              0x0
/* Misspelled historical name, kept as an alias so existing users still
 * compile. New code should use ANV_INSTANCE_FLAG_NONE.
 */
#define ANV_INSTNACE_FLAG_NONE                              ANV_INSTANCE_FLAG_NONE
#define ANV_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE             0x1
#define ANV_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE   0x2
#define ANV_INSTANCE_FLAG_FORCE_OPAQUE                      0x4
#define ANV_INSTANCE_FLAG_FORCE_NON_OPAQUE                  0x8
#define ANV_INSTANCE_ALL_AABB                               0x40
212 
/* First 64-byte half of an instance leaf (see anv_instance_leaf). */
struct instance_leaf_part0 {
   /* shader index (24-bits) for software instancing
    * geometry mask (8-bits) used for ray masking
    */
   uint32_t shader_index_and_geom_mask;

   /* instance contribution to hit group index (24-bits)
    * Padding (5-bits)
    * DisableOpacityCull (1-bit)
    * OpaqueGeometry (1-bit)
    * Padding (1-bit)
    */
   uint32_t instance_contribution_and_geom_flags;

   /* 48 bit start node of the instanced object
    * instFlags (8-bits)
    * Padding (remaining bits)
    *
    * NOTE(review): the padding was previously documented as 16 bits, but
    * 48 + 8 leaves only 8 bits in a uint64_t -- confirm against the hardware
    * layout.
    */
   uint64_t start_node_ptr_and_inst_flags;

   /* 1st row of World2Obj transform */
   float    world2obj_vx_x;
   float    world2obj_vx_y;
   float    world2obj_vx_z;

   /* 2nd row of World2Obj transform */
   float    world2obj_vy_x;
   float    world2obj_vy_y;
   float    world2obj_vy_z;

   /* 3rd row of World2Obj transform */
   float    world2obj_vz_x;
   float    world2obj_vz_y;
   float    world2obj_vz_z;

   /* translation of Obj2World transform (on purpose in first 64 bytes) */
   float    obj2world_p_x;
   float    obj2world_p_y;
   float    obj2world_p_z;
};
253 
/* Second 64-byte half of an instance leaf (see anv_instance_leaf). */
struct instance_leaf_part1 {
   /* 48-bit pointer to the BVH that the start node belongs to */
   uint64_t bvh_ptr;

   /* The instanceCustomIndex in VkAccelerationStructureInstanceKHR */
   uint32_t instance_id;

   /* The ascending assigned index */
   uint32_t instance_index;

   /* 1st row of Obj2World transform */
   float    obj2world_vx_x;
   float    obj2world_vx_y;
   float    obj2world_vx_z;

   /* 2nd row of Obj2World transform */
   float    obj2world_vy_x;
   float    obj2world_vy_y;
   float    obj2world_vy_z;

   /* 3rd row of Obj2World transform */
   float    obj2world_vz_x;
   float    obj2world_vz_y;
   float    obj2world_vz_z;

   /* translation of World2Obj transform (kept in the second 64 bytes,
    * next to the Obj2World rows)
    */
   float    world2obj_p_x;
   float    world2obj_p_y;
   float    world2obj_p_z;
};
284 
/* Complete instance leaf: two consecutive 64-byte parts
 * (see ANV_RT_INSTANCE_LEAF_SIZE).
 */
struct anv_instance_leaf {
   /* first 64 bytes accessed during traversal */
   instance_leaf_part0 part0;

   /* second 64 bytes not accessed by hardware but accessed during shading */
   instance_leaf_part1 part1;
};
292 
/*******************************| 0
| anv_accel_struct_header       |
|-------------------------------|
| For a TLAS, the pointers      |
| to all anv_instance_leaves    |
| For a BLAS, nothing here      |
|-------------------------------|
| padding to align to           |
| 64 bytes boundary             |
|-------------------------------| bvh_layout.bvh_offset
| start with root node,         |
| followed by interleaving      |
| internal nodes and leaves     | bvh_layout.size
|*******************************/
/* Offsets describing the memory layout sketched above. */
struct bvh_layout {
   /* This should be the same as anv_accel_struct_header.rootNodeOffset.
    * For a BLAS, it's equal to ANV_RT_BVH_HEADER_SIZE;
    * for a TLAS, it's ANV_RT_BVH_HEADER_SIZE + instance_count * sizeof(uint64_t).
    * Both are then aligned to a 64B boundary.
    */
   uint64_t bvh_offset;

   /* Total size = bvh_offset + leaves + internal_nodes (assuming no internal
    * node is collapsed)
    */
   uint64_t size;
};
320 
321 #endif
322