1 /* Copyright © 2024 Intel Corporation 2 * SPDX-License-Identifier: MIT 3 */ 4 5 #ifndef ANV_BVH_H 6 #define ANV_BVH_H 7 8 #ifdef VULKAN 9 #define VK_UUID_SIZE 16 10 #else 11 #include <vulkan/vulkan.h> 12 typedef struct anv_prim_leaf_desc anv_prim_leaf_desc; 13 typedef struct child_data child_data; 14 typedef struct instance_leaf_part0 instance_leaf_part0; 15 typedef struct instance_leaf_part1 instance_leaf_part1; 16 #endif 17 18 #include "vk_bvh.h" 19 20 #define ANV_RT_BLOCK_SIZE 64 21 #define ANV_RT_BVH_HEADER_SIZE 256 22 #define ANV_RT_INSTANCE_LEAF_SIZE (2 * ANV_RT_BLOCK_SIZE) 23 #define ANV_RT_QUAD_LEAF_SIZE ANV_RT_BLOCK_SIZE 24 #define ANV_RT_PROCEDURAL_LEAF_SIZE ANV_RT_BLOCK_SIZE 25 #define ANV_RT_INTERNAL_NODE_SIZE ANV_RT_BLOCK_SIZE 26 27 /* This header is stored at the beginning of ANV BVH, i.e. the return value of 28 * vk_acceleration_structure_get_va(). The compiler will look for a specific location 29 * defined in this header, so the order in which the members are defined is important. 30 * Eg. The first qword is currently rootNodeOffset, where the compiler uses to find the 31 * TLAS and provide it for the shader. 32 */ 33 struct anv_accel_struct_header { 34 /* 64-bit offset from the start of this header to the location where the 35 * root node resides. That is, the address of root node can be calculated 36 * as address of header + header.rootNodeOffset. 37 */ 38 uint64_t rootNodeOffset; 39 40 /* The bounding box that encloses this bvh. */ 41 vk_aabb aabb; 42 43 /* This word contains flags that should be set in the leaf nodes for 44 * instances pointing to this BLAS. ALL_NODES_{OPAQUE_NONOPAQUE} may be 45 * modified by the FORCE_OPAQUE and FORCE_NON_OPAQUE instance flags. 46 */ 47 uint32_t instance_flags; 48 49 /* Everything after this gets either updated/copied from the CPU or written 50 * by header.comp. 51 */ 52 uint32_t copy_dispatch_size[3]; 53 54 /* Represents the number of bytes required by a compacted acceleration 55 * structure (VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR) 56 */ 57 uint64_t compacted_size; 58 59 /* Represents the number of bytes required by a serialized acceleration 60 * structure (VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR) 61 */ 62 uint64_t serialization_size; 63 64 /* Represents the acceleration structure size on the device timeline 65 * (VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR) 66 */ 67 uint64_t size; 68 69 /* Represents the number of bottom level acceleration structure pointers for 70 * serialization 71 * (VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR) 72 */ 73 uint64_t instance_count; 74 75 uint64_t self_ptr; 76 77 uint32_t padding[42]; 78 }; 79 80 /* Mixed internal node with type per child */ 81 #define ANV_NODE_TYPE_MIXED 0x0 82 /* Internal node with 6 children (64 bytes) */ 83 #define ANV_NODE_TYPE_INTERNAL 0x0 84 /* Instance leaf (64 bytes) */ 85 #define ANV_NODE_TYPE_INSTANCE 0x1 86 /* Procedural leaf (64 bytes) */ 87 #define ANV_NODE_TYPE_PROCEDURAL 0x3 88 /* Quad leaf (64 bytes) */ 89 #define ANV_NODE_TYPE_QUAD 0x4 90 /* Indicates invalid node */ 91 #define ANV_NODE_TYPE_INVALID 0x7 92 93 94 /* Sub-type for NODE_TYPE_INTERNAL */ 95 #define ANV_SUB_TYPE_INTERNAL6 0x0 96 /* Sub-type for NODE_TYPE_QUAD */ 97 #define ANV_SUB_TYPE_QUAD 0x0 98 /* Sub-type for NODE_TYPE_PROCEDURAL */ 99 #define ANV_SUB_TYPE_PROCEDURAL 0x0 100 101 #define ANV_GEOMETRY_FLAG_OPAQUE 0x1 102 103 struct anv_prim_leaf_desc { 104 /* Shader index (24-bits) used for shader record calculations 105 * Geometry mask (8-bits) used for ray masking 106 */ 107 uint32_t shader_index_and_geom_mask; 108 109 /* The geometry index (24-bits) specifies the n'th geometry of the scene 110 * Geometry sub-type (4-bits) 111 * Reserved bit (1-bit) 112 * disable opacity culling (1-bit) 113 * Is geometry opaque (1-bit) 114 * Reserved bit (1-bit) 115 */ 116 uint32_t geometry_id_and_flags; 117 }; 118 119 struct anv_quad_leaf_node { 120 anv_prim_leaf_desc leaf_desc; 121 122 /* primID of first triangle */ 123 uint32_t prim_index0; 124 125 /* offset of primID of second triangle (16-bits) 126 * index of first vertex of second triangle (2-bits) 127 * index of second vertex of second triangle (2-bits) 128 * index of third vertex of second triangle (2-bits) 129 * last quad in BVH leaf (1-bit) 130 * Reserved (9-bits) 131 */ 132 uint32_t prim_index1_delta; 133 float v[4][3]; 134 }; 135 136 struct anv_procedural_leaf_node { 137 anv_prim_leaf_desc leaf_desc; 138 139 /* This remaining part is software defined, hardware does not have to access it 140 * uint32_t numPrimitives:4; // number of stored primitives 141 * uint32_t pad : 32-4-N; 142 * uint32_t last : N; // bit vector with a last bit per primitive 143 */ 144 uint32_t DW1; 145 uint32_t primIndex[13]; 146 }; 147 148 struct child_data { 149 /* blockIncr (2-bits) size of child in 64 byte blocks 150 * startPrim (4-bits) start primitive in fat leaf mode or child type in mixed mode 151 * padding (2-bits) 152 */ 153 uint8_t block_incr_and_start_prim; 154 }; 155 156 struct anv_internal_node { 157 /* world space origin of quantization grid */ 158 float lower[3]; 159 160 /* offset to its children, measured in 64B blocks. */ 161 uint32_t child_block_offset; 162 163 /* The type of this internal node. If the node_type is ANV_NODE_TYPE_MIXED, 164 * it means that its children have more than one type, and each type is 165 * stored in the startPrim of child_data. Otherwise, this internal node is 166 * called a fat leaf, where all children have the same 167 * ANV_NODE_TYPE_{INSTANCE, PROCEDURAL, QUAD} as this node_type. 168 */ 169 uint8_t node_type; 170 171 uint8_t reserved; 172 173 /* 2^exp_x is the size of the grid in x dimension */ 174 int8_t exp_x; 175 176 /* 2^exp_y is the size of the grid in y dimension */ 177 int8_t exp_y; 178 179 /* 2^exp_z is the size of the grid in z dimension */ 180 int8_t exp_z; 181 182 /* mask used for ray filtering */ 183 uint8_t node_mask; 184 185 child_data data[6]; 186 187 /* the quantized lower bounds in x-dimension */ 188 uint8_t lower_x[6]; 189 190 /* the quantized upper bounds in x-dimension */ 191 uint8_t upper_x[6]; 192 193 /* the quantized lower bounds in y-dimension */ 194 uint8_t lower_y[6]; 195 196 /* the quantized upper bounds in y-dimension */ 197 uint8_t upper_y[6]; 198 199 /* the quantized lower bounds in z-dimension */ 200 uint8_t lower_z[6]; 201 202 /* the quantized upper bounds in z-dimension */ 203 uint8_t upper_z[6]; 204 }; 205 206 #define ANV_INSTNACE_FLAG_NONE 0x0 207 #define ANV_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE 0x1 208 #define ANV_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE 0x2 209 #define ANV_INSTANCE_FLAG_FORCE_OPAQUE 0x4 210 #define ANV_INSTANCE_FLAG_FORCE_NON_OPAQUE 0x8 211 #define ANV_INSTANCE_ALL_AABB 0x40 212 213 struct instance_leaf_part0 { 214 /* shader index (24-bits) for software instancing 215 * geometry mask (8-bits) used for ray masking 216 */ 217 uint32_t shader_index_and_geom_mask; 218 219 /* instance contribution to hit group index (24-bits) 220 * Padding (5-bits) 221 * DisableOpacityCull (1-bit) 222 * OpaqueGeometry (1-bit) 223 * Padding (1-bit) 224 */ 225 uint32_t instance_contribution_and_geom_flags; 226 227 /* 48 bit start node of the instanced object 228 * instFlags (8-bits) 229 * Padding (16-bits) 230 */ 231 uint64_t start_node_ptr_and_inst_flags; 232 233 /* 1st row of Worl2Obj transform */ 234 float world2obj_vx_x; 235 float world2obj_vx_y; 236 float world2obj_vx_z; 237 238 /* 2nd row of Worl2Obj transform */ 239 float world2obj_vy_x; 240 float world2obj_vy_y; 241 float world2obj_vy_z; 242 243 /* 3rd row of Worl2Obj transform */ 244 float world2obj_vz_x; 245 float world2obj_vz_y; 246 float world2obj_vz_z; 247 248 /* translation of Obj2World transform (on purpose in fist 64 bytes) */ 249 float obj2world_p_x; 250 float obj2world_p_y; 251 float obj2world_p_z; 252 }; 253 254 struct instance_leaf_part1 { 255 /* 48-bits pointer to BVH where start node belongs too */ 256 uint64_t bvh_ptr; 257 258 /* The instanceCustomIndex in VkAccelerationStructureInstanceKHR */ 259 uint32_t instance_id; 260 261 /* The ascending assigned index */ 262 uint32_t instance_index; 263 264 /* 1st row of Obj2World transform */ 265 float obj2world_vx_x; 266 float obj2world_vx_y; 267 float obj2world_vx_z; 268 269 /* 2nd row of Obj2World transform */ 270 float obj2world_vy_x; 271 float obj2world_vy_y; 272 float obj2world_vy_z; 273 274 /* 3rd row of Obj2World transform */ 275 float obj2world_vz_x; 276 float obj2world_vz_y; 277 float obj2world_vz_z; 278 279 /* translation of World2Obj transform (on purpose in fist 64 bytes) */ 280 float world2obj_p_x; 281 float world2obj_p_y; 282 float world2obj_p_z; 283 }; 284 285 struct anv_instance_leaf { 286 /* first 64 bytes accessed during traversal */ 287 instance_leaf_part0 part0; 288 289 /* second 64 bytes not accessed by hardware but accessed during shading */ 290 instance_leaf_part1 part1; 291 }; 292 293 /*******************************| 0 294 | anv_accel_struct_header | 295 |-------------------------------| 296 | For a TLAS, the pointers | 297 | to all anv_instance_leaves | 298 | For a BLAS, nothing here | 299 |-------------------------------| 300 | padding to align to | 301 | 64 bytes boundary | 302 |-------------------------------| bvh_layout.bvh_offset 303 | start with root node, | 304 | followed by interleaving | 305 | internal nodes and leaves | bvh_layout.size 306 |*******************************/ 307 struct bvh_layout { 308 /* This should be same as anv_accel_struct_header.rootNodeOffset. 309 * For blas, it's equal to ANV_RT_BVH_HEADER_SIZE; 310 * For tlas, it's ANV_RT_BVH_HEADER_SIZE + instance_count * sizeof(uint64_t) 311 * Both will then be aligned to 64B boundary. 312 */ 313 uint64_t bvh_offset; 314 315 /* Total size = bvh_offset + leaves + internal_nodes (assuming there's no 316 * internal node collpased) 317 */ 318 uint64_t size; 319 }; 320 321 #endif 322