/*
 * Copyright © 2022 Konstantin Seurer
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#version 460

#extension GL_GOOGLE_include_directive : require

#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require
#extension GL_KHR_shader_subgroup_vote : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_shader_subgroup_ballot : require

#include "vk_build_interface.h"

layout(local_size_x_id = SUBGROUP_SIZE_ID, local_size_y = 1, local_size_z = 1) in;

layout(push_constant) uniform CONSTS {
   leaf_args args;
};

/* A GLSL-adapted copy of VkAccelerationStructureInstanceKHR. */
struct AccelerationStructureInstance {
   mat3x4 transform;
   uint32_t custom_instance_and_mask;
   uint32_t sbt_offset_and_flags;
   uint64_t accelerationStructureReference;
};
TYPE(AccelerationStructureInstance, 8);

bool
build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data geom_data, uint32_t global_id)
{
   bool is_valid = true;
   triangle_indices indices = load_indices(geom_data.indices, geom_data.index_format, global_id);

   triangle_vertices vertices = load_vertices(geom_data.data, indices, geom_data.vertex_format, geom_data.stride);

   /* An inactive triangle is one for which the first (X) component of any vertex is NaN. If any
    * other vertex component is NaN, and the first is not, the behavior is undefined. If the vertex
    * format does not have a NaN representation, then all triangles are considered active.
    */
   if (isnan(vertices.vertex[0].x) || isnan(vertices.vertex[1].x) || isnan(vertices.vertex[2].x))
#if ALWAYS_ACTIVE
      is_valid = false;
#else
      return false;
#endif

   if (geom_data.transform != NULL) {
      mat4 transform = mat4(1.0);

      /* geom_data.transform points at a row-major 3x4 matrix (VkTransformMatrixKHR), while GLSL
       * matrices are column-major, hence the swapped indexing when loading it.
       */
      for (uint32_t col = 0; col < 4; col++)
         for (uint32_t row = 0; row < 3; row++)
            transform[col][row] = DEREF(INDEX(float, geom_data.transform, col + row * 4));

      for (uint32_t i = 0; i < 3; i++)
         vertices.vertex[i] = transform * vertices.vertex[i];
   }

   REF(vk_ir_triangle_node) node = REF(vk_ir_triangle_node)(dst_ptr);

   bounds.min = vec3(INFINITY);
   bounds.max = vec3(-INFINITY);

   for (uint32_t coord = 0; coord < 3; coord++)
      for (uint32_t comp = 0; comp < 3; comp++) {
         DEREF(node).coords[coord][comp] = vertices.vertex[coord][comp];
         bounds.min[comp] = min(bounds.min[comp], vertices.vertex[coord][comp]);
         bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]);
      }

   DEREF(node).base.aabb = bounds;
   DEREF(node).triangle_id = global_id;
   DEREF(node).geometry_id_and_flags = geom_data.geometry_id;
   DEREF(node).id = 9;

   return is_valid;
}
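
/* Note on the source layout consumed by build_aabb() below: per the Vulkan spec, each AABB
 * primitive is a VkAabbPositionsKHR, i.e. six tightly packed floats
 * { minX, minY, minZ, maxX, maxY, maxZ }, so the (vec, comp) loop reads indices 0-2 as the
 * minimum and 3-5 as the maximum corner.
 */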

bool
build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id)
{
   bool is_valid = true;
   REF(vk_ir_aabb_node) node = REF(vk_ir_aabb_node)(dst_ptr);

   for (uint32_t vec = 0; vec < 2; vec++)
      for (uint32_t comp = 0; comp < 3; comp++) {
         float coord = DEREF(INDEX(float, src_ptr, comp + vec * 3));

         if (vec == 0)
            bounds.min[comp] = coord;
         else
            bounds.max[comp] = coord;
      }

   /* An inactive AABB is one for which the minimum X coordinate is NaN. If any other component is
    * NaN, and the first is not, the behavior is undefined.
    */
   if (isnan(bounds.min.x))
#if ALWAYS_ACTIVE
      is_valid = false;
#else
      return false;
#endif

   DEREF(node).base.aabb = bounds;
   DEREF(node).primitive_id = global_id;
   DEREF(node).geometry_id_and_flags = geometry_id;

   return is_valid;
}

vk_aabb
calculate_instance_node_bounds(uint64_t base_ptr, mat3x4 otw_matrix)
{
   vk_aabb aabb;

   vk_aabb blas_aabb = DEREF(REF(vk_aabb)(base_ptr + BVH_BOUNDS_OFFSET));

   /* Transform the BLAS bounds into world space one component at a time: start from the
    * translation column and accumulate the min/max of each basis column's contribution, which
    * yields the tightest AABB of the transformed box without transforming its eight corners.
    */
   for (uint32_t comp = 0; comp < 3; ++comp) {
      aabb.min[comp] = otw_matrix[comp][3];
      aabb.max[comp] = otw_matrix[comp][3];
      for (uint32_t col = 0; col < 3; ++col) {
         aabb.min[comp] +=
            min(otw_matrix[comp][col] * blas_aabb.min[col], otw_matrix[comp][col] * blas_aabb.max[col]);
         aabb.max[comp] +=
            max(otw_matrix[comp][col] * blas_aabb.min[col], otw_matrix[comp][col] * blas_aabb.max[col]);
      }
   }
   return aabb;
}

bool
build_instance(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id)
{
   REF(vk_ir_instance_node) node = REF(vk_ir_instance_node)(dst_ptr);

   AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));

   /* An inactive instance is one whose acceleration structure handle is VK_NULL_HANDLE. Since the
    * active terminology is only relevant for BVH updates, which we do not implement, we can also
    * skip instances with mask == 0. The mask lives in the top 8 bits, so any value below
    * (1u << 24u) has mask == 0.
    */
   if (instance.accelerationStructureReference == 0 || instance.custom_instance_and_mask < (1u << 24u))
      return false;

   DEREF(node).base_ptr = instance.accelerationStructureReference;

   mat4 transform = mat4(instance.transform);
   DEREF(node).otw_matrix = mat3x4(transform);

   bounds = calculate_instance_node_bounds(instance.accelerationStructureReference, mat3x4(transform));

   DEREF(node).base.aabb = bounds;
   DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask;
   DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags;
   DEREF(node).instance_id = global_id;

   return true;
}
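
/* One invocation per leaf primitive: main() writes the IR leaf node for this primitive, publishes
 * its packed node id (or VK_BVH_INVALID_NODE when inactive) to the key/id pair array consumed by
 * later build passes, and accumulates the active leaf count and total scene bounds in the build
 * header.
 */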

void
main(void)
{
   uint32_t global_id = gl_GlobalInvocationID.x;
   uint32_t primitive_id = args.geom_data.first_id + global_id;

   REF(key_id_pair) id_ptr = INDEX(key_id_pair, args.ids, primitive_id);
   uint32_t src_offset = global_id * args.geom_data.stride;

   uint32_t dst_stride;
   uint32_t node_type;
   if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
      dst_stride = SIZEOF(vk_ir_triangle_node);
      node_type = vk_ir_node_triangle;
   } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
      dst_stride = SIZEOF(vk_ir_aabb_node);
      node_type = vk_ir_node_aabb;
   } else {
      dst_stride = SIZEOF(vk_ir_instance_node);
      node_type = vk_ir_node_instance;
   }

   uint32_t dst_offset = primitive_id * dst_stride;
   VOID_REF dst_ptr = OFFSET(args.bvh, dst_offset);

   vk_aabb bounds;
   bool is_active;
   if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
      is_active = build_triangle(bounds, dst_ptr, args.geom_data, global_id);
   } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
      VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
      is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, global_id);
   } else {
      VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
      /* arrayOfPointers: each entry is a 64-bit pointer to the instance data instead of the
       * instance data itself, so dereference once to reach it. */
      if (args.geom_data.stride == 8) {
         src_ptr = DEREF(REF(VOID_REF)(src_ptr));
      }

      is_active = build_instance(bounds, src_ptr, dst_ptr, global_id);
   }

#if ALWAYS_ACTIVE
   if (!is_active && args.geom_data.geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
      bounds.min = vec3(0.0);
      bounds.max = vec3(0.0);
      is_active = true;
   }
#endif

   DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : VK_BVH_INVALID_NODE;

   uvec4 ballot = subgroupBallot(is_active);
   if (subgroupElect())
      atomicAdd(DEREF(args.header).active_leaf_count, subgroupBallotBitCount(ballot));

   atomicMin(DEREF(args.header).min_bounds[0], to_emulated_float(bounds.min.x));
   atomicMin(DEREF(args.header).min_bounds[1], to_emulated_float(bounds.min.y));
   atomicMin(DEREF(args.header).min_bounds[2], to_emulated_float(bounds.min.z));
   atomicMax(DEREF(args.header).max_bounds[0], to_emulated_float(bounds.max.x));
   atomicMax(DEREF(args.header).max_bounds[1], to_emulated_float(bounds.max.y));
   atomicMax(DEREF(args.header).max_bounds[2], to_emulated_float(bounds.max.z));
}
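
/* to_emulated_float() is provided by vk_build_interface.h, not defined here. Its job is a
 * monotonic float-to-integer mapping so that plain integer atomicMin/atomicMax on the header
 * bounds order the same way float comparisons would. A common implementation of such a mapping
 * (shown only as an illustrative sketch, not necessarily the one the header uses) is:
 *
 *    int32_t to_emulated_float(float f)
 *    {
 *       int32_t bits = floatBitsToInt(f);
 *       return bits >= 0 ? bits : bits ^ 0x7fffffff;
 *    }
 *
 * Positive floats already compare correctly as two's-complement integers; flipping the non-sign
 * bits of negative values reverses their ordering so that more negative floats map to smaller
 * integers, while keeping every negative value below every positive one.
 */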