/*
 * Copyright © 2022 Friedrich Vock
 *
 * SPDX-License-Identifier: MIT
 */

#version 460

#extension GL_GOOGLE_include_directive : require

#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require
#extension GL_KHR_memory_scope_semantics : require

layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

#include "build_helpers.h"
#include "build_interface.h"

layout(push_constant) uniform CONSTS {
   encode_args args;
};

/* Record `parent` as the parent id of node `child`.
 *
 * The value is stored in the 32-bit word at
 * args.output_bvh - child / 8 * 4 - 4, i.e. below the output BVH base.
 */
void set_parent(uint32_t child, uint32_t parent)
{
   uint64_t parent_slot = args.output_bvh - child / 8 * 4 - 4;
   DEREF(REF(uint32_t)(parent_slot)) = parent;
}

/* Encode the intermediate (IR) BVH into the output BVH.
 *
 * Every invocation first converts the triangle/AABB leaf whose index equals
 * its global invocation id. Afterwards the first ir_internal_node_count
 * invocations each encode one internal node: invocation 0 takes the last IR
 * internal node (treated as the root) and later invocations walk backwards
 * through the array. A non-root invocation spins until the invocation that
 * encodes its parent has stored an output offset for it, or until its node
 * has been marked VK_NULL_BVH_OFFSET because it was collapsed into its parent.
 */
void
main()
{
   uint32_t invocation = gl_GlobalInvocationID.x;

   /* Encode leaf nodes. The leaf region begins at the root node's offset plus
    * the size of one box32 node.
    */
   uint32_t leaf_base = id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node);

   uint32_t ir_leaf_stride;
   uint32_t out_leaf_stride;
   switch (args.geometry_type) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR: {
      ir_leaf_stride = SIZEOF(vk_ir_triangle_node);
      out_leaf_stride = SIZEOF(radv_bvh_triangle_node);

      /* NOTE(review): there is no bound check against args.leaf_node_count
       * here - presumably the dispatch is sized so that every invocation maps
       * to a leaf; confirm against the host code.
       */
      vk_ir_triangle_node ir_triangle =
         DEREF(REF(vk_ir_triangle_node)(OFFSET(args.intermediate_bvh, invocation * ir_leaf_stride)));
      REF(radv_bvh_triangle_node) out_triangle =
         REF(radv_bvh_triangle_node)(OFFSET(args.output_bvh, leaf_base + invocation * out_leaf_stride));

      DEREF(out_triangle).coords = ir_triangle.coords;
      DEREF(out_triangle).triangle_id = ir_triangle.triangle_id;
      DEREF(out_triangle).geometry_id_and_flags = ir_triangle.geometry_id_and_flags;
      /* Fixed value; its meaning is defined by the triangle node format,
       * which is not visible in this file.
       */
      DEREF(out_triangle).id = 9;

      break;
   }
   case VK_GEOMETRY_TYPE_AABBS_KHR: {
      ir_leaf_stride = SIZEOF(vk_ir_aabb_node);
      out_leaf_stride = SIZEOF(radv_bvh_aabb_node);

      vk_ir_aabb_node ir_aabb =
         DEREF(REF(vk_ir_aabb_node)(OFFSET(args.intermediate_bvh, invocation * ir_leaf_stride)));
      REF(radv_bvh_aabb_node) out_aabb =
         REF(radv_bvh_aabb_node)(OFFSET(args.output_bvh, leaf_base + invocation * out_leaf_stride));

      DEREF(out_aabb).primitive_id = ir_aabb.primitive_id;
      DEREF(out_aabb).geometry_id_and_flags = ir_aabb.geometry_id_and_flags;

      break;
   }
   default:
      /* Instances: only the strides are set up here. Instance nodes have to
       * be emitted inside the internal-node loop since encoding them loads an
       * address from the IR node which is uninitialized for inactive nodes.
       */
      ir_leaf_stride = SIZEOF(vk_ir_instance_node);
      out_leaf_stride = SIZEOF(radv_bvh_instance_node);
      break;
   }

   /* Only the first ir_internal_node_count invocations encode an internal node. */
   if (invocation >= DEREF(args.header).ir_internal_node_count)
      return;

   /* Encode internal nodes. Revert the order so we start at the root. */
   uint32_t node_index = DEREF(args.header).ir_internal_node_count - 1 - invocation;

   uint32_t ir_leaf_region_size = args.leaf_node_count * ir_leaf_stride;
   uint32_t internal_base = leaf_base + args.leaf_node_count * out_leaf_stride;

   /* IR internal nodes are stored behind all IR leaf nodes. */
   REF(vk_ir_box_node) ir_internal_nodes =
      REF(vk_ir_box_node)(OFFSET(args.intermediate_bvh, ir_leaf_region_size));
   REF(vk_ir_box_node) ir_node = INDEX(vk_ir_box_node, ir_internal_nodes, node_index);
   vk_ir_box_node ir_box = DEREF(ir_node);

   bool is_root_node = node_index == DEREF(args.header).ir_internal_node_count - 1;

   for (;;) {
      /* Make changes to the current node's BVH offset value visible. */
      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

      uint32_t bvh_offset = is_root_node ? id_to_offset(RADV_BVH_ROOT_NODE) : DEREF(ir_node).bvh_offset;

      /* No offset has been stored for this node yet: poll again. */
      if (bvh_offset == VK_UNKNOWN_BVH_OFFSET)
         continue;

      /* This node was collapsed into its parent: nothing to encode. */
      if (bvh_offset == VK_NULL_BVH_OFFSET)
         break;

      REF(radv_bvh_box32_node) out_box = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, bvh_offset));
      uint32_t self_id = pack_node_id(bvh_offset, radv_bvh_node_box32);

      /* Start from the (up to two) valid children of the IR node. */
      uint32_t child_count = 0;
      uint32_t child_ids[4] = {RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE,
                               RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE};

      for (uint32_t slot = 0; slot < 2; ++slot) {
         if (ir_box.children[slot] != RADV_BVH_INVALID_NODE)
            child_ids[child_count++] = ir_box.children[slot];
      }

      /* Repeatedly replace the internal child with the largest surface area
       * by its own children until there are four children or no internal
       * child is left.
       */
      while (child_count < 4) {
         int32_t collapse_slot = -1;
         float best_area = -INFINITY;

         for (int32_t candidate = 0; candidate < child_count; ++candidate) {
            if (ir_id_to_type(child_ids[candidate]) == vk_ir_node_internal) {
               vk_aabb bounds =
                  DEREF(REF(vk_ir_node)(OFFSET(args.intermediate_bvh,
                                               ir_id_to_offset(child_ids[candidate])))).aabb;

               float area = aabb_surface_area(bounds);
               if (area > best_area) {
                  best_area = area;
                  collapse_slot = candidate;
               }
            }
         }

         /* No internal child was selected. */
         if (collapse_slot == -1)
            break;

         REF(vk_ir_box_node) collapsed_node =
            REF(vk_ir_box_node)(OFFSET(args.intermediate_bvh,
                                       ir_id_to_offset(child_ids[collapse_slot])));
         uint32_t grandchildren[2] = DEREF(collapsed_node).children;
         uint32_t valid_grandchildren = 0;

         if (grandchildren[1] != RADV_BVH_INVALID_NODE)
            ++valid_grandchildren;

         /* Keep the single valid grandchild (if any) in element 0. */
         if (grandchildren[0] != RADV_BVH_INVALID_NODE)
            ++valid_grandchildren;
         else
            grandchildren[0] = grandchildren[1];

         if (valid_grandchildren == 0) {
            /* Nothing to pull up: drop the slot by moving the last child into it. */
            --child_count;
            child_ids[collapse_slot] = child_ids[child_count];
         } else {
            if (valid_grandchildren > 1)
               child_ids[child_count++] = grandchildren[1];

            child_ids[collapse_slot] = grandchildren[0];
         }

         /* Tell the invocation that polls this node that it must not encode it. */
         DEREF(collapsed_node).bvh_offset = VK_NULL_BVH_OFFSET;
      }

      /* Assign output offsets to the children, store their bounds in this
       * node and record this node as their parent.
       */
      for (uint32_t slot = 0; slot < child_count; ++slot) {
         uint32_t ir_type = ir_id_to_type(child_ids[slot]);
         uint32_t ir_offset = ir_id_to_offset(child_ids[slot]);
         uint32_t out_offset;

         if (ir_type == vk_ir_node_internal) {
#if COMPACT
            /* Take the next box32-sized range from the shared header counter. */
            out_offset = atomicAdd(DEREF(args.header).dst_node_offset, SIZEOF(radv_bvh_box32_node));
#else
            /* Keep the IR ordering: the n-th IR internal node maps to the
             * n-th box32 slot behind the leaves.
             */
            uint32_t internal_index = (ir_offset - ir_leaf_region_size) / SIZEOF(vk_ir_box_node);
            out_offset = internal_base + internal_index * SIZEOF(radv_bvh_box32_node);
#endif

            /* Store the offset in the IR node; the invocation that owns this
             * child polls this field.
             */
            REF(vk_ir_box_node) ir_child = REF(vk_ir_box_node)(OFFSET(args.intermediate_bvh, ir_offset));
            DEREF(ir_child).bvh_offset = out_offset;
         } else {
            uint32_t leaf_index = ir_offset / ir_leaf_stride;
            out_offset = leaf_base + leaf_index * out_leaf_stride;

            if (ir_type == vk_ir_node_instance) {
               /* Instance leaves are encoded here instead of in the leaf pass above. */
               vk_ir_instance_node ir_instance =
                  DEREF(REF(vk_ir_instance_node)(OFFSET(args.intermediate_bvh, ir_offset)));
               REF(radv_bvh_instance_node) out_instance =
                  REF(radv_bvh_instance_node)(OFFSET(args.output_bvh, out_offset));

               radv_accel_struct_header blas_header =
                  DEREF(REF(radv_accel_struct_header)(ir_instance.base_ptr));

               DEREF(out_instance).bvh_ptr = addr_to_node(ir_instance.base_ptr + blas_header.bvh_offset);
               DEREF(out_instance).bvh_offset = blas_header.bvh_offset;

               mat4 object_to_world = mat4(ir_instance.otw_matrix);
               mat4 world_to_object = transpose(inverse(transpose(object_to_world)));
               DEREF(out_instance).wto_matrix = mat3x4(world_to_object);
               DEREF(out_instance).otw_matrix = mat3x4(object_to_world);

               DEREF(out_instance).custom_instance_and_mask = ir_instance.custom_instance_and_mask;
               DEREF(out_instance).sbt_offset_and_flags =
                  encode_sbt_offset_and_flags(ir_instance.sbt_offset_and_flags);
               DEREF(out_instance).instance_id = ir_instance.instance_id;
            }
         }

         vk_aabb child_bounds =
            DEREF(REF(vk_ir_node)(OFFSET(args.intermediate_bvh, ir_offset))).aabb;

         DEREF(out_box).coords[slot] = child_bounds;

         /* From here on the slot holds the output node id instead of the IR id. */
         uint32_t packed_child = pack_node_id(out_offset, ir_type_to_bvh_type(ir_type));
         child_ids[slot] = packed_child;
         set_parent(packed_child, self_id);
      }

      /* Unused child slots get NaN bounds. */
      for (uint slot = child_count; slot < 4; ++slot) {
         for (uint axis = 0; axis < 3; ++axis) {
            DEREF(out_box).coords[slot].min[axis] = NAN;
            DEREF(out_box).coords[slot].max[axis] = NAN;
         }
      }

      /* Make changes to the children's BVH offset value available to the other invocations. */
      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

      DEREF(out_box).children = child_ids;
      break;
   }

   if (!is_root_node)
      return;

   /* The root invocation also fills in the acceleration structure header,
    * located args.output_bvh_offset bytes before the BVH, and marks the root
    * as having no parent.
    */
   REF(radv_accel_struct_header) out_header =
      REF(radv_accel_struct_header)(args.output_bvh - args.output_bvh_offset);
   DEREF(out_header).aabb = ir_box.base.aabb;
   DEREF(out_header).bvh_offset = args.output_bvh_offset;

   set_parent(RADV_BVH_ROOT_NODE, RADV_BVH_INVALID_NODE);
}