1/* 2 * Copyright © 2022 Friedrich Vock 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#version 460 25 26#extension GL_GOOGLE_include_directive : require 27 28#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require 29#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require 30#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require 31#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require 32#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require 33#extension GL_EXT_scalar_block_layout : require 34#extension GL_EXT_buffer_reference : require 35#extension GL_EXT_buffer_reference2 : require 36#extension GL_KHR_memory_scope_semantics : require 37 38layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 39 40#include "build_helpers.h" 41#include "build_interface.h" 42 43layout(push_constant) uniform CONSTS { 44 encode_args args; 45}; 46 47void set_parent(uint32_t child, uint32_t parent) 48{ 49 uint64_t addr = args.output_bvh - child / 8 * 4 - 4; 50 DEREF(REF(uint32_t)(addr)) = parent; 51} 52 53void 54main() 55{ 56 /* Revert the order so we start at the root */ 57 uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - gl_GlobalInvocationID.x; 58 59 uint32_t output_leaf_node_size; 60 switch (args.geometry_type) { 61 case VK_GEOMETRY_TYPE_TRIANGLES_KHR: 62 output_leaf_node_size = SIZEOF(radv_bvh_triangle_node); 63 break; 64 case VK_GEOMETRY_TYPE_AABBS_KHR: 65 output_leaf_node_size = SIZEOF(radv_bvh_aabb_node); 66 break; 67 default: /* instances */ 68 output_leaf_node_size = SIZEOF(radv_bvh_instance_node); 69 break; 70 } 71 72 uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * SIZEOF(radv_ir_node); 73 uint32_t dst_leaf_offset = 74 id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node); 75 uint32_t dst_internal_offset = dst_leaf_offset + args.leaf_node_count * output_leaf_node_size; 76 77 REF(radv_ir_box_node) intermediate_internal_nodes = 78 REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, intermediate_leaf_nodes_size); 79 REF(radv_ir_box_node) src_node = INDEX(radv_ir_box_node, intermediate_internal_nodes, global_id); 80 radv_ir_box_node src = DEREF(src_node); 81 82 bool is_root_node = global_id == DEREF(args.header).ir_internal_node_count - 1; 83 84 for (;;) { 85 /* Make changes to the current node's BVH offset value visible. */ 86 memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer, 87 gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); 88 89 uint32_t bvh_offset = is_root_node ? id_to_offset(RADV_BVH_ROOT_NODE) : DEREF(src_node).bvh_offset; 90 if (bvh_offset == RADV_UNKNOWN_BVH_OFFSET) 91 continue; 92 93 if (bvh_offset == RADV_NULL_BVH_OFFSET) 94 break; 95 96 REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, bvh_offset)); 97 uint32_t node_id = pack_node_id(bvh_offset, radv_bvh_node_box32); 98 99 uint32_t found_child_count = 0; 100 uint32_t children[4] = {RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE, 101 RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE}; 102 103 for (uint32_t i = 0; i < 2; ++i) 104 if (src.children[i] != RADV_BVH_INVALID_NODE) 105 children[found_child_count++] = src.children[i]; 106 107 while (found_child_count < 4) { 108 int32_t collapsed_child_index = -1; 109 float largest_surface_area = -INFINITY; 110 111 for (int32_t i = 0; i < found_child_count; ++i) { 112 if (ir_id_to_type(children[i]) != radv_ir_node_internal) 113 continue; 114 115 radv_aabb bounds = 116 DEREF(REF(radv_ir_node)OFFSET(args.intermediate_bvh, 117 ir_id_to_offset(children[i]))).aabb; 118 119 float surface_area = aabb_surface_area(bounds); 120 if (surface_area > largest_surface_area) { 121 largest_surface_area = surface_area; 122 collapsed_child_index = i; 123 } 124 } 125 126 if (collapsed_child_index != -1) { 127 REF(radv_ir_box_node) child_node = 128 REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, 129 ir_id_to_offset(children[collapsed_child_index])); 130 uint32_t grandchildren[2] = DEREF(child_node).children; 131 uint32_t valid_grandchild_count = 0; 132 133 if (grandchildren[1] != RADV_BVH_INVALID_NODE) 134 ++valid_grandchild_count; 135 136 if (grandchildren[0] != RADV_BVH_INVALID_NODE) 137 ++valid_grandchild_count; 138 else 139 grandchildren[0] = grandchildren[1]; 140 141 if (valid_grandchild_count > 1) 142 children[found_child_count++] = grandchildren[1]; 143 144 if (valid_grandchild_count > 0) 145 children[collapsed_child_index] = grandchildren[0]; 146 else { 147 found_child_count--; 148 children[collapsed_child_index] = children[found_child_count]; 149 } 150 151 DEREF(child_node).bvh_offset = RADV_NULL_BVH_OFFSET; 152 } else 153 break; 154 } 155 156 for (uint32_t i = 0; i < found_child_count; ++i) { 157 uint32_t type = ir_id_to_type(children[i]); 158 uint32_t offset = ir_id_to_offset(children[i]); 159 uint32_t dst_offset; 160 161 if (type == radv_ir_node_internal) { 162#if COMPACT 163 dst_offset = atomicAdd(DEREF(args.header).dst_node_offset, SIZEOF(radv_bvh_box32_node)); 164#else 165 uint32_t offset_in_internal_nodes = offset - intermediate_leaf_nodes_size; 166 uint32_t child_index = offset_in_internal_nodes / SIZEOF(radv_ir_box_node); 167 dst_offset = dst_internal_offset + child_index * SIZEOF(radv_bvh_box32_node); 168#endif 169 170 REF(radv_ir_box_node) child_node = REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, offset); 171 DEREF(child_node).bvh_offset = dst_offset; 172 } else { 173 uint32_t child_index = offset / SIZEOF(radv_ir_node); 174 dst_offset = dst_leaf_offset + child_index * output_leaf_node_size; 175 } 176 177 radv_aabb child_aabb = 178 DEREF(REF(radv_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb; 179 180 DEREF(dst_node).coords[i] = child_aabb; 181 182 uint32_t child_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type)); 183 children[i] = child_id; 184 set_parent(child_id, node_id); 185 } 186 187 for (uint i = found_child_count; i < 4; ++i) { 188 for (uint comp = 0; comp < 3; ++comp) { 189 DEREF(dst_node).coords[i].min[comp] = NAN; 190 DEREF(dst_node).coords[i].max[comp] = NAN; 191 } 192 } 193 194 /* Make changes to the children's BVH offset value available to the other invocations. */ 195 memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer, 196 gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); 197 198 DEREF(dst_node).children = children; 199 break; 200 } 201 202 if (is_root_node) { 203 REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh - args.output_bvh_offset); 204 DEREF(header).aabb = src.base.aabb; 205 DEREF(header).bvh_offset = args.output_bvh_offset; 206 207 set_parent(RADV_BVH_ROOT_NODE, RADV_BVH_INVALID_NODE); 208 } 209} 210