1/* 2 * Copyright © 2022 Bas Nieuwenhuizen 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 */

/* LBVH "generate IR" pass: one invocation per leaf walks bottom-up toward the
 * root, building internal vk_ir_box_node records (AABB + child ids) on the way.
 * Internal nodes are allocated on demand so that children are always written
 * before their parents, which the encoder relies on.
 */

#version 460

#extension GL_GOOGLE_include_directive : require

#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require
/* Needed for the scope/semantics arguments on atomicAdd() below
 * (Vulkan memory model). */
#extension GL_KHR_memory_scope_semantics : require

#include "vk_build_interface.h"

/* 1D dispatch; workgroup width is specialized to the subgroup size. */
layout(local_size_x_id = SUBGROUP_SIZE_ID, local_size_y = 1, local_size_z = 1) in;

TYPE(lbvh_node_info, 4);

layout(push_constant) uniform CONSTS
{
   lbvh_generate_ir_args args;
};

void
main(void)
{
   /* Each invocation starts its upward walk at node_info[global_id]
    * (presumably one entry per leaf — confirm against the pass that fills
    * args.node_info). */
   uint32_t global_id = gl_GlobalInvocationID.x;

   uint32_t idx = global_id;

   /* Cache of the node this invocation built in the previous iteration, so
    * its bounds don't have to be re-read from memory at the parent. Start
    * with an empty (inverted) AABB so min/max unions below are no-ops. */
   uint32_t previous_id = VK_BVH_INVALID_NODE;
   vk_aabb previous_bounds;
   previous_bounds.min = vec3(INFINITY);
   previous_bounds.max = vec3(-INFINITY);

   for (;;) {
      uint32_t count = 0;

      /* Check if all children have been processed. As this is an atomic the last path coming from
       * a child will pass here, while earlier paths break.
       *
       * NOTE(review): this assumes path_count is pre-initialized by the
       * node_info producer so that only the final arriving path observes 2 —
       * TODO confirm. AcquireRelease + MakeAvailable/MakeVisible ensures the
       * surviving path sees the buffer writes made by the other path's
       * invocation before its atomicAdd.
       */
      count = atomicAdd(
         DEREF(INDEX(lbvh_node_info, args.node_info, idx)).path_count, 1, gl_ScopeDevice,
         gl_StorageSemanticsBuffer,
         gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
      if (count != 2)
         break;

      /* We allocate nodes on demand with the atomic here to ensure children come before their
       * parents, which is a requirement of the encoder.
       */
      uint32_t dst_idx =
         atomicAdd(DEREF(REF(vk_ir_header)(args.header)).ir_internal_node_count, 1);

      uint32_t current_offset = args.internal_node_base + dst_idx * SIZEOF(vk_ir_box_node);
      uint32_t current_id = pack_ir_node_id(current_offset, vk_ir_node_internal);

      REF(vk_ir_box_node) node = REF(vk_ir_box_node)(OFFSET(args.bvh, current_offset));
      /* Seed with previous_bounds: if one of the children is the node we just
       * built, its AABB is already folded in and we skip fetching it. */
      vk_aabb bounds = previous_bounds;

      lbvh_node_info info = DEREF(INDEX(lbvh_node_info, args.node_info, idx));

      uint32_t children[2] = info.children;

      /* Try using the cached previous_bounds instead of fetching the bounds twice. */
      int32_t previous_child_index = -1;
      if (previous_id == children[0])
         previous_child_index = 0;
      else if (previous_id == children[1])
         previous_child_index = 1;

      /* No cached child (first iteration): fetch child 0 from memory, then
       * treat it as "already handled" so the block below fetches child 1. */
      if (previous_child_index == -1) {
         if (children[0] != VK_BVH_INVALID_NODE) {
            uint32_t child_offset = ir_id_to_offset(children[0]);
            REF(vk_ir_node) child = REF(vk_ir_node)(OFFSET(args.bvh, child_offset));
            vk_aabb child_bounds = DEREF(child).aabb;
            bounds.min = min(bounds.min, child_bounds.min);
            bounds.max = max(bounds.max, child_bounds.max);
         }
         previous_child_index = 0;
      }

      /* Fetch the non-cached child */
      if (children[1 - previous_child_index] != VK_BVH_INVALID_NODE) {
         uint32_t child_offset = ir_id_to_offset(children[1 - previous_child_index]);
         REF(vk_ir_node) child = REF(vk_ir_node)(OFFSET(args.bvh, child_offset));
         vk_aabb child_bounds = DEREF(child).aabb;
         bounds.min = min(bounds.min, child_bounds.min);
         bounds.max = max(bounds.max, child_bounds.max);
      }

      /* Build the whole node locally and store it with a single write.
       * bvh_offset is filled in later (by the encoder, presumably). */
      vk_ir_box_node node_value;

      node_value.base.aabb = bounds;
      node_value.bvh_offset = VK_UNKNOWN_BVH_OFFSET;
      node_value.children = children;

      DEREF(node) = node_value;

      /* The root has no parent: this invocation carried the last path and
       * the hierarchy is complete. */
      if (info.parent == VK_BVH_INVALID_NODE)
         break;

      /* Move up: low bits of info.parent encode the parent index, and one
       * bit records whether we are its left or right child. Patch our new
       * node id into the matching child slot of the parent's node_info. */
      idx = info.parent & ~LBVH_RIGHT_CHILD_BIT;

      DEREF(INDEX(lbvh_node_info, args.node_info, idx))
         .children[(info.parent >> LBVH_RIGHT_CHILD_BIT_SHIFT) & 1] = current_id;

      previous_id = current_id;
      previous_bounds = bounds;

      /* Make this iteration's buffer writes (node + parent child slot)
       * visible before the parent's path_count race is decided in the next
       * iteration. */
      memoryBarrierBuffer();
   }
}