1/* 2 * Copyright © 2022 Bas Nieuwenhuizen 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#version 460 25 26#extension GL_GOOGLE_include_directive : require 27 28#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require 29#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require 30#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require 31#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require 32#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require 33#extension GL_EXT_scalar_block_layout : require 34#extension GL_EXT_buffer_reference : require 35#extension GL_EXT_buffer_reference2 : require 36 37#include "vk_build_interface.h" 38 39layout(local_size_x_id = SUBGROUP_SIZE_ID, local_size_y = 1, local_size_z = 1) in; 40 41TYPE(lbvh_node_info, 4); 42 43layout(push_constant) uniform CONSTS 44{ 45 lbvh_main_args args; 46}; 47 48int32_t 49longest_common_prefix(int32_t i, uint32_t key_i, int32_t j) 50{ 51 if (j < 0 || j >= args.id_count) 52 return -1; 53 54 uint32_t key_j = DEREF(INDEX(key_id_pair, args.src_ids, j)).key; 55 56 uint32_t diff = key_i ^ key_j; 57 int32_t ret = 0; 58 if (key_i == key_j) { 59 ret += 32; 60 diff = i ^ j; 61 } 62 63 return ret + 31 - findMSB(diff); 64} 65 66/* 67 * The LBVH algorithm constructs a radix tree of the sorted nodes according to their key. 68 * 69 * We do this by making the following decision: 70 * 71 * Node N always either starts or ends at leaf N. 72 * 73 * From there it follows that we always have to extend it into the direction which has 74 * a longer common prefix with the direct neighbour. Then we try to make the node cover 75 * as many leaves as possible without including the other neighbour. 76 * 77 * For finding the split point we compute the longest common prefix of all the leaves within the 78 * node, and look for the first leaf the same length common prefix with leaf N as that. 79 * 80 * To give an example: leaves=[000,001,010,011,100,101,110,111], node=5 (with value 101) 81 * 82 * lcp(101, 100) = 2 and lcp(101, 110) = 1, so we extend down. 83 * lcp(101, 011) = 0, so the range of the node is [4,5] with values [100, 101] 84 * 85 * the lcp of all the leaves in the range is the same as the lcp of the first and last leaf, in this 86 * case that is lcp(101, 100) = 2. Then we have lcp(101, 101) = 3 and lcp(101, 100) = 2, so the first 87 * leaf that has a longer lcp is 4. Hence the two children of this node have range [4,4] and [5,5] 88 */ 89void 90main() 91{ 92 if (args.id_count <= 1) { 93 REF(lbvh_node_info) dst = REF(lbvh_node_info)(args.node_info); 94 DEREF(dst).parent = VK_BVH_INVALID_NODE; 95 DEREF(dst).path_count = 2; 96 DEREF(dst).children[0] = 97 args.id_count == 1 ? DEREF(INDEX(key_id_pair, args.src_ids, 0)).id : VK_BVH_INVALID_NODE; 98 DEREF(dst).children[1] = VK_BVH_INVALID_NODE; 99 return; 100 } 101 102 int32_t id = int32_t(gl_GlobalInvocationID.x); 103 uint32_t id_key = DEREF(INDEX(key_id_pair, args.src_ids, id)).key; 104 105 int32_t left_lcp = longest_common_prefix(id, id_key, id - 1); 106 int32_t right_lcp = longest_common_prefix(id, id_key, id + 1); 107 int32_t dir = right_lcp > left_lcp ? 1 : -1; 108 int32_t lcp_min = min(left_lcp, right_lcp); 109 110 /* Determine the bounds for the binary search for the length of the range that 111 * this subtree is going to own. 112 */ 113 int32_t lmax = 128; 114 while (longest_common_prefix(id, id_key, id + dir * lmax) > lcp_min) { 115 lmax *= 2; 116 } 117 118 int32_t length = 0; 119 for (int32_t t = lmax / 2; t >= 1; t /= 2) { 120 if (longest_common_prefix(id, id_key, id + (length + t) * dir) > lcp_min) 121 length += t; 122 } 123 int32_t other_end = id + length * dir; 124 125 /* The number of bits in the prefix that is the same for all elements in the 126 * range. 127 */ 128 int32_t lcp_node = longest_common_prefix(id, id_key, other_end); 129 int32_t child_range = 0; 130 for (int32_t diff = 2; diff < 2 * length; diff *= 2) { 131 int32_t t = DIV_ROUND_UP(length, diff); 132 if (longest_common_prefix(id, id_key, id + (child_range + t) * dir) > lcp_node) 133 child_range += t; 134 } 135 136 int32_t child_split = id + child_range * dir; 137 138 /* If dir = -1, right = child_split */ 139 int32_t left = child_split + min(dir, 0); 140 int32_t right = left + 1; 141 142 /* if the number of leaves covered by a child is 1, we can use the leaf directly */ 143 bool left_leaf = min(id, other_end) == left; 144 bool right_leaf = max(id, other_end) == right; 145 146 if (!left_leaf) 147 DEREF(INDEX(lbvh_node_info, args.node_info, left)).parent = id; 148 if (!right_leaf) 149 DEREF(INDEX(lbvh_node_info, args.node_info, right)).parent = LBVH_RIGHT_CHILD_BIT | id; 150 151 REF(lbvh_node_info) dst = INDEX(lbvh_node_info, args.node_info, id); 152 DEREF(dst).path_count = (left_leaf ? 1 : 0) + (right_leaf ? 1 : 0); 153 DEREF(dst).children[0] = DEREF(INDEX(key_id_pair, args.src_ids, left)).id; 154 DEREF(dst).children[1] = DEREF(INDEX(key_id_pair, args.src_ids, right)).id; 155 if (id == 0) 156 DEREF(dst).parent = VK_BVH_INVALID_NODE; 157} 158