• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright © 2022 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#version 460
25
26#extension GL_GOOGLE_include_directive : require
27
28#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
29#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
30#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
31#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
32#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
33#extension GL_EXT_scalar_block_layout : require
34#extension GL_EXT_buffer_reference : require
35#extension GL_EXT_buffer_reference2 : require
36
37#include "vk_build_interface.h"
38
39layout(local_size_x_id = SUBGROUP_SIZE_ID, local_size_y = 1, local_size_z = 1) in;
40
41TYPE(lbvh_node_info, 4);
42
43layout(push_constant) uniform CONSTS
44{
45   lbvh_main_args args;
46};
47
48int32_t
49longest_common_prefix(int32_t i, uint32_t key_i, int32_t j)
50{
51   if (j < 0 || j >= args.id_count)
52      return -1;
53
54   uint32_t key_j = DEREF(INDEX(key_id_pair, args.src_ids, j)).key;
55
56   uint32_t diff = key_i ^ key_j;
57   int32_t ret = 0;
58   if (key_i == key_j) {
59      ret += 32;
60      diff = i ^ j;
61   }
62
63   return ret + 31 - findMSB(diff);
64}
65
66/*
67 * The LBVH algorithm constructs a radix tree of the sorted nodes according to their key.
68 *
69 * We do this by making the following decision:
70 *
71 *    Node N always either starts or ends at leaf N.
72 *
73 * From there it follows that we always have to extend it into the direction which has
74 * a longer common prefix with the direct neighbour. Then we try to make the node cover
75 * as many leaves as possible without including the other neighbour.
76 *
77 * For finding the split point we compute the longest common prefix of all the leaves within the
78 * node, and look for the first leaf the same length common prefix with leaf N as that.
79 *
80 * To give an example: leaves=[000,001,010,011,100,101,110,111], node=5 (with value 101)
81 *
82 * lcp(101, 100) = 2 and lcp(101, 110) = 1, so we extend down.
83 * lcp(101, 011) = 0, so the range of the node is [4,5] with values [100, 101]
84 *
85 * the lcp of all the leaves in the range is the same as the lcp of the first and last leaf, in this
86 * case that is lcp(101, 100) = 2. Then we have lcp(101, 101) = 3 and lcp(101, 100) = 2, so the first
87 * leaf that has a longer lcp is 4. Hence the two children of this node have range [4,4] and [5,5]
88 */
89void
90main()
91{
92   if (args.id_count <= 1) {
93      REF(lbvh_node_info) dst = REF(lbvh_node_info)(args.node_info);
94      DEREF(dst).parent = VK_BVH_INVALID_NODE;
95      DEREF(dst).path_count = 2;
96      DEREF(dst).children[0] =
97         args.id_count == 1 ? DEREF(INDEX(key_id_pair, args.src_ids, 0)).id : VK_BVH_INVALID_NODE;
98      DEREF(dst).children[1] = VK_BVH_INVALID_NODE;
99      return;
100   }
101
102   int32_t id = int32_t(gl_GlobalInvocationID.x);
103   uint32_t id_key = DEREF(INDEX(key_id_pair, args.src_ids, id)).key;
104
105   int32_t left_lcp = longest_common_prefix(id, id_key, id - 1);
106   int32_t right_lcp = longest_common_prefix(id, id_key, id + 1);
107   int32_t dir = right_lcp > left_lcp ? 1 : -1;
108   int32_t lcp_min = min(left_lcp, right_lcp);
109
110   /* Determine the bounds for the binary search for the length of the range that
111    * this subtree is going to own.
112    */
113   int32_t lmax = 128;
114   while (longest_common_prefix(id, id_key, id + dir * lmax) > lcp_min) {
115      lmax *= 2;
116   }
117
118   int32_t length = 0;
119   for (int32_t t = lmax / 2; t >= 1; t /= 2) {
120      if (longest_common_prefix(id, id_key, id + (length + t) * dir) > lcp_min)
121         length += t;
122   }
123   int32_t other_end = id + length * dir;
124
125   /* The number of bits in the prefix that is the same for all elements in the
126    * range.
127    */
128   int32_t lcp_node = longest_common_prefix(id, id_key, other_end);
129   int32_t child_range = 0;
130   for (int32_t diff = 2; diff < 2 * length; diff *= 2) {
131      int32_t t = DIV_ROUND_UP(length, diff);
132      if (longest_common_prefix(id, id_key, id + (child_range + t) * dir) > lcp_node)
133         child_range += t;
134   }
135
136   int32_t child_split = id + child_range * dir;
137
138   /* If dir = -1, right = child_split */
139   int32_t left = child_split + min(dir, 0);
140   int32_t right = left + 1;
141
142   /* if the number of leaves covered by a child is 1, we can use the leaf directly */
143   bool left_leaf = min(id, other_end) == left;
144   bool right_leaf = max(id, other_end) == right;
145
146   if (!left_leaf)
147      DEREF(INDEX(lbvh_node_info, args.node_info, left)).parent = id;
148   if (!right_leaf)
149      DEREF(INDEX(lbvh_node_info, args.node_info, right)).parent = LBVH_RIGHT_CHILD_BIT | id;
150
151   REF(lbvh_node_info) dst = INDEX(lbvh_node_info, args.node_info, id);
152   DEREF(dst).path_count = (left_leaf ? 1 : 0) + (right_leaf ? 1 : 0);
153   DEREF(dst).children[0] = DEREF(INDEX(key_id_pair, args.src_ids, left)).id;
154   DEREF(dst).children[1] = DEREF(INDEX(key_id_pair, args.src_ids, right)).id;
155   if (id == 0)
156      DEREF(dst).parent = VK_BVH_INVALID_NODE;
157}
158