• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright © 2022 Friedrich Vock
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#version 460
25
26#extension GL_GOOGLE_include_directive : require
27
28#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
29#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
30#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
31#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
32#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
33#extension GL_EXT_scalar_block_layout : require
34#extension GL_EXT_buffer_reference : require
35#extension GL_EXT_buffer_reference2 : require
36#extension GL_KHR_memory_scope_semantics : require
37
38layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
39
40#include "build_helpers.h"
41#include "build_interface.h"
42
43layout(push_constant) uniform CONSTS {
44   encode_args args;
45};
46
47void set_parent(uint32_t child, uint32_t parent)
48{
49   uint64_t addr = args.output_bvh - child / 8 * 4 - 4;
50   DEREF(REF(uint32_t)(addr)) = parent;
51}
52
53void
54main()
55{
56   /* Revert the order so we start at the root */
57   uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - gl_GlobalInvocationID.x;
58
59   uint32_t output_leaf_node_size;
60   switch (args.geometry_type) {
61   case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
62      output_leaf_node_size = SIZEOF(radv_bvh_triangle_node);
63      break;
64   case VK_GEOMETRY_TYPE_AABBS_KHR:
65      output_leaf_node_size = SIZEOF(radv_bvh_aabb_node);
66      break;
67   default: /* instances */
68      output_leaf_node_size = SIZEOF(radv_bvh_instance_node);
69      break;
70   }
71
72   uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * SIZEOF(radv_ir_node);
73   uint32_t dst_leaf_offset =
74      id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node);
75   uint32_t dst_internal_offset = dst_leaf_offset + args.leaf_node_count * output_leaf_node_size;
76
77   REF(radv_ir_box_node) intermediate_internal_nodes =
78      REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, intermediate_leaf_nodes_size);
79   REF(radv_ir_box_node) src_node = INDEX(radv_ir_box_node, intermediate_internal_nodes, global_id);
80   radv_ir_box_node src = DEREF(src_node);
81
82   bool is_root_node = global_id == DEREF(args.header).ir_internal_node_count - 1;
83
84   for (;;) {
85      /* Make changes to the current node's BVH offset value visible. */
86      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
87                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
88
89      uint32_t bvh_offset = is_root_node ? id_to_offset(RADV_BVH_ROOT_NODE) : DEREF(src_node).bvh_offset;
90      if (bvh_offset == RADV_UNKNOWN_BVH_OFFSET)
91         continue;
92
93      if (bvh_offset == RADV_NULL_BVH_OFFSET)
94         break;
95
96      REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, bvh_offset));
97      uint32_t node_id = pack_node_id(bvh_offset, radv_bvh_node_box32);
98
99      uint32_t found_child_count = 0;
100      uint32_t children[4] = {RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE,
101                              RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE};
102
103      for (uint32_t i = 0; i < 2; ++i)
104         if (src.children[i] != RADV_BVH_INVALID_NODE)
105            children[found_child_count++] = src.children[i];
106
107      while (found_child_count < 4) {
108         int32_t collapsed_child_index = -1;
109         float largest_surface_area = -INFINITY;
110
111         for (int32_t i = 0; i < found_child_count; ++i) {
112            if (ir_id_to_type(children[i]) != radv_ir_node_internal)
113               continue;
114
115            radv_aabb bounds =
116               DEREF(REF(radv_ir_node)OFFSET(args.intermediate_bvh,
117                                             ir_id_to_offset(children[i]))).aabb;
118
119            float surface_area = aabb_surface_area(bounds);
120            if (surface_area > largest_surface_area) {
121               largest_surface_area = surface_area;
122               collapsed_child_index = i;
123            }
124         }
125
126         if (collapsed_child_index != -1) {
127            REF(radv_ir_box_node) child_node =
128               REF(radv_ir_box_node)OFFSET(args.intermediate_bvh,
129                                        ir_id_to_offset(children[collapsed_child_index]));
130            uint32_t grandchildren[2] = DEREF(child_node).children;
131            uint32_t valid_grandchild_count = 0;
132
133            if (grandchildren[1] != RADV_BVH_INVALID_NODE)
134               ++valid_grandchild_count;
135
136            if (grandchildren[0] != RADV_BVH_INVALID_NODE)
137               ++valid_grandchild_count;
138            else
139               grandchildren[0] = grandchildren[1];
140
141            if (valid_grandchild_count > 1)
142               children[found_child_count++] = grandchildren[1];
143
144            if (valid_grandchild_count > 0)
145               children[collapsed_child_index] = grandchildren[0];
146            else {
147               found_child_count--;
148               children[collapsed_child_index] = children[found_child_count];
149            }
150
151            DEREF(child_node).bvh_offset = RADV_NULL_BVH_OFFSET;
152         } else
153            break;
154      }
155
156      for (uint32_t i = 0; i < found_child_count; ++i) {
157         uint32_t type = ir_id_to_type(children[i]);
158         uint32_t offset = ir_id_to_offset(children[i]);
159         uint32_t dst_offset;
160
161         if (type == radv_ir_node_internal) {
162#if COMPACT
163            dst_offset = atomicAdd(DEREF(args.header).dst_node_offset, SIZEOF(radv_bvh_box32_node));
164#else
165            uint32_t offset_in_internal_nodes = offset - intermediate_leaf_nodes_size;
166            uint32_t child_index = offset_in_internal_nodes / SIZEOF(radv_ir_box_node);
167            dst_offset = dst_internal_offset + child_index * SIZEOF(radv_bvh_box32_node);
168#endif
169
170            REF(radv_ir_box_node) child_node = REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, offset);
171            DEREF(child_node).bvh_offset = dst_offset;
172         } else {
173            uint32_t child_index = offset / SIZEOF(radv_ir_node);
174            dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
175         }
176
177         radv_aabb child_aabb =
178            DEREF(REF(radv_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;
179
180         DEREF(dst_node).coords[i] = child_aabb;
181
182         uint32_t child_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type));
183         children[i] = child_id;
184         set_parent(child_id, node_id);
185      }
186
187      for (uint i = found_child_count; i < 4; ++i) {
188            for (uint comp = 0; comp < 3; ++comp) {
189               DEREF(dst_node).coords[i].min[comp] = NAN;
190               DEREF(dst_node).coords[i].max[comp] = NAN;
191            }
192      }
193
194      /* Make changes to the children's BVH offset value available to the other invocations. */
195      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
196                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
197
198      DEREF(dst_node).children = children;
199      break;
200   }
201
202   if (is_root_node) {
203      REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh - args.output_bvh_offset);
204      DEREF(header).aabb = src.base.aabb;
205      DEREF(header).bvh_offset = args.output_bvh_offset;
206
207      set_parent(RADV_BVH_ROOT_NODE, RADV_BVH_INVALID_NODE);
208   }
209}
210