/*
 * Copyright © 2022 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#version 460

#extension GL_GOOGLE_include_directive : require

/* Explicit sized arithmetic types, scalar block layout and buffer device
 * addresses are required by the REF/DEREF/INDEX helpers used below
 * (presumably defined in vk_build_interface.h — confirm there); the
 * memory-scope-semantics extension is needed for the device-scope atomic
 * in main().
 */
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require
#extension GL_KHR_memory_scope_semantics : require

#include "vk_build_interface.h"

/* One-dimensional dispatch; the x workgroup size is a specialization
 * constant (SUBGROUP_SIZE_ID) so the driver can pick it at pipeline-creation
 * time. */
layout(local_size_x_id = SUBGROUP_SIZE_ID, local_size_y = 1, local_size_z = 1) in;

/* NOTE(review): TYPE() presumably declares the buffer-reference type for
 * lbvh_node_info with 4-byte alignment — confirm against the macro in
 * vk_build_interface.h. */
TYPE(lbvh_node_info, 4);

/* Push constants: buffer addresses and offsets driving this pass
 * (node_info array, IR header, output BVH, internal_node_base). */
layout(push_constant) uniform CONSTS
{
   lbvh_generate_ir_args args;
};
48
void
main(void)
{
   /* Each invocation starts at one entry of the node_info array (presumably
    * one per leaf-level LBVH node — confirm against the dispatch in the
    * driver) and walks toward the root, writing one internal vk_ir_box_node
    * per LBVH node once all of that node's children have been processed.
    */
   uint32_t global_id = gl_GlobalInvocationID.x;

   uint32_t idx = global_id;

   /* ID and bounds of the node this invocation wrote in the previous loop
    * iteration. The bounds start as an empty (inverted) AABB so the
    * min()/max() accumulation below is a no-op on the first iteration.
    */
   uint32_t previous_id = VK_BVH_INVALID_NODE;
   vk_aabb previous_bounds;
   previous_bounds.min = vec3(INFINITY);
   previous_bounds.max = vec3(-INFINITY);

   for (;;) {
      uint32_t count = 0;

      /* Check if all children have been processed. As this is an atomic the last path coming from
       * a child will pass here, while earlier paths break.
       *
       * NOTE(review): atomicAdd returns the pre-increment value, so passing on
       * count == 2 implies path_count is pre-initialized elsewhere (likely by
       * the shader that fills node_info) such that the last arriving path
       * reads 2 — confirm against the initialization pass. The acquire/release
       * + make-available/visible semantics at device scope pair with the
       * memoryBarrierBuffer() at the bottom of the loop so the passing
       * invocation observes the children written by other invocations.
       */
      count = atomicAdd(
         DEREF(INDEX(lbvh_node_info, args.node_info, idx)).path_count, 1, gl_ScopeDevice,
         gl_StorageSemanticsBuffer,
         gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
      if (count != 2)
         break;

      /* We allocate nodes on demand with the atomic here to ensure children come before their
       * parents, which is a requirement of the encoder.
       */
      uint32_t dst_idx =
         atomicAdd(DEREF(REF(vk_ir_header)(args.header)).ir_internal_node_count, 1);

      /* Byte offset of the new internal node inside the IR buffer, and its
       * packed node ID used to link it into the parent. */
      uint32_t current_offset = args.internal_node_base + dst_idx * SIZEOF(vk_ir_box_node);
      uint32_t current_id = pack_ir_node_id(current_offset, vk_ir_node_internal);

      REF(vk_ir_box_node) node = REF(vk_ir_box_node)(OFFSET(args.bvh, current_offset));
      /* Seed with the cached bounds of the child produced last iteration
       * (empty AABB on the first iteration). */
      vk_aabb bounds = previous_bounds;

      lbvh_node_info info = DEREF(INDEX(lbvh_node_info, args.node_info, idx));

      uint32_t children[2] = info.children;

      /* Try using the cached previous_bounds instead of fetching the bounds twice. */
      int32_t previous_child_index = -1;
      if (previous_id == children[0])
         previous_child_index = 0;
      else if (previous_id == children[1])
         previous_child_index = 1;

      /* Neither child was produced by this invocation (first iteration, or the
       * cached child was replaced): fetch child 0's bounds from memory and
       * treat it as the "cached" one so the branch below fetches child 1. */
      if (previous_child_index == -1) {
         if (children[0] != VK_BVH_INVALID_NODE) {
            uint32_t child_offset = ir_id_to_offset(children[0]);
            REF(vk_ir_node) child = REF(vk_ir_node)(OFFSET(args.bvh, child_offset));
            vk_aabb child_bounds = DEREF(child).aabb;
            bounds.min = min(bounds.min, child_bounds.min);
            bounds.max = max(bounds.max, child_bounds.max);
         }
         previous_child_index = 0;
      }

      /* Fetch the non-cached child */
      if (children[1 - previous_child_index] != VK_BVH_INVALID_NODE) {
         uint32_t child_offset = ir_id_to_offset(children[1 - previous_child_index]);
         REF(vk_ir_node) child = REF(vk_ir_node)(OFFSET(args.bvh, child_offset));
         vk_aabb child_bounds = DEREF(child).aabb;
         bounds.min = min(bounds.min, child_bounds.min);
         bounds.max = max(bounds.max, child_bounds.max);
      }

      /* Assemble the internal node locally, then store it with a single
       * structure write. */
      vk_ir_box_node node_value;

      node_value.base.aabb = bounds;
      node_value.bvh_offset = VK_UNKNOWN_BVH_OFFSET;
      node_value.children = children;

      DEREF(node) = node_value;

      /* Reached the root: nothing further up to link into. */
      if (info.parent == VK_BVH_INVALID_NODE)
         break;

      /* Move up to the parent and link the freshly written node into the
       * parent's left/right child slot selected by the bit encoded in
       * info.parent. */
      idx = info.parent & ~LBVH_RIGHT_CHILD_BIT;

      DEREF(INDEX(lbvh_node_info, args.node_info, idx))
         .children[(info.parent >> LBVH_RIGHT_CHILD_BIT_SHIFT) & 1] = current_id;

      previous_id = current_id;
      previous_bounds = bounds;

      /* Flush the node/children stores before the next iteration's path_count
       * atomic, so whichever invocation passes the check at the parent sees
       * this child fully written. */
      memoryBarrierBuffer();
   }
}
139