/*
 * Copyright © 2022 Friedrich Vock
 *
 * SPDX-License-Identifier: MIT
 */

#version 460

#extension GL_GOOGLE_include_directive : require

#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require
#extension GL_KHR_memory_scope_semantics : require

/* One-dimensional workgroups of 64 invocations. */
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

#include "build_helpers.h"
#include "build_interface.h"

/* Push constants for this pass. The encode_args type is not declared in this
 * file; presumably it comes from one of the included build headers.
 */
layout(push_constant) uniform CONSTS {
   encode_args args;
};

/*
 * Record `parent` as the parent of node `child`.
 *
 * The value is stored as a 32-bit word below args.output_bvh, at address
 *    args.output_bvh - (child / 8) * 4 - 4
 * so larger child ids map to lower addresses. Callers in this file pass
 * ids produced by pack_node_id() (or RADV_BVH_ROOT_NODE) as `child`.
 */
void set_parent(uint32_t child, uint32_t parent)
{
   uint64_t addr = args.output_bvh - child / 8 * 4 - 4;
   DEREF(REF(uint32_t)(addr)) = parent;
}

/*
 * Shader entry point.
 *
 * Phase 1: the invocation encodes the leaf node with index
 * gl_GlobalInvocationID.x (triangle and AABB geometry only; instance leaves
 * are emitted in phase 2).
 *
 * Phase 2: invocations whose index is below ir_internal_node_count each
 * handle one IR box node, visited in reverse order so that invocation 0
 * handles the last IR internal node, which is treated as the root. The
 * invocation waits until the node's output offset has been published, then
 * gathers up to four children by repeatedly replacing the internal child with
 * the largest surface area by its own children, and writes one
 * radv_bvh_box32_node.
 */
void
main()
{
   /* Encode leaf nodes. */
   /* Leaves start directly after the root box node in the output. */
   uint32_t dst_leaf_offset =
      id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node);

   uint32_t ir_leaf_node_size;
   uint32_t output_leaf_node_size;
   switch (args.geometry_type) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR: {
      ir_leaf_node_size = SIZEOF(vk_ir_triangle_node);
      output_leaf_node_size = SIZEOF(radv_bvh_triangle_node);

      vk_ir_triangle_node src_node =
         DEREF(REF(vk_ir_triangle_node)(OFFSET(args.intermediate_bvh, gl_GlobalInvocationID.x * ir_leaf_node_size)));
      REF(radv_bvh_triangle_node) dst_node =
         REF(radv_bvh_triangle_node)(OFFSET(args.output_bvh, dst_leaf_offset + gl_GlobalInvocationID.x * output_leaf_node_size));

      DEREF(dst_node).coords = src_node.coords;
      DEREF(dst_node).triangle_id = src_node.triangle_id;
      DEREF(dst_node).geometry_id_and_flags = src_node.geometry_id_and_flags;
      /* NOTE(review): the meaning of the constant 9 is not visible in this file. */
      DEREF(dst_node).id = 9;

      break;
   }
   case VK_GEOMETRY_TYPE_AABBS_KHR: {
      ir_leaf_node_size = SIZEOF(vk_ir_aabb_node);
      output_leaf_node_size = SIZEOF(radv_bvh_aabb_node);

      vk_ir_aabb_node src_node =
         DEREF(REF(vk_ir_aabb_node)(OFFSET(args.intermediate_bvh, gl_GlobalInvocationID.x * ir_leaf_node_size)));
      REF(radv_bvh_aabb_node) dst_node =
         REF(radv_bvh_aabb_node)(OFFSET(args.output_bvh, dst_leaf_offset + gl_GlobalInvocationID.x * output_leaf_node_size));

      DEREF(dst_node).primitive_id = src_node.primitive_id;
      DEREF(dst_node).geometry_id_and_flags = src_node.geometry_id_and_flags;

      break;
   }
   default:
      /* instances */
      ir_leaf_node_size = SIZEOF(vk_ir_instance_node);
      output_leaf_node_size = SIZEOF(radv_bvh_instance_node);
      /* Instance nodes have to be emitted inside the loop since encoding them
       * loads an address from the IR node which is uninitialized for inactive nodes.
       */
      break;
   }

   if (gl_GlobalInvocationID.x >= DEREF(args.header).ir_internal_node_count)
      return;

   /* Encode internal nodes. Revert the order so we start at the root */
   uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - gl_GlobalInvocationID.x;

   /* IR internal nodes follow the IR leaves; without COMPACT, output internal
    * nodes follow the output leaves.
    */
   uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * ir_leaf_node_size;
   uint32_t dst_internal_offset = dst_leaf_offset + args.leaf_node_count * output_leaf_node_size;

   REF(vk_ir_box_node) intermediate_internal_nodes =
      REF(vk_ir_box_node)OFFSET(args.intermediate_bvh, intermediate_leaf_nodes_size);
   REF(vk_ir_box_node) src_node = INDEX(vk_ir_box_node, intermediate_internal_nodes, global_id);
   vk_ir_box_node src = DEREF(src_node);

   bool is_root_node = global_id == DEREF(args.header).ir_internal_node_count - 1;

   /* Spin until this node's bvh_offset has been written by the invocation that
    * handles its parent (see the child loop below), or until the node has been
    * marked as collapsed. The root has a fixed offset and never waits.
    */
   for (;;) {
      /* Make changes to the current node's BVH offset value visible. */
      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

      uint32_t bvh_offset = is_root_node ? id_to_offset(RADV_BVH_ROOT_NODE) : DEREF(src_node).bvh_offset;
      if (bvh_offset == VK_UNKNOWN_BVH_OFFSET)
         continue;

      /* This node was folded into an ancestor; nothing to emit for it. */
      if (bvh_offset == VK_NULL_BVH_OFFSET)
         break;

      REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, bvh_offset));
      uint32_t node_id = pack_node_id(bvh_offset, radv_bvh_node_box32);

      uint32_t found_child_count = 0;
      uint32_t children[4] = {RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE,
                              RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE};

      /* Start from the (up to two) valid children of the IR node. */
      for (uint32_t i = 0; i < 2; ++i)
         if (src.children[i] != RADV_BVH_INVALID_NODE)
            children[found_child_count++] = src.children[i];

      /* Widen the node: while there is room, replace the internal child with
       * the largest surface area by its own children.
       */
      while (found_child_count < 4) {
         int32_t collapsed_child_index = -1;
         float largest_surface_area = -INFINITY;

         for (int32_t i = 0; i < found_child_count; ++i) {
            if (ir_id_to_type(children[i]) != vk_ir_node_internal)
               continue;

            vk_aabb bounds =
               DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh,
                                             ir_id_to_offset(children[i]))).aabb;

            float surface_area = aabb_surface_area(bounds);
            if (surface_area > largest_surface_area) {
               largest_surface_area = surface_area;
               collapsed_child_index = i;
            }
         }

         if (collapsed_child_index != -1) {
            REF(vk_ir_box_node) child_node =
               REF(vk_ir_box_node)OFFSET(args.intermediate_bvh,
                                        ir_id_to_offset(children[collapsed_child_index]));
            uint32_t grandchildren[2] = DEREF(child_node).children;
            uint32_t valid_grandchild_count = 0;

            if (grandchildren[1] != RADV_BVH_INVALID_NODE)
               ++valid_grandchild_count;

            /* If only the second grandchild is valid, move it into slot 0. */
            if (grandchildren[0] != RADV_BVH_INVALID_NODE)
               ++valid_grandchild_count;
            else
               grandchildren[0] = grandchildren[1];

            if (valid_grandchild_count > 1)
               children[found_child_count++] = grandchildren[1];

            if (valid_grandchild_count > 0)
               children[collapsed_child_index] = grandchildren[0];
            else {
               /* The collapsed child is empty: remove it by moving the last
                * child into its place.
                */
               found_child_count--;
               children[collapsed_child_index] = children[found_child_count];
               /* Reset the vacated slot so that a raw IR node id is not
                * written to the output node's children array below.
                */
               children[found_child_count] = RADV_BVH_INVALID_NODE;
            }

            /* Tell the invocation waiting on the collapsed node to emit nothing. */
            DEREF(child_node).bvh_offset = VK_NULL_BVH_OFFSET;
         } else
            break;
      }

      /* Assign an output location to every child, publish it, and record the
       * child's bounds and parent link.
       */
      for (uint32_t i = 0; i < found_child_count; ++i) {
         uint32_t type = ir_id_to_type(children[i]);
         uint32_t offset = ir_id_to_offset(children[i]);
         uint32_t dst_offset;

         if (type == vk_ir_node_internal) {
#if COMPACT
            /* Allocate the next free box node slot from the header counter. */
            dst_offset = atomicAdd(DEREF(args.header).dst_node_offset, SIZEOF(radv_bvh_box32_node));
#else
            /* Keep the same index among internal nodes as in the IR. */
            uint32_t offset_in_internal_nodes = offset - intermediate_leaf_nodes_size;
            uint32_t child_index = offset_in_internal_nodes / SIZEOF(vk_ir_box_node);
            dst_offset = dst_internal_offset + child_index * SIZEOF(radv_bvh_box32_node);
#endif

            /* Publish the offset; the invocation handling this child spins on it. */
            REF(vk_ir_box_node) child_node = REF(vk_ir_box_node)OFFSET(args.intermediate_bvh, offset);
            DEREF(child_node).bvh_offset = dst_offset;
         } else {
            uint32_t child_index = offset / ir_leaf_node_size;
            dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;

            if (type == vk_ir_node_instance) {
               vk_ir_instance_node src_node =
                  DEREF(REF(vk_ir_instance_node)(OFFSET(args.intermediate_bvh, offset)));
               REF(radv_bvh_instance_node) dst_node =
                  REF(radv_bvh_instance_node)(OFFSET(args.output_bvh, dst_offset));

               /* Read the header of the referenced acceleration structure to
                * find where its BVH starts.
                */
               radv_accel_struct_header blas_header =
                  DEREF(REF(radv_accel_struct_header)(src_node.base_ptr));

               DEREF(dst_node).bvh_ptr = addr_to_node(src_node.base_ptr + blas_header.bvh_offset);
               DEREF(dst_node).bvh_offset = blas_header.bvh_offset;

               /* wto_matrix is derived by inverting otw_matrix. */
               mat4 transform = mat4(src_node.otw_matrix);
               mat4 inv_transform = transpose(inverse(transpose(transform)));
               DEREF(dst_node).wto_matrix = mat3x4(inv_transform);
               DEREF(dst_node).otw_matrix = mat3x4(transform);

               DEREF(dst_node).custom_instance_and_mask = src_node.custom_instance_and_mask;
               DEREF(dst_node).sbt_offset_and_flags = encode_sbt_offset_and_flags(src_node.sbt_offset_and_flags);
               DEREF(dst_node).instance_id = src_node.instance_id;
            }
         }

         vk_aabb child_aabb =
            DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;

         DEREF(dst_node).coords[i] = child_aabb;

         /* From here on children[i] holds the output node id, not the IR id. */
         uint32_t child_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type));
         children[i] = child_id;
         set_parent(child_id, node_id);
      }

      /* Unused child slots get NaN bounds. */
      for (uint i = found_child_count; i < 4; ++i) {
         for (uint comp = 0; comp < 3; ++comp) {
            DEREF(dst_node).coords[i].min[comp] = NAN;
            DEREF(dst_node).coords[i].max[comp] = NAN;
         }
      }

      /* Make changes to the children's BVH offset value available to the other invocations. */
      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

      DEREF(dst_node).children = children;
      break;
   }

   if (is_root_node) {
      /* The acceleration structure header sits output_bvh_offset bytes before the BVH. */
      REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh - args.output_bvh_offset);
      DEREF(header).aabb = src.base.aabb;
      DEREF(header).bvh_offset = args.output_bvh_offset;

      set_parent(RADV_BVH_ROOT_NODE, RADV_BVH_INVALID_NODE);
   }
}
