/*
 * Copyright © 2022 Konstantin Seurer
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BVH_BUILD_HELPERS_H
#define BVH_BUILD_HELPERS_H

#include "bvh.h"

#define VK_FORMAT_UNDEFINED                  0
#define VK_FORMAT_R4G4_UNORM_PACK8           1
#define VK_FORMAT_R4G4B4A4_UNORM_PACK16      2
#define VK_FORMAT_B4G4R4A4_UNORM_PACK16      3
#define VK_FORMAT_R5G6B5_UNORM_PACK16        4
#define VK_FORMAT_B5G6R5_UNORM_PACK16        5
#define VK_FORMAT_R5G5B5A1_UNORM_PACK16      6
#define VK_FORMAT_B5G5R5A1_UNORM_PACK16      7
#define VK_FORMAT_A1R5G5B5_UNORM_PACK16      8
#define VK_FORMAT_R8_UNORM                   9
#define VK_FORMAT_R8_SNORM                   10
#define VK_FORMAT_R8_USCALED                 11
#define VK_FORMAT_R8_SSCALED                 12
#define VK_FORMAT_R8_UINT                    13
#define VK_FORMAT_R8_SINT                    14
#define VK_FORMAT_R8_SRGB                    15
#define VK_FORMAT_R8G8_UNORM                 16
#define VK_FORMAT_R8G8_SNORM                 17
#define VK_FORMAT_R8G8_USCALED               18
#define VK_FORMAT_R8G8_SSCALED               19
#define VK_FORMAT_R8G8_UINT                  20
#define VK_FORMAT_R8G8_SINT                  21
#define VK_FORMAT_R8G8_SRGB                  22
#define VK_FORMAT_R8G8B8_UNORM               23
#define VK_FORMAT_R8G8B8_SNORM               24
#define VK_FORMAT_R8G8B8_USCALED             25
#define VK_FORMAT_R8G8B8_SSCALED             26
#define VK_FORMAT_R8G8B8_UINT                27
#define VK_FORMAT_R8G8B8_SINT                28
#define VK_FORMAT_R8G8B8_SRGB                29
#define VK_FORMAT_B8G8R8_UNORM               30
#define VK_FORMAT_B8G8R8_SNORM               31
#define VK_FORMAT_B8G8R8_USCALED             32
#define VK_FORMAT_B8G8R8_SSCALED             33
#define VK_FORMAT_B8G8R8_UINT                34
#define VK_FORMAT_B8G8R8_SINT                35
#define VK_FORMAT_B8G8R8_SRGB                36
#define VK_FORMAT_R8G8B8A8_UNORM             37
#define VK_FORMAT_R8G8B8A8_SNORM             38
#define VK_FORMAT_R8G8B8A8_USCALED           39
#define VK_FORMAT_R8G8B8A8_SSCALED           40
#define VK_FORMAT_R8G8B8A8_UINT              41
#define VK_FORMAT_R8G8B8A8_SINT              42
#define VK_FORMAT_R8G8B8A8_SRGB              43
#define VK_FORMAT_B8G8R8A8_UNORM             44
#define VK_FORMAT_B8G8R8A8_SNORM             45
#define VK_FORMAT_B8G8R8A8_USCALED           46
#define VK_FORMAT_B8G8R8A8_SSCALED           47
#define VK_FORMAT_B8G8R8A8_UINT              48
#define VK_FORMAT_B8G8R8A8_SINT              49
#define VK_FORMAT_B8G8R8A8_SRGB              50
#define VK_FORMAT_A8B8G8R8_UNORM_PACK32      51
#define VK_FORMAT_A8B8G8R8_SNORM_PACK32      52
#define VK_FORMAT_A8B8G8R8_USCALED_PACK32    53
#define VK_FORMAT_A8B8G8R8_SSCALED_PACK32    54
#define VK_FORMAT_A8B8G8R8_UINT_PACK32       55
#define VK_FORMAT_A8B8G8R8_SINT_PACK32       56
#define VK_FORMAT_A8B8G8R8_SRGB_PACK32       57
#define VK_FORMAT_A2R10G10B10_UNORM_PACK32   58
#define VK_FORMAT_A2R10G10B10_SNORM_PACK32   59
#define VK_FORMAT_A2R10G10B10_USCALED_PACK32 60
#define VK_FORMAT_A2R10G10B10_SSCALED_PACK32 61
#define VK_FORMAT_A2R10G10B10_UINT_PACK32    62
#define VK_FORMAT_A2R10G10B10_SINT_PACK32    63
#define VK_FORMAT_A2B10G10R10_UNORM_PACK32   64
#define VK_FORMAT_A2B10G10R10_SNORM_PACK32   65
#define VK_FORMAT_A2B10G10R10_USCALED_PACK32 66
#define VK_FORMAT_A2B10G10R10_SSCALED_PACK32 67
#define VK_FORMAT_A2B10G10R10_UINT_PACK32    68
#define VK_FORMAT_A2B10G10R10_SINT_PACK32    69
#define VK_FORMAT_R16_UNORM                  70
#define VK_FORMAT_R16_SNORM                  71
#define VK_FORMAT_R16_USCALED                72
#define VK_FORMAT_R16_SSCALED                73
#define VK_FORMAT_R16_UINT                   74
#define VK_FORMAT_R16_SINT                   75
#define VK_FORMAT_R16_SFLOAT                 76
#define VK_FORMAT_R16G16_UNORM               77
#define VK_FORMAT_R16G16_SNORM               78
#define VK_FORMAT_R16G16_USCALED             79
#define VK_FORMAT_R16G16_SSCALED             80
#define VK_FORMAT_R16G16_UINT                81
#define VK_FORMAT_R16G16_SINT                82
#define VK_FORMAT_R16G16_SFLOAT              83
#define VK_FORMAT_R16G16B16_UNORM            84
#define VK_FORMAT_R16G16B16_SNORM            85
#define VK_FORMAT_R16G16B16_USCALED          86
#define VK_FORMAT_R16G16B16_SSCALED          87
#define VK_FORMAT_R16G16B16_UINT             88
#define VK_FORMAT_R16G16B16_SINT             89
#define VK_FORMAT_R16G16B16_SFLOAT           90
#define VK_FORMAT_R16G16B16A16_UNORM         91
#define VK_FORMAT_R16G16B16A16_SNORM         92
#define VK_FORMAT_R16G16B16A16_USCALED       93
#define VK_FORMAT_R16G16B16A16_SSCALED       94
#define VK_FORMAT_R16G16B16A16_UINT          95
#define VK_FORMAT_R16G16B16A16_SINT          96
#define VK_FORMAT_R16G16B16A16_SFLOAT        97
#define VK_FORMAT_R32_UINT                   98
#define VK_FORMAT_R32_SINT                   99
#define VK_FORMAT_R32_SFLOAT                 100
#define VK_FORMAT_R32G32_UINT                101
#define VK_FORMAT_R32G32_SINT                102
#define VK_FORMAT_R32G32_SFLOAT              103
#define VK_FORMAT_R32G32B32_UINT             104
#define VK_FORMAT_R32G32B32_SINT             105
#define VK_FORMAT_R32G32B32_SFLOAT           106
#define VK_FORMAT_R32G32B32A32_UINT          107
#define VK_FORMAT_R32G32B32A32_SINT          108
#define VK_FORMAT_R32G32B32A32_SFLOAT        109
#define VK_FORMAT_R64_UINT                   110
#define VK_FORMAT_R64_SINT                   111
#define VK_FORMAT_R64_SFLOAT                 112
#define VK_FORMAT_R64G64_UINT                113
#define VK_FORMAT_R64G64_SINT                114
#define VK_FORMAT_R64G64_SFLOAT              115
#define VK_FORMAT_R64G64B64_UINT             116
#define VK_FORMAT_R64G64B64_SINT             117
#define VK_FORMAT_R64G64B64_SFLOAT           118
#define VK_FORMAT_R64G64B64A64_UINT          119
#define VK_FORMAT_R64G64B64A64_SINT          120
#define VK_FORMAT_R64G64B64A64_SFLOAT        121

#define VK_INDEX_TYPE_UINT16    0
#define VK_INDEX_TYPE_UINT32    1
#define VK_INDEX_TYPE_NONE_KHR  1000165000
#define VK_INDEX_TYPE_UINT8_EXT 1000265000

#define VK_GEOMETRY_TYPE_TRIANGLES_KHR 0
#define VK_GEOMETRY_TYPE_AABBS_KHR     1
#define VK_GEOMETRY_TYPE_INSTANCES_KHR 2

#define VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR 1
#define VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR         2
#define VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR                 4
#define VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR              8

#define TYPE(type, align)                                                                                              \
   layout(buffer_reference, buffer_reference_align = align, scalar) buffer type##_ref                                  \
   {                                                                                                                   \
      type value;                                                                                                      \
   };

#define REF(type)  type##_ref
#define VOID_REF   uint64_t
#define NULL       0
#define DEREF(var) var.value

#define SIZEOF(type) uint32_t(uint64_t(REF(type)(uint64_t(0)) + 1))

#define OFFSET(ptr, offset) (uint64_t(ptr) + offset)

#define INFINITY (1.0 / 0.0)
#define NAN      (0.0 / 0.0)

#define INDEX(type, ptr, index) REF(type)(OFFSET(ptr, (index)*SIZEOF(type)))
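
/* The macros above emulate C-style typed pointers on top of
 * GL_EXT_buffer_reference(2): REF(type) names the reference type declared by
 * TYPE(), DEREF() loads/stores through it, and (per the extension) integer
 * arithmetic on a reference advances it by whole elements, which is what
 * SIZEOF() exploits by adding 1 to a null reference and casting the result
 * back to an address. Illustrative usage only (not code used elsewhere in
 * this file):
 *
 *    VOID_REF base = ...;                               // raw 64-bit address
 *    uint32_t third = DEREF(INDEX(uint32_t, base, 2));  // load base[2]
 *    DEREF(REF(uint32_t)(OFFSET(base, 4))) = third;     // store at base + 4
 */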

TYPE(int8_t, 1);
TYPE(uint8_t, 1);
TYPE(int16_t, 2);
TYPE(uint16_t, 2);
TYPE(int32_t, 4);
TYPE(uint32_t, 4);
TYPE(int64_t, 8);
TYPE(uint64_t, 8);

TYPE(float, 4);

TYPE(vec2, 4);
TYPE(vec3, 4);
TYPE(vec4, 4);

TYPE(uvec4, 16);

TYPE(VOID_REF, 8);

/* copied from u_math.h */
uint32_t
align(uint32_t value, uint32_t alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}

int32_t
to_emulated_float(float f)
{
   int32_t bits = floatBitsToInt(f);
   return f < 0 ? -2147483648 - bits : bits;
}

float
from_emulated_float(int32_t bits)
{
   return intBitsToFloat(bits < 0 ? -2147483648 - bits : bits);
}
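
/* The "emulated float" encoding maps a float to a signed integer that sorts
 * in the same order as the original value (the negative half of the bit range
 * is mirrored), so plain 32-bit integer atomicMin()/atomicMax() can be used
 * to reduce floating-point bounds. For example, to_emulated_float(-1.0) ==
 * -to_emulated_float(1.0), and smaller floats always map to smaller integers.
 * The actual use sites live elsewhere in the builder; the intended pattern is
 * roughly (emulated_min being a hypothetical variable):
 *
 *    atomicMin(emulated_min, to_emulated_float(bounds.min.x));
 */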

TYPE(radv_aabb, 4);

struct key_id_pair {
   uint32_t id;
   uint32_t key;
};
TYPE(key_id_pair, 4);

TYPE(radv_accel_struct_serialization_header, 8);
TYPE(radv_accel_struct_header, 8);
TYPE(radv_bvh_triangle_node, 4);
TYPE(radv_bvh_aabb_node, 4);
TYPE(radv_bvh_instance_node, 8);
TYPE(radv_bvh_box16_node, 4);
TYPE(radv_bvh_box32_node, 4);

TYPE(radv_ir_header, 4);
TYPE(radv_ir_node, 4);
TYPE(radv_ir_box_node, 4);

TYPE(radv_global_sync_data, 4);

uint32_t
id_to_offset(uint32_t id)
{
   return (id & (~7u)) << 3;
}

uint32_t
id_to_type(uint32_t id)
{
   return id & 7u;
}

uint32_t
pack_node_id(uint32_t offset, uint32_t type)
{
   return (offset >> 3) | type;
}

uint64_t
node_to_addr(uint64_t node)
{
   node &= ~7ul;
   node <<= 19;
   return int64_t(node) >> 16;
}

uint64_t
addr_to_node(uint64_t addr)
{
   return (addr >> 3) & ((1ul << 45) - 1);
}
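
/* Node ids refer to 64-byte-aligned nodes: the byte offset divided by 8 lives
 * in the upper bits and the node type in the low 3 bits, e.g.
 * pack_node_id(0x100, 5) == 0x25, id_to_offset(0x25) == 0x100 and
 * id_to_type(0x25) == 5. node_to_addr()/addr_to_node() apply the same idea to
 * full 64-bit pointers: only VA bits [47:3] are kept, the low 3 bits (which
 * may carry a type) are dropped, and node_to_addr() sign-extends bit 47 again
 * to recover a canonical 48-bit virtual address.
 */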

uint32_t
ir_id_to_offset(uint32_t id)
{
   return id & (~3u);
}

uint32_t
ir_id_to_type(uint32_t id)
{
   return id & 3u;
}

uint32_t
pack_ir_node_id(uint32_t offset, uint32_t type)
{
   return offset | type;
}

uint32_t
ir_type_to_bvh_type(uint32_t type)
{
   switch (type) {
   case radv_ir_node_triangle:
      return radv_bvh_node_triangle;
   case radv_ir_node_internal:
      return radv_bvh_node_box32;
   case radv_ir_node_instance:
      return radv_bvh_node_instance;
   case radv_ir_node_aabb:
      return radv_bvh_node_aabb;
   }
   /* unreachable in valid nodes */
   return RADV_BVH_INVALID_NODE;
}

float
aabb_surface_area(radv_aabb aabb)
{
   vec3 diagonal = aabb.max - aabb.min;
   return 2 * diagonal.x * diagonal.y + 2 * diagonal.y * diagonal.z + 2 * diagonal.x * diagonal.z;
}
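
/* Full surface area of the box, e.g. a unit cube yields 6.0. This is the
 * usual quantity for surface-area-heuristic style cost estimates; boxes with
 * inverted extents produce meaningless (possibly negative) values, so callers
 * are expected to pass valid bounds.
 */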

/* Just a wrapper for 3 uints. */
struct triangle_indices {
   uint32_t index[3];
};

triangle_indices
load_indices(VOID_REF indices, uint32_t index_format, uint32_t global_id)
{
   triangle_indices result;

   uint32_t index_base = global_id * 3;

   switch (index_format) {
   case VK_INDEX_TYPE_UINT16: {
      result.index[0] = DEREF(INDEX(uint16_t, indices, index_base + 0));
      result.index[1] = DEREF(INDEX(uint16_t, indices, index_base + 1));
      result.index[2] = DEREF(INDEX(uint16_t, indices, index_base + 2));
      break;
   }
   case VK_INDEX_TYPE_UINT32: {
      result.index[0] = DEREF(INDEX(uint32_t, indices, index_base + 0));
      result.index[1] = DEREF(INDEX(uint32_t, indices, index_base + 1));
      result.index[2] = DEREF(INDEX(uint32_t, indices, index_base + 2));
      break;
   }
   case VK_INDEX_TYPE_NONE_KHR: {
      result.index[0] = index_base + 0;
      result.index[1] = index_base + 1;
      result.index[2] = index_base + 2;
      break;
   }
   case VK_INDEX_TYPE_UINT8_EXT: {
      result.index[0] = DEREF(INDEX(uint8_t, indices, index_base + 0));
      result.index[1] = DEREF(INDEX(uint8_t, indices, index_base + 1));
      result.index[2] = DEREF(INDEX(uint8_t, indices, index_base + 2));
      break;
   }
   }

   return result;
}

/* Just a wrapper for 3 vec4s. */
struct triangle_vertices {
   vec4 vertex[3];
};

TYPE(float16_t, 2);

triangle_vertices
load_vertices(VOID_REF vertices, triangle_indices indices, uint32_t vertex_format, uint32_t stride)
{
   triangle_vertices result;

   for (uint32_t i = 0; i < 3; i++) {
      VOID_REF vertex_ptr = OFFSET(vertices, indices.index[i] * stride);
      vec4 vertex = vec4(0.0, 0.0, 0.0, 1.0);

      switch (vertex_format) {
      case VK_FORMAT_R32G32_SFLOAT:
         vertex.x = DEREF(INDEX(float, vertex_ptr, 0));
         vertex.y = DEREF(INDEX(float, vertex_ptr, 1));
         break;
      case VK_FORMAT_R32G32B32_SFLOAT:
      case VK_FORMAT_R32G32B32A32_SFLOAT:
         vertex.x = DEREF(INDEX(float, vertex_ptr, 0));
         vertex.y = DEREF(INDEX(float, vertex_ptr, 1));
         vertex.z = DEREF(INDEX(float, vertex_ptr, 2));
         break;
      case VK_FORMAT_R16G16_SFLOAT:
         vertex.x = DEREF(INDEX(float16_t, vertex_ptr, 0));
         vertex.y = DEREF(INDEX(float16_t, vertex_ptr, 1));
         break;
      case VK_FORMAT_R16G16B16_SFLOAT:
      case VK_FORMAT_R16G16B16A16_SFLOAT:
         vertex.x = DEREF(INDEX(float16_t, vertex_ptr, 0));
         vertex.y = DEREF(INDEX(float16_t, vertex_ptr, 1));
         vertex.z = DEREF(INDEX(float16_t, vertex_ptr, 2));
         break;
      case VK_FORMAT_R16G16_SNORM:
         vertex.x = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 0)) / float(0x7FFF));
         vertex.y = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 1)) / float(0x7FFF));
         break;
      case VK_FORMAT_R16G16B16A16_SNORM:
         vertex.x = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 0)) / float(0x7FFF));
         vertex.y = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 1)) / float(0x7FFF));
         vertex.z = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 2)) / float(0x7FFF));
         break;
      case VK_FORMAT_R8G8_SNORM:
         vertex.x = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 0)) / float(0x7F));
         vertex.y = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 1)) / float(0x7F));
         break;
      case VK_FORMAT_R8G8B8A8_SNORM:
         vertex.x = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 0)) / float(0x7F));
         vertex.y = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 1)) / float(0x7F));
         vertex.z = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 2)) / float(0x7F));
         break;
      case VK_FORMAT_R16G16_UNORM:
         vertex.x = DEREF(INDEX(uint16_t, vertex_ptr, 0)) / float(0xFFFF);
         vertex.y = DEREF(INDEX(uint16_t, vertex_ptr, 1)) / float(0xFFFF);
         break;
      case VK_FORMAT_R16G16B16A16_UNORM:
         vertex.x = DEREF(INDEX(uint16_t, vertex_ptr, 0)) / float(0xFFFF);
         vertex.y = DEREF(INDEX(uint16_t, vertex_ptr, 1)) / float(0xFFFF);
         vertex.z = DEREF(INDEX(uint16_t, vertex_ptr, 2)) / float(0xFFFF);
         break;
      case VK_FORMAT_R8G8_UNORM:
         vertex.x = DEREF(INDEX(uint8_t, vertex_ptr, 0)) / float(0xFF);
         vertex.y = DEREF(INDEX(uint8_t, vertex_ptr, 1)) / float(0xFF);
         break;
      case VK_FORMAT_R8G8B8A8_UNORM:
         vertex.x = DEREF(INDEX(uint8_t, vertex_ptr, 0)) / float(0xFF);
         vertex.y = DEREF(INDEX(uint8_t, vertex_ptr, 1)) / float(0xFF);
         vertex.z = DEREF(INDEX(uint8_t, vertex_ptr, 2)) / float(0xFF);
         break;
      case VK_FORMAT_A2B10G10R10_UNORM_PACK32: {
         uint32_t data = DEREF(REF(uint32_t)(vertex_ptr));
         vertex.x = float(data & 0x3FF) / 0x3FF;
         vertex.y = float((data >> 10) & 0x3FF) / 0x3FF;
         vertex.z = float((data >> 20) & 0x3FF) / 0x3FF;
         break;
      }
      }

      result.vertex[i] = vertex;
   }

   return result;
}

/* A GLSL-adapted copy of VkAccelerationStructureInstanceKHR. */
struct AccelerationStructureInstance {
   mat3x4 transform;
   uint32_t custom_instance_and_mask;
   uint32_t sbt_offset_and_flags;
   uint64_t accelerationStructureReference;
};
TYPE(AccelerationStructureInstance, 8);
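
/* The field packing matches VkAccelerationStructureInstanceKHR: the low 24
 * bits of custom_instance_and_mask hold instanceCustomIndex and the high 8
 * bits hold the cull mask; sbt_offset_and_flags likewise packs the 24-bit SBT
 * record offset with the VkGeometryInstanceFlagsKHR in the high 8 bits. This
 * is why build_instance() below can test for a zero mask with
 * custom_instance_and_mask < (1u << 24u).
 */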

bool
build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, radv_bvh_geometry_data geom_data, uint32_t global_id)
{
   bool is_valid = true;
   triangle_indices indices = load_indices(geom_data.indices, geom_data.index_format, global_id);

   triangle_vertices vertices = load_vertices(geom_data.data, indices, geom_data.vertex_format, geom_data.stride);

   /* An inactive triangle is one for which the first (X) component of any vertex is NaN. If any
    * other vertex component is NaN, and the first is not, the behavior is undefined. If the vertex
    * format does not have a NaN representation, then all triangles are considered active.
    */
   if (isnan(vertices.vertex[0].x) || isnan(vertices.vertex[1].x) || isnan(vertices.vertex[2].x))
#if ALWAYS_ACTIVE
      is_valid = false;
#else
      return false;
#endif

   if (geom_data.transform != NULL) {
      mat4 transform = mat4(1.0);

      for (uint32_t col = 0; col < 4; col++)
         for (uint32_t row = 0; row < 3; row++)
            transform[col][row] = DEREF(INDEX(float, geom_data.transform, col + row * 4));

      for (uint32_t i = 0; i < 3; i++)
         vertices.vertex[i] = transform * vertices.vertex[i];
   }

   REF(radv_bvh_triangle_node) node = REF(radv_bvh_triangle_node)(dst_ptr);

   bounds.min = vec3(INFINITY);
   bounds.max = vec3(-INFINITY);

   for (uint32_t coord = 0; coord < 3; coord++)
      for (uint32_t comp = 0; comp < 3; comp++) {
         DEREF(node).coords[coord][comp] = vertices.vertex[coord][comp];
         bounds.min[comp] = min(bounds.min[comp], vertices.vertex[coord][comp]);
         bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]);
      }

   DEREF(node).triangle_id = global_id;
   DEREF(node).geometry_id_and_flags = geom_data.geometry_id;
   DEREF(node).id = 9;

   return is_valid;
}
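
/* Note on the ALWAYS_ACTIVE path above: both variants report a NaN (inactive)
 * triangle as invalid, but with ALWAYS_ACTIVE defined the node and its bounds
 * are still written out instead of being skipped entirely. Presumably this
 * keeps the node layout stable for acceleration structures that may need the
 * leaf data later; the early return is the cheaper path when that is not
 * required.
 */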

bool
build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id)
{
   bool is_valid = true;
   REF(radv_bvh_aabb_node) node = REF(radv_bvh_aabb_node)(dst_ptr);

   for (uint32_t vec = 0; vec < 2; vec++)
      for (uint32_t comp = 0; comp < 3; comp++) {
         float coord = DEREF(INDEX(float, src_ptr, comp + vec * 3));

         if (vec == 0)
            bounds.min[comp] = coord;
         else
            bounds.max[comp] = coord;
      }

   /* An inactive AABB is one for which the minimum X coordinate is NaN. If any other component is
    * NaN, and the first is not, the behavior is undefined.
    */
   if (isnan(bounds.min.x))
#if ALWAYS_ACTIVE
      is_valid = false;
#else
      return false;
#endif

   DEREF(node).primitive_id = global_id;
   DEREF(node).geometry_id_and_flags = geometry_id;

   return is_valid;
}

radv_aabb
calculate_instance_node_bounds(radv_accel_struct_header header, mat3x4 otw_matrix)
{
   radv_aabb aabb;
   for (uint32_t comp = 0; comp < 3; ++comp) {
      aabb.min[comp] = otw_matrix[comp][3];
      aabb.max[comp] = otw_matrix[comp][3];
      for (uint32_t col = 0; col < 3; ++col) {
         aabb.min[comp] +=
            min(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
         aabb.max[comp] +=
            max(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
      }
   }
   return aabb;
}
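
/* Transforms the BLAS bounds in header.aabb into world space without visiting
 * all eight corners: each output component starts at the translation and then
 * accumulates, per matrix column, whichever of the two box extremes minimizes
 * (respectively maximizes) that term. For an affine object-to-world transform
 * this yields the same conservative box as transforming every corner and
 * taking its AABB.
 */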

uint32_t
encode_sbt_offset_and_flags(uint32_t src)
{
   uint32_t flags = src >> 24;
   uint32_t ret = src & 0xffffffu;
   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0)
      ret |= RADV_INSTANCE_FORCE_OPAQUE;
   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0)
      ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE;
   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0)
      ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE;
   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0)
      ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING;
   return ret;
}

bool
build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id)
{
   REF(radv_bvh_instance_node) node = REF(radv_bvh_instance_node)(dst_ptr);

   AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));

   /* An inactive instance is one whose acceleration structure handle is VK_NULL_HANDLE. Since the active terminology is
    * only relevant for BVH updates, which we do not implement, we can also skip instances with mask == 0.
    */
   if (instance.accelerationStructureReference == 0 || instance.custom_instance_and_mask < (1u << 24u))
      return false;

   radv_accel_struct_header instance_header =
      DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));

   DEREF(node).bvh_ptr = addr_to_node(instance.accelerationStructureReference + instance_header.bvh_offset);
   DEREF(node).bvh_offset = instance_header.bvh_offset;

   mat4 transform = mat4(instance.transform);
   mat4 inv_transform = transpose(inverse(transpose(transform)));
   DEREF(node).wto_matrix = mat3x4(inv_transform);
   DEREF(node).otw_matrix = mat3x4(transform);

   bounds = calculate_instance_node_bounds(instance_header, mat3x4(transform));

   DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask;
   DEREF(node).sbt_offset_and_flags = encode_sbt_offset_and_flags(instance.sbt_offset_and_flags);
   DEREF(node).instance_id = global_id;

   return true;
}

/** Compute ceiling of integer quotient of A divided by B.
    From macros.h */
#define DIV_ROUND_UP(A, B) (((A) + (B)-1) / (B))
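/* E.g. DIV_ROUND_UP(9, 4) == 3; intended for non-negative A and positive B. */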

#ifdef USE_GLOBAL_SYNC

/* There might be more invocations available than tasks to do.
 * In that case, the fetched task index is greater than the
 * counter offset for the next phase. To avoid out-of-bounds
 * accesses, phases will be skipped until the task index is
 * in-bounds again. */
uint32_t num_tasks_to_skip = 0;
uint32_t phase_index = 0;
bool should_skip = false;
shared uint32_t global_task_index;

shared uint32_t shared_phase_index;

uint32_t
task_count(REF(radv_ir_header) header)
{
   uint32_t phase_index = DEREF(header).sync_data.phase_index;
   return DEREF(header).sync_data.task_counts[phase_index & 1];
}

/* Sets the task count for the next phase. */
void
set_next_task_count(REF(radv_ir_header) header, uint32_t new_count)
{
   uint32_t phase_index = DEREF(header).sync_data.phase_index;
   DEREF(header).sync_data.task_counts[(phase_index + 1) & 1] = new_count;
}
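
/* task_counts[] is double-buffered and indexed by the parity of the phase
 * index, so the count for the next phase can be published with
 * set_next_task_count() while workgroups are still reading the current one.
 */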

/*
 * This function has two main objectives:
 * Firstly, it partitions pending work among free invocations.
 * Secondly, it guarantees global synchronization between different phases.
 *
 * After every call to fetch_task, a new task index is returned.
 * fetch_task will also set num_tasks_to_skip. Use should_execute_phase
 * to determine if the current phase should be executed or skipped.
 *
 * Since tasks are assigned per-workgroup, there is a possibility of the task index being
 * greater than the total task count.
 */
uint32_t
fetch_task(REF(radv_ir_header) header, bool did_work)
{
   /* Perform a memory + control barrier for all buffer writes for the entire workgroup.
    * This guarantees that once the workgroup leaves the PHASE loop, all invocations have finished
    * and their results are written to memory. */
   controlBarrier(gl_ScopeWorkgroup, gl_ScopeDevice, gl_StorageSemanticsBuffer,
                  gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
   if (gl_LocalInvocationIndex == 0) {
      if (did_work)
         atomicAdd(DEREF(header).sync_data.task_done_counter, 1);
      global_task_index = atomicAdd(DEREF(header).sync_data.task_started_counter, 1);

      do {
         /* Perform a memory barrier to refresh the current phase's end counter, in case
          * another workgroup changed it. */
         memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                       gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

         /* The first invocation of the first workgroup in a new phase is responsible for initiating the
          * switch to a new phase. It is only possible to switch to a new phase if all tasks of the
          * previous phase have been completed. Switching to a new phase and incrementing the phase
          * end counter in turn notifies all invocations for that phase that it is safe to execute.
          */
         if (global_task_index == DEREF(header).sync_data.current_phase_end_counter &&
             DEREF(header).sync_data.task_done_counter == DEREF(header).sync_data.current_phase_end_counter) {
            if (DEREF(header).sync_data.next_phase_exit_flag != 0) {
               DEREF(header).sync_data.phase_index = TASK_INDEX_INVALID;
               memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                             gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
            } else {
               atomicAdd(DEREF(header).sync_data.phase_index, 1);
               DEREF(header).sync_data.current_phase_start_counter = DEREF(header).sync_data.current_phase_end_counter;
               /* Ensure the changes to the phase index and start/end counter are visible to other
                * workgroups waiting in the loop. */
               memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                             gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
               atomicAdd(DEREF(header).sync_data.current_phase_end_counter,
                         DIV_ROUND_UP(task_count(header), gl_WorkGroupSize.x));
            }
            break;
         }

         /* If other invocations have finished all nodes, break out; there is no work to do. */
         if (DEREF(header).sync_data.phase_index == TASK_INDEX_INVALID) {
            break;
         }
      } while (global_task_index >= DEREF(header).sync_data.current_phase_end_counter);

      shared_phase_index = DEREF(header).sync_data.phase_index;
   }

   barrier();
   if (DEREF(header).sync_data.phase_index == TASK_INDEX_INVALID)
      return TASK_INDEX_INVALID;

   num_tasks_to_skip = shared_phase_index - phase_index;

   uint32_t local_task_index = global_task_index - DEREF(header).sync_data.current_phase_start_counter;
   return local_task_index * gl_WorkGroupSize.x + gl_LocalInvocationID.x;
}

bool
should_execute_phase()
{
   if (num_tasks_to_skip > 0) {
      /* Skip to next phase. */
      ++phase_index;
      --num_tasks_to_skip;
      return false;
   }
   return true;
}

#define PHASE(header)                                                                                                  \
   for (; task_index != TASK_INDEX_INVALID && should_execute_phase(); task_index = fetch_task(header, true))
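
/* A sketch of how these helpers are meant to be combined in a builder shader
 * (illustrative only; the real entry points live in the builder compute
 * shaders, and PHASE expects a variable named task_index in scope):
 *
 *    uint32_t task_index = fetch_task(header, false);
 *    PHASE(header) {
 *       ... process the element identified by task_index ...
 *       set_next_task_count(header, ...); // if the next phase's size is known
 *    }
 */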
#endif

#endif