/*
 * Copyright © 2022 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
#version 460

/* Needed for the #include of the shared interface header below. */
#extension GL_GOOGLE_include_directive : require

/* Explicitly sized arithmetic types; main() uses uint32_t and uint64_t. */
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require

/* Buffer access through raw 64-bit addresses.
 * NOTE(review): presumably these back the REF()/DEREF() helpers pulled in by
 * the include below -- confirm against that header. */
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require

/* One-dimensional workgroups of 128 invocations. main() hard-codes the same
 * 128 when it computes the total invocation count, so keep the two in sync. */
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;

#include "tu_build_interface.h"

/* Push constants for the copy. main() reads args.src_addr, args.dst_addr and
 * args.mode; the copy_args type itself is declared outside this file. */
layout(push_constant) uniform CONSTS
{
   copy_args args;
};
44
/*
 * Copies an acceleration structure from args.src_addr to args.dst_addr in one
 * of three modes selected by args.mode:
 *
 *  - TU_COPY_MODE_COPY:        plain copy of compacted_size bytes.
 *  - TU_COPY_MODE_SERIALIZE:   the destination receives a
 *                              vk_accel_struct_serialization_header, then one
 *                              uint64_t per instance, then the copied data.
 *  - TU_COPY_MODE_DESERIALIZE: the source has that same layout; the data part
 *                              is copied out and every instance descriptor's
 *                              bvh_ptr is rebuilt from the uint64_t table.
 *
 * Every invocation moves one uvec4 (16 bytes) per loop iteration and then
 * advances by 16 bytes times the total number of invocations.
 */
void
main(void)
{
   bool serializing = args.mode == TU_COPY_MODE_SERIALIZE;
   bool deserializing = args.mode == TU_COPY_MODE_DESERIALIZE;

   uint32_t invocation = gl_GlobalInvocationID.x;
   /* 128 is the local_size_x declared for this shader. */
   uint32_t invocation_count = gl_NumWorkGroups.x * 128;
   uint32_t stride = invocation_count * 16;

   uint64_t src_base = args.src_addr;
   uint64_t dst_base = args.dst_addr;

   /* When deserializing, the acceleration structure starts after the
    * serialization header and the per-instance uint64_t table. */
   if (deserializing) {
      src_base += SIZEOF(vk_accel_struct_serialization_header) +
                  DEREF(REF(vk_accel_struct_serialization_header)(args.src_addr)).instance_count * SIZEOF(uint64_t);
   }

   REF(tu_accel_struct_header) as_header = REF(tu_accel_struct_header)(src_base);

   /* The per-instance uint64_t table sits right behind the serialization
    * header, in whichever buffer is the serialized one. */
   uint64_t addr_table =
      (serializing ? args.dst_addr : args.src_addr) + SIZEOF(vk_accel_struct_serialization_header);

   /* Byte range [desc_begin, desc_end) of the instance descriptors relative
    * to the start of the acceleration structure. desc_end stays 0 when there
    * is nothing to patch, which makes the range test in the loop always fail. */
   uint64_t desc_begin = SIZEOF(tu_accel_struct_header);
   uint64_t desc_end = DEREF(as_header).instance_count * SIZEOF(tu_instance_descriptor);
   if (desc_end != 0)
      desc_end += desc_begin;

   if (serializing) {
      /* src_base is untouched in this mode, so as_header is the header at
       * args.src_addr. */
      dst_base += SIZEOF(vk_accel_struct_serialization_header) +
                  DEREF(as_header).instance_count * SIZEOF(uint64_t);

      /* A single invocation fills in the serialization header. */
      if (invocation == 0) {
         REF(vk_accel_struct_serialization_header) out_header =
            REF(vk_accel_struct_serialization_header)(args.dst_addr);
         DEREF(out_header).serialization_size = DEREF(as_header).serialization_size;
         DEREF(out_header).deserialization_size = DEREF(as_header).compacted_size;
         DEREF(out_header).instance_count = DEREF(as_header).instance_count;
      }
   } else if (args.mode == TU_COPY_MODE_COPY) {
      /* A plain copy never touches the instance descriptors. */
      desc_end = 0;
   }

   uint64_t total_bytes = DEREF(as_header).compacted_size;
   for (uint64_t cursor = invocation * 16; cursor < total_bytes; cursor += stride) {
      DEREF(REF(uvec4)(dst_base + cursor)) = DEREF(REF(uvec4)(src_base + cursor));

      /* The fix-up below runs in the very invocation that copied these bytes,
       * so no synchronization with other invocations is needed.
       * NOTE(review): this appears to rely on bvh_ptr lying within the first
       * 16 bytes of tu_instance_descriptor -- confirm against its layout. */
      if (cursor < desc_begin || cursor >= desc_end)
         continue;

      /* Only the chunk that starts a descriptor does the fix-up. */
      uint64_t rel = cursor - desc_begin;
      if (rel % SIZEOF(tu_instance_descriptor) != 0)
         continue;

      uint64_t slot = rel / SIZEOF(tu_instance_descriptor);

      REF(tu_instance_descriptor) src_desc = REF(tu_instance_descriptor)(src_base + cursor);
      uint32_t bvh_offset = DEREF(src_desc).bvh_offset;

      if (serializing) {
         /* Record bvh_ptr with bvh_offset subtracted in this instance's slot. */
         DEREF(INDEX(uint64_t, addr_table, slot)) = DEREF(src_desc).bvh_ptr - bvh_offset;
      } else { /* TU_COPY_MODE_DESERIALIZE */
         /* Rebuild bvh_ptr from the stored address plus the copied offset. */
         uint64_t blas_addr = DEREF(INDEX(uint64_t, addr_table, slot));
         REF(tu_instance_descriptor) dst_desc = REF(tu_instance_descriptor)(dst_base + cursor);
         DEREF(dst_desc).bvh_ptr = blas_addr + bvh_offset;
      }
   }
}
107