/* Copyright © 2022 Bas Nieuwenhuizen
 * Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

/* Acceleration-structure copy shader for the ANV Vulkan driver.
 *
 * One kernel implements three operations, selected by args.mode:
 *   - ANV_COPY_MODE_COPY:        plain BVH-to-BVH copy.
 *   - ANV_COPY_MODE_SERIALIZE:   BVH -> serialized memory
 *                                (vkCmdCopyAccelerationStructureToMemoryKHR).
 *   - ANV_COPY_MODE_DESERIALIZE: serialized memory -> BVH
 *                                (vkCmdCopyMemoryToAccelerationStructureKHR).
 * Each invocation copies 8 bytes per loop iteration in a strided loop across
 * all invocations of the dispatch.
 */

#version 460

#extension GL_GOOGLE_include_directive : require

#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require

layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;

#include "anv_build_interface.h"

/* Push constants: src/dst addresses, copy mode, instance count and the
 * driver/compat UUIDs written into the serialization header.
 */
layout(push_constant) uniform CONSTS {
   copy_args args;
};

/* Layout of serialized data:
 *
 * |**************************************|
 * | vk_accel_struct_serialization_header |
 * |--------------------------------------|
 * | For a TLAS, all handles to the BLAS  |
 * | within this TLAS.                    |
 * | For a BLAS, nothing.                 |
 * |--------------------------------------|
 * | Driver-specific part.                |
 * | For Intel, this starts with          |
 * | anv_accel_struct_header as drawn     |
 * | in anv_bvh.h                         |
 * |**************************************/

/*
 * Explanation of BLAS handles:
 * According to the spec of vkCmdCopyAccelerationStructureToMemoryKHR,
 * for a TLAS, the handles of all BLAS/instances within this TLAS are
 * tightly packed after vk_accel_struct_serialization_header, making this
 * serialized memory a semi-opaque object. The application may
 * swap/replace these handles with other handles. In fact this is what
 * dEQP-VK.ray_tracing_pipeline.acceleration_structures.header_bottom_address.*
 * does.
 *
 * Therefore, if the application updates the handles, we need to replace
 * the old handles in anv_instance_leaf with the new ones. To access an
 * anv_instance_leaf without traversing the TLAS, pointers to these
 * anv_instance_leaf structs are stored right after anv_accel_struct_header,
 * allowing us to locate them in the TLAS instantly.
 *
 * Note: it is somewhat surprising that the application may swap in new
 * BLAS handles without rebuilding the TLAS, but the spec permits it.
 */

void
main(void)
{
   uint32_t global_id = gl_GlobalInvocationID.x;
   /* Total invocation count; 128 matches local_size_x above. */
   uint32_t lanes = gl_NumWorkGroups.x * 128;
   /* Byte stride between successive iterations of one invocation
    * (each iteration copies 8 bytes).
    */
   uint32_t increment = lanes * 8;

   uint64_t copy_src_addr = args.src_addr;
   uint64_t copy_dst_addr = args.dst_addr;

   if (args.mode == ANV_COPY_MODE_DESERIALIZE) {
      /* The driver-specific payload starts after the serialization header
       * and the array of BLAS handles (one uint64_t per instance).
       */
      copy_src_addr += SIZEOF(vk_accel_struct_serialization_header) +
                       DEREF(REF(vk_accel_struct_serialization_header)(args.src_addr)).instance_count * SIZEOF(uint64_t);
   }

   /* In all modes, copy_src_addr now points at an anv_accel_struct_header. */
   REF(anv_accel_struct_header) header = REF(anv_accel_struct_header)(copy_src_addr);

   /* Location of the BLAS-handle array in the serialized blob (source side
    * for deserialize; re-pointed at the destination below for serialize).
    */
   uint64_t instance_base = args.src_addr + SIZEOF(vk_accel_struct_serialization_header);
   uint64_t instance_offset = SIZEOF(anv_accel_struct_header);

   /* We store the addresses of the instance leaves right after the BVH
    * header; [instance_offset, instance_end) is that pointer table within
    * the accel struct. Zero instances (a BLAS) leaves instance_end == 0 so
    * the patching branch below never triggers.
    */
   uint64_t instance_end = DEREF(header).instance_count * SIZEOF(uint64_t);

   if (instance_end > 0)
      instance_end += instance_offset;

   if (args.mode == ANV_COPY_MODE_SERIALIZE) {
      /* Destination payload goes after the serialization header and the
       * handle array. In serialize mode, src is a raw accel struct, so its
       * instance count is read from anv_accel_struct_header at src_addr.
       */
      copy_dst_addr += SIZEOF(vk_accel_struct_serialization_header) +
                       DEREF(REF(anv_accel_struct_header)(args.src_addr)).instance_count * SIZEOF(uint64_t);

      /* A single invocation fills in the serialization header. */
      if (global_id == 0) {
         REF(vk_accel_struct_serialization_header) ser_header =
            REF(vk_accel_struct_serialization_header)(args.dst_addr);
         DEREF(ser_header).serialization_size = DEREF(header).serialization_size;
         DEREF(ser_header).deserialization_size = DEREF(header).compacted_size;
         DEREF(ser_header).instance_count = DEREF(header).instance_count;

         for (uint32_t offset = 0; offset < VK_UUID_SIZE; offset++) {
            DEREF(ser_header).driver_uuid[offset] = args.driver_uuid[offset];
         }

         for (uint32_t offset = 0; offset < VK_UUID_SIZE; offset++) {
            DEREF(ser_header).accel_struct_compat[offset] = args.accel_struct_compat[offset];
         }
      }

      /* When serializing, the handle array is written at the destination. */
      instance_base = args.dst_addr + SIZEOF(vk_accel_struct_serialization_header);
   } else if (args.mode == ANV_COPY_MODE_COPY) {
      /* A plain copy never patches instance leaves. */
      instance_end = 0;
   }

   uint64_t size = DEREF(header).compacted_size;
   for (uint64_t offset = global_id * 8; offset < size; offset += increment) {
      /* copy 8 bytes per iteration */
      DEREF(REF(uint64_t)(copy_dst_addr + offset)) =
         DEREF(REF(uint64_t)(copy_src_addr + offset));

      /* Do the adjustment inline in the same invocation that copies the data so that we don't have
       * to synchronize.
       */
      if (offset < instance_end && offset >= instance_offset &&
          (offset - instance_offset) % SIZEOF(uint64_t) == 0) {
         /* Index of this slot in the instance-leaf pointer table / the
          * serialized handle array.
          */
         uint64_t idx = (offset - instance_offset) / SIZEOF(uint64_t);

         if (args.mode == ANV_COPY_MODE_SERIALIZE) {
            /* Indirectly access the anv_instance_leaf, and store the blas_ptrs after ser_header.
             * The 48-bit mask strips the tag bits packed above the address.
             */
            uint64_t instance_leaf_addr = DEREF(REF(uint64_t)(copy_src_addr + offset));
            REF(anv_instance_leaf) instance_leaf = REF(anv_instance_leaf)(instance_leaf_addr);
            uint64_t blas_ptr = DEREF(instance_leaf).part1.bvh_ptr & 0xfffffffffffful;
            DEREF(INDEX(uint64_t, instance_base, idx)) = blas_ptr;
         } else { /* ANV_COPY_MODE_DESERIALIZE */
            /* Indirectly access the anv_instance_leaf, and replace the bvh_ptr with the ones after
             * ser_header (the application may have swapped in new BLAS handles — see the comment
             * block above main()).
             */
            uint64_t instance_leaf_addr = DEREF(REF(uint64_t)(copy_dst_addr + offset));
            REF(anv_instance_leaf) instance_leaf = REF(anv_instance_leaf)(instance_leaf_addr);
            uint64_t blas_ptr = DEREF(INDEX(uint64_t, instance_base, idx));
            DEREF(instance_leaf).part1.bvh_ptr = (blas_ptr & 0xfffffffffffful);

            /* set the startNodePtr to blas_ptr + ANV_HEADER_SIZE */
            uint64_t mask = 0x0000fffffffffffful;
            uint64_t new_startNodePtr = blas_ptr + ANV_RT_BVH_HEADER_SIZE;
            /* Clear the low 48 bits (the node pointer) and keep the
             * instance-flag bits packed in the upper bits.
             */
            DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags =
               (DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags & ~mask) | (new_startNodePtr & mask);
         }
      }
   }
}