1 // 2 // Copyright (C) 2009-2021 Intel Corporation 3 // 4 // SPDX-License-Identifier: MIT 5 // 6 // 7 8 // 9 // This file contains structure definitions shared by GRL OCL kernels and host code 10 // 11 12 #include "GRLGen12.h" 13 #pragma once 14 15 #define BFS_NUM_BINS 16 16 #define BFS_NUM_VCONTEXTS 256 17 #define BFS_MAX_DEPTH 32 18 19 #define TRIVIAL_BUILD_THRESHOLD 6 20 #define SINGLE_WG_BUILD_THRESHOLD 256 21 22 #define QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM 16384 23 24 25 typedef uchar vcontext_id_t; 26 27 GRL_NAMESPACE_BEGIN(GRL) 28 GRL_NAMESPACE_BEGIN(RTAS) 29 GRL_NAMESPACE_BEGIN(GPUBVHBuilder) 30 31 struct BFS_Split 32 { 33 float sah; 34 int dim; 35 int pos; 36 }; 37 38 39 struct BFS_BinInfo 40 { 41 float min_max[18 * BFS_NUM_BINS]; // layout: bins[axis][num_bins][6] 42 // The 6 are lower(xyz) and -upper(xyz) 43 // bins use negated-max so that we can use vectorized mins instead of min/max pairs 44 uint counts[3 * BFS_NUM_BINS]; 45 }; 46 enum_uint8(SAHBuildFlags)47enum_uint8(SAHBuildFlags) 48 { 49 SAH_FLAG_NEED_BACKPOINTERS = 1, // identifies a mixed internal node where each child can have a different type 50 SAH_FLAG_NEED_MASKS = 2 51 }; 52 53 struct SAHBuildGlobals 54 { 55 qword p_primref_index_buffers; 56 qword p_primrefs_buffer; 57 qword p_bvh2; 58 qword p_globals; // TODO: deprecate this 59 qword p_bvh_base; 60 gpuva_t p_qnode_root_buffer; 61 62 dword flags; // bit 1 is 'alloc_backpointers'. bit 2 is 'need_masks' 63 dword num_primrefs; 64 dword leaf_size; 65 dword leaf_type; 66 67 dword root_buffer_num_produced; 68 dword root_buffer_num_produced_hi; 69 dword root_buffer_num_consumed; 70 dword root_buffer_num_consumed_hi; 71 dword root_buffer_num_to_consume; 72 dword root_buffer_num_to_consume_hi; 73 }; 74 75 struct SAHBuildBuffersInfo 76 { 77 gpuva_t p_globals; 78 gpuva_t p_primref_index_buffers; 79 gpuva_t p_primrefs_buffer; 80 gpuva_t p_bvh2; 81 gpuva_t p_bvh_base; 82 gpuva_t p_qnode_root_buffer; 83 dword sah_globals_flags; 84 dword _pad; 85 gpuva_t _pad2; 86 }; 87 88 typedef union LRBounds 89 { 90 struct 91 { 92 struct AABB3f left_centroid_bounds; 93 struct AABB3f left_geom_bounds; 94 struct AABB3f right_centroid_bounds; 95 struct AABB3f right_geom_bounds; 96 } boxes; 97 struct 98 { 99 float Array[24]; 100 } scalars; 101 } LRBounds; 102 103 104 struct VContext 105 { 106 uint dispatch_primref_begin; // range of primrefs for this task 107 uint dispatch_primref_end; 108 uint bvh2_root; // BVH2 root node for this task 109 uint tree_depth; // depth of this node in the tree 110 uint num_left; // primref counts 111 uint num_right; 112 uint lr_mask; // lower 8b : left mask. upper 8b : right mask 113 uint batch_index; 114 115 // pass1 global working state and output 116 struct BFS_Split split; 117 struct BFS_BinInfo global_bin_info; 118 119 // pass2 global working state and output 120 LRBounds lr_bounds; 121 }; 122 123 124 125 struct BFSDispatchRecord 126 { 127 ushort batch_index; 128 ushort context_id; 129 }; 130 131 132 struct BFSDispatchQueue 133 { 134 uint num_dispatches; 135 uint wg_count[BFS_NUM_VCONTEXTS]; 136 struct BFSDispatchRecord records[BFS_NUM_VCONTEXTS]; 137 }; 138 139 struct BFS1SpillStackEntry 140 { 141 uint primref_begin; 142 uint primref_end; 143 uint bvh2_root; 144 ushort tree_depth; 145 ushort batch_index; 146 }; 147 148 struct BFS1SpillStack 149 { 150 uint size; 151 struct BFS1SpillStackEntry entries[BFS_NUM_VCONTEXTS * BFS_MAX_DEPTH]; 152 }; 153 154 struct QNodeGlobalRootBufferEntry 155 { 156 uint bvh2_node; 157 uint qnode; 158 uint build_idx; 159 uint _pad; 160 }; 161 162 struct QNodeGlobalRootBuffer 163 { 164 uint curr_entries_offset; // we use "entries" as two buffers, so offset is either 0 or QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM 165 struct QNodeGlobalRootBufferEntry entries[QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM * 2]; 166 }; 167 168 struct DFSDispatchRecord 169 { 170 uint primref_base; 171 uint bvh2_base; 172 uint batch_index; 173 ushort num_primrefs; 174 ushort tree_depth; 175 }; 176 177 178 struct DFSDispatchQueue 179 { 180 struct DFSDispatchRecord records[BFS_NUM_VCONTEXTS * 2]; 181 }; 182 183 #define VCONTEXT_STATE_EXECUTING 0 184 #define VCONTEXT_STATE_UNALLOCATED 1 185 186 union SchedulerUnion 187 { 188 struct VContextScheduler 189 { 190 ///////////////////////////////////////////////////////////// 191 // State data used for communication with command streamer 192 // NOTE: This part must match definition in 'new_sah_builder.grl' 193 ///////////////////////////////////////////////////////////// 194 195 dword num_bfs_wgs; 196 dword num_dfs_wgs; 197 198 dword scheduler_postsync; 199 dword _pad1; 200 201 dword num_trivial_builds; // number of trivial builds (#primrefs < leaf_size). 202 dword num_single_builds; // number of single-wg builds (#primrefs < threshold) 203 204 dword batched_build_wg_count; // number of wgs to dispatch for initial BFS pass 205 dword batched_build_loop_mask; // value is 0 if #builds <= #contexts. else 1 command streamer uses this as a loop condition 206 207 ///////////////////////////////////////////////////////////// 208 209 dword batched_build_count; // number of batched builds in the SAHBuildGlobals buffer 210 dword batched_build_offset; // location of the first batched-build in the SAHBuildGlobals buffer 211 212 dword vcontext_state[BFS_NUM_VCONTEXTS]; 213 214 struct BFSDispatchQueue bfs_queue; 215 struct DFSDispatchQueue dfs_queue; 216 217 struct VContext contexts[BFS_NUM_VCONTEXTS]; 218 219 struct BFS1SpillStack bfs2_spill_stack; 220 } vContextScheduler; 221 222 struct QnodeScheduler 223 { 224 dword num_qnode_grb_curr_entries; 225 dword num_qnode_grb_new_entries; 226 227 dword scheduler_postsync; 228 dword _pad1; 229 230 dword num_trivial_builds; // number of trivial builds (#primrefs < leaf_size). 231 dword num_single_builds; // number of single-wg builds (#primrefs < threshold) 232 233 dword batched_builds_to_process; 234 dword num_max_qnode_global_root_buffer_entries; // number of maximum entries to global root buffer 235 236 ///////////////////////////////////////////////////////////// 237 238 dword batched_build_count; // number of batched builds in the SAHBuildGlobals buffer 239 dword batched_build_offset; // location of the first batched-build in the SAHBuildGlobals buffer 240 241 struct QNodeGlobalRootBuffer qnode_global_root_buffer; 242 } qnodeScheduler; 243 }; 244 245 246 struct BVH2Node 247 { 248 struct AABB3f box; 249 uint meta_u; // leaf: primref start. inner: offset from node to its first child 250 uint meta_ss; 251 //ushort meta_s; // leaf: primref count. inner: offset from first to second child, in nodes 252 //uchar is_inner; // 1 if inner, 0 if leaf 253 //uchar mask; 254 }; 255 256 struct BVH2 257 { 258 uint num_nodes; 259 uint _pad[7]; // align to 32B 260 }; 261 262 263 GRL_NAMESPACE_END(GPUBVHBuilder) 264 GRL_NAMESPACE_END(RTAS) 265 GRL_NAMESPACE_END(GRL) 266