• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (C) 2009-2021 Intel Corporation
3 //
4 // SPDX-License-Identifier: MIT
5 //
6 //
7 
8 //
9 //   This file contains structure definitions shared by GRL OCL kernels and host code
10 //
11 
12 #include "GRLGen12.h"
13 #pragma once
14 
// Bin count used to size the per-axis arrays in BFS_BinInfo.
#define BFS_NUM_BINS 16
// Number of virtual contexts; sizes the scheduler's context, state and dispatch-queue arrays.
#define BFS_NUM_VCONTEXTS 256
// Multiplied by BFS_NUM_VCONTEXTS to size the BFS1SpillStack entry array.
#define BFS_MAX_DEPTH 32

// NOTE(review): neither threshold is referenced in this header; presumably the
// primref counts below which a build is handled as "trivial" or by a single
// work-group -- confirm against the kernels that use them.
#define TRIVIAL_BUILD_THRESHOLD 6
#define SINGLE_WG_BUILD_THRESHOLD 256

// QNodeGlobalRootBuffer reserves twice this many entries (used as two buffers).
#define QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM 16384
23 
24 
25 typedef uchar vcontext_id_t;
26 
27 GRL_NAMESPACE_BEGIN(GRL)
28 GRL_NAMESPACE_BEGIN(RTAS)
29 GRL_NAMESPACE_BEGIN(GPUBVHBuilder)
30 
// Result of a split search; stored per context as VContext::split
// ("pass1 global working state and output").
struct BFS_Split
{
    // NOTE(review): presumably the SAH cost of the chosen split -- confirm
    float sah;
    // NOTE(review): presumably the split axis -- confirm
    int dim;
    // NOTE(review): presumably the bin position of the split along 'dim' -- confirm
    int pos;
};
37 
38 
39 struct BFS_BinInfo
40 {
41     float min_max[18 * BFS_NUM_BINS]; //  layout: bins[axis][num_bins][6]
42                                       //          The 6 are lower(xyz) and -upper(xyz)
43                                       // bins use negated-max so that we can use vectorized mins instead of min/max pairs
44     uint counts[3 * BFS_NUM_BINS];
45 };
46 
enum_uint8(SAHBuildFlags)47 enum_uint8(SAHBuildFlags)
48 {
49     SAH_FLAG_NEED_BACKPOINTERS = 1,        // identifies a mixed internal node where each child can have a different type
50     SAH_FLAG_NEED_MASKS        = 2
51 };
52 
53 struct SAHBuildGlobals
54 {
55     qword   p_primref_index_buffers;
56     qword   p_primrefs_buffer;
57     qword   p_bvh2;
58     qword   p_globals;     // TODO: deprecate this
59     qword   p_bvh_base;
60     gpuva_t p_qnode_root_buffer;
61 
62     dword flags; // bit 1 is 'alloc_backpointers'.  bit 2 is 'need_masks'
63     dword num_primrefs;
64     dword leaf_size;
65     dword leaf_type;
66 
67     dword root_buffer_num_produced;
68     dword root_buffer_num_produced_hi;
69     dword root_buffer_num_consumed;
70     dword root_buffer_num_consumed_hi;
71     dword root_buffer_num_to_consume;
72     dword root_buffer_num_to_consume_hi;
73 };
74 
75 struct SAHBuildBuffersInfo
76 {
77     gpuva_t p_globals;
78     gpuva_t p_primref_index_buffers;
79     gpuva_t p_primrefs_buffer;
80     gpuva_t p_bvh2;
81     gpuva_t p_bvh_base;
82     gpuva_t p_qnode_root_buffer;
83     dword   sah_globals_flags;
84     dword   _pad;
85     gpuva_t _pad2;
86 };
87 
88 typedef union LRBounds
89 {
90     struct
91     {
92         struct AABB3f left_centroid_bounds;
93         struct AABB3f left_geom_bounds;
94         struct AABB3f right_centroid_bounds;
95         struct AABB3f right_geom_bounds;
96     } boxes;
97     struct
98     {
99         float Array[24];
100     } scalars;
101 } LRBounds;
102 
103 
104 struct VContext
105 {
106     uint dispatch_primref_begin;    // range of primrefs for this task
107     uint dispatch_primref_end;
108     uint bvh2_root;                 // BVH2 root node for this task
109     uint tree_depth;                // depth of this node in the tree
110     uint num_left;          // primref counts
111     uint num_right;
112     uint lr_mask;      // lower 8b : left mask.  upper 8b : right mask
113     uint batch_index;
114 
115     // pass1 global working state and output
116     struct BFS_Split split;
117     struct BFS_BinInfo global_bin_info;
118 
119     // pass2 global working state and output
120     LRBounds lr_bounds;
121 };
122 
123 
124 
125 struct BFSDispatchRecord
126 {
127     ushort batch_index;
128     ushort context_id;
129 };
130 
131 
132 struct BFSDispatchQueue
133 {
134     uint num_dispatches;
135     uint wg_count[BFS_NUM_VCONTEXTS];
136     struct BFSDispatchRecord records[BFS_NUM_VCONTEXTS];
137 };
138 
139 struct BFS1SpillStackEntry
140 {
141     uint primref_begin;
142     uint primref_end;
143     uint bvh2_root;
144     ushort tree_depth;
145     ushort batch_index;
146 };
147 
148 struct BFS1SpillStack
149 {
150     uint size;
151     struct BFS1SpillStackEntry entries[BFS_NUM_VCONTEXTS * BFS_MAX_DEPTH];
152 };
153 
154 struct QNodeGlobalRootBufferEntry
155 {
156     uint bvh2_node;
157     uint qnode;
158     uint build_idx;
159     uint _pad;
160 };
161 
162 struct QNodeGlobalRootBuffer
163 {
164     uint curr_entries_offset; // we use "entries" as two buffers, so offset is either 0 or QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM
165     struct QNodeGlobalRootBufferEntry entries[QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM * 2];
166 };
167 
168 struct DFSDispatchRecord
169 {
170     uint primref_base;
171     uint bvh2_base;
172     uint batch_index;
173     ushort num_primrefs;
174     ushort tree_depth;
175 };
176 
177 
178 struct DFSDispatchQueue
179 {
180     struct DFSDispatchRecord records[BFS_NUM_VCONTEXTS * 2];
181 };
182 
// State values for a virtual context.
// NOTE(review): presumably the values stored in VContextScheduler::vcontext_state -- confirm
#define VCONTEXT_STATE_EXECUTING 0
#define VCONTEXT_STATE_UNALLOCATED 1
185 
186 union SchedulerUnion
187 {
188     struct VContextScheduler
189     {
190         /////////////////////////////////////////////////////////////
191         //  State data used for communication with command streamer
192         //   NOTE: This part must match definition in 'new_sah_builder.grl'
193         /////////////////////////////////////////////////////////////
194 
195         dword num_bfs_wgs;
196         dword num_dfs_wgs;
197 
198         dword scheduler_postsync;
199         dword _pad1;
200 
201         dword num_trivial_builds; // number of trivial builds (#primrefs < leaf_size).
202         dword num_single_builds;  // number of single-wg builds (#primrefs < threshold)
203 
204         dword batched_build_wg_count;  // number of wgs to dispatch for initial BFS pass
205         dword batched_build_loop_mask; // value is 0 if  #builds <= #contexts.  else 1  command streamer uses this as a loop condition
206 
207         /////////////////////////////////////////////////////////////
208 
209         dword batched_build_count;  // number of batched builds in the SAHBuildGlobals buffer
210         dword batched_build_offset; // location of the first batched-build in the SAHBuildGlobals buffer
211 
212         dword vcontext_state[BFS_NUM_VCONTEXTS];
213 
214         struct BFSDispatchQueue bfs_queue;
215         struct DFSDispatchQueue dfs_queue;
216 
217         struct VContext contexts[BFS_NUM_VCONTEXTS];
218 
219         struct BFS1SpillStack bfs2_spill_stack;
220     } vContextScheduler;
221 
222     struct QnodeScheduler
223     {
224         dword num_qnode_grb_curr_entries;
225         dword num_qnode_grb_new_entries;
226 
227         dword scheduler_postsync;
228         dword _pad1;
229 
230         dword num_trivial_builds; // number of trivial builds (#primrefs < leaf_size).
231         dword num_single_builds;  // number of single-wg builds (#primrefs < threshold)
232 
233         dword batched_builds_to_process;
234         dword num_max_qnode_global_root_buffer_entries; // number of maximum entries to global root buffer
235 
236         /////////////////////////////////////////////////////////////
237 
238         dword batched_build_count;  // number of batched builds in the SAHBuildGlobals buffer
239         dword batched_build_offset; // location of the first batched-build in the SAHBuildGlobals buffer
240 
241         struct QNodeGlobalRootBuffer qnode_global_root_buffer;
242     } qnodeScheduler;
243 };
244 
245 
246 struct BVH2Node
247 {
248     struct AABB3f box;
249     uint  meta_u;   // leaf:  primref start.  inner: offset from node to its first child
250     uint  meta_ss;
251     //ushort meta_s;   // leaf: primref count.  inner: offset from first to second child, in nodes
252     //uchar is_inner; //  1 if inner, 0 if leaf
253     //uchar mask;
254 };
255 
256 struct BVH2
257 {
258     uint num_nodes;
259     uint _pad[7];  // align to 32B
260 };
261 
262 
263 GRL_NAMESPACE_END(GPUBVHBuilder)
264 GRL_NAMESPACE_END(RTAS)
265 GRL_NAMESPACE_END(GRL)
266