//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

#include "api_interface.h"
#include "common.h"
#include "instance.h"

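/*
 * primref_to_quads: one work item per primitive reference. Each item looks up
 * its primref through primref_index, re-pairs the referenced triangle(s) into
 * a TrianglePair, and writes a 64-byte quad leaf into the BVH's quad-leaf
 * region (quadLeafStart). When allow_update is set, the quad's vertex indices
 * and byte offsets are additionally written to the quadIndicesDataStart
 * region, presumably so that a later update/refit pass can re-read the source
 * vertices without re-pairing.
 */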
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(32, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16)))
void kernel
primref_to_quads(global struct Globals *globals,
                 global struct AABB *primref,
                 global char *primref_index,
                 global char *bvh_mem,
                 global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc,
                 const uint stride,
                 const uint offset,
                 const uint allow_update)
{
    global struct BVHBase* bvh = (global struct BVHBase*)bvh_mem;
    global struct Quad *quads = (global struct Quad *)(bvh_mem + 64 * bvh->quadLeafStart);
    uint quadIndicesStart = bvh->quadIndicesDataStart;

    const uint numPrimitives = globals->numPrimitives;
    uint i = get_group_id(0) * get_local_size(0) + get_local_id(0);
    if (i < numPrimitives)
    {
        const uint primrefID = *(uint *)(primref_index + i * stride + offset);

        const uint geomID    = PRIMREF_geomID(&primref[primrefID]);
        const uint primID0   = PRIMREF_primID0(&primref[primrefID]);
        const uint primID1   = PRIMREF_primID1(&primref[primrefID]);
        const uint geomFlags = PRIMREF_geomFlags(&primref[primrefID]);

        const uint3 tri0 = GRL_load_triangle(&geomDesc[geomID], primID0);
        const uint3 tri1 = GRL_load_triangle(&geomDesc[geomID], primID1);

        const struct TrianglePair q = TrianglePair_Constructor(tri0, primID0, tri1, primID1);
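        // q.a holds the up-to-four vertex indices the merged pair uses; q.lb
        // appears to hold the second triangle's corner slots within that set,
        // which are packed into the leaf header below.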

        uint vertex_stride = geomDesc[geomID].Desc.Triangles.VertexBufferByteStride;

        const uint4 indices = q.a;

        const uint mask = 0xff; // FIXME: hardcoded mask
        float3 vtx0, vtx1, vtx2, vtx3;
        GRL_load_quad_vertices(&geomDesc[geomID], &vtx0, &vtx1, &vtx2, &vtx3, indices);

        uint j0 = q.lb.x;
        uint j1 = q.lb.y;
        uint j2 = q.lb.z;
        uint shaderIndex = (mask << 24) | geomID;
        uint geomIndex = geomID | (geomFlags << 30);
        uint primIndex0 = primID0;
        const uint delta = primID1 - primID0;
        const uint j = (((j0) << 0) | ((j1) << 2) | ((j2) << 4));
        uint primIndex1Delta = delta | (j << 16) | (1 << 22);

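        // Pack the 64-byte quad leaf: pack0 is the header (geometry mask in bits
        // 24..31 of the shader index, geometry flags in bits 30..31 of the
        // geometry index, primID0, and primID1 encoded as a delta with the packed
        // j0/j1/j2 slots in bits 16..21 and bit 22 set); pack1..pack3 carry the
        // 12 floats of the four quad vertices.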
        uint4 pack0 = (uint4)(shaderIndex, geomIndex, primIndex0, primIndex1Delta);
        float4 pack1 = (float4)(vtx0.x, vtx0.y, vtx0.z, vtx1.x);
        float4 pack2 = (float4)(vtx1.y, vtx1.z, vtx2.x, vtx2.y);
        float4 pack3 = (float4)(vtx2.z, vtx3.x, vtx3.y, vtx3.z);

        global uint4* dst = (global uint4*)&quads[i];
        store_uint4_L1WB_L3WB(dst, 0, pack0);
        store_uint4_L1WB_L3WB(dst, 1, as_uint4(pack1));
        store_uint4_L1WB_L3WB(dst, 2, as_uint4(pack2));
        store_uint4_L1WB_L3WB(dst, 3, as_uint4(pack3));

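        // For updatable BVHs, also persist the leaf header and the per-vertex
        // byte offsets (index * vertex stride) in the quadIndicesDataStart
        // region, presumably so a later refit pass can re-fetch the source
        // vertices without re-pairing the triangles.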
        if (allow_update)
        {
            global uint4* vertex_indice_ptr = (global uint4*)(((char*)bvh) + (64u * quadIndicesStart + 32 * i));

            uint4 pack_indices = (uint4)(indices.x, indices.y, indices.z, indices.w);

            store_uint4_L1WB_L3WB(vertex_indice_ptr, 0, pack0);
            store_uint4_L1WB_L3WB(vertex_indice_ptr, 1, pack_indices * vertex_stride);
        }

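        // A single lane advances the shared quad-leaf cursor for the whole dispatch.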
        if (i == 0)
            bvh->quadLeafCur += numPrimitives;
    }

#if 0
    global struct BVHBase* bvh = (global struct BVHBase*)bvh_mem;
    global struct Quad *quads = (global struct Quad *)(bvh_mem + 64 * bvh->quadLeafStart);

    const uint numPrimitives = globals->numPrimitives;
    const uint startID = get_group_id(0) * get_local_size(0);
    const uint endID   = min((uint)(startID + get_local_size(0)), numPrimitives);

    for (uint i = startID + get_local_id(0); i < endID; i += get_local_size(0))
    {
        const uint primrefID = *(uint *)(primref_index + i * stride + offset);

        const uint geomID    = PRIMREF_geomID(&primref[primrefID]);
        const uint primID0   = PRIMREF_primID0(&primref[primrefID]);
        const uint primID1   = PRIMREF_primID1(&primref[primrefID]);
        const uint geomFlags = PRIMREF_geomFlags(&primref[primrefID]);

        const uint3 tri0 = GRL_load_triangle(&geomDesc[geomID], primID0);
        const uint3 tri1 = GRL_load_triangle(&geomDesc[geomID], primID1);

        const struct TrianglePair q = TrianglePair_Constructor(tri0, primID0, tri1, primID1);

        const uint4 indices = q.a;
        const uint mask = 0xff; // FIXME: hardcoded mask
        float3 vtx0, vtx1, vtx2, vtx3;
        GRL_load_quad_vertices(&geomDesc[geomID], &vtx0, &vtx1, &vtx2, &vtx3, indices);

        setQuad(&quads[i], (float4)(vtx0,0), (float4)(vtx1,0), (float4)(vtx2,0), (float4)(vtx3,0), q.lb.x, q.lb.y, q.lb.z, geomID, primID0, primID1, mask, geomFlags);
    }

    if (get_local_id(0) + get_group_id(0)*get_local_size(0) == 0)
        bvh->quadLeafCur += numPrimitives;
#endif
}

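/*
 * create_procedural_leaf: invoked per sub group with up to 8 procedural
 * primitives (one per lane) whose primref IDs the caller has staged in SLM.
 * All of them share one parent QBVH node; since they may belong to different
 * geometries, the lanes split the batch into one ProceduralLeaf per geomID run
 * and then point the parent's children at the proper leaf block and slot.
 */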
GRL_INLINE void create_procedural_leaf(global struct Globals *globals,
                            global struct AABB *primref,
                            local uint *primrefids,
                            uint numProcedurals,
                            struct QBVHNodeN *qnode,
                            global char *bvh_mem,
                            global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc)
{
    if (get_local_id(0) >= 8)
        return;

    global struct BVHBase* bvh_base = (global struct BVHBase*)bvh_mem;

    /* first read geomID of all primitives */
    uint primrefID = -1;
    uint geomID = -1;
    uint geomFlags = 0;
    if (get_local_id(0) < numProcedurals)
    {
        primrefID = primrefids[get_local_id(0)];
        geomID = PRIMREF_geomID(&primref[primrefID]);
        geomFlags = PRIMREF_geomFlags(&primref[primrefID]);
    }

    // cannot sort by geomID as bounds in parent node are then wrong
    //ulong geomID_primrefID = (((ulong)geomID) << 32) | ((ulong)primrefID);
    //geomID_primrefID = sort8_ascending_ulong(geomID_primrefID);
    //geomID = geomID_primrefID >> 32;
    //primrefID = geomID_primrefID;

    /* We have to split at geomID boundaries into multiple leaves. This
     * block calculates the lane where a leaf starts and ends. */
    const uint geomIDprev = intel_sub_group_shuffle_up(0xFFFFFFFFu, geomID, 1u);
    const uint geomIDnext = intel_sub_group_shuffle_down(geomID, 0xFFFFFFFFu, 1u);
    const uint leaf_start = geomIDprev != geomID;
    const uint leaf_end = geomIDnext != geomID;
    const uint leaf_start_next = intel_sub_group_shuffle_down(leaf_start, 0u, 1u);

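    /* With the example used below, geomID = [3,3,4,4,4,0] (inactive lanes keep -1),
     * this block yields leaf_start = [1,0,1,0,0,1] and leaf_end = [0,1,0,0,1,1]. */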
    /* This computes which leaf a lane processes. E.g. from geomID =
     * [3,3,4,4,4,0] we get leaf_id = [0,0,1,1,1,2] */
    //const uint leaf_id = sub_group_scan_inclusive_add(leaf_start); // FIXME: exclusive?

    /* This computes the n'th primitive a lane processes inside its
     * leaf. For the example above we compute leaf_prim =
     * [0,1,0,1,2,0]. */
    const uint leaf_prim = get_local_id(0) - sub_group_scan_inclusive_max(leaf_start ? get_local_id(0) : 0);

    /* from here on we allocate data and write to memory, thus only
     * lanes that process a primitive should continue. */
    if (get_local_id(0) >= numProcedurals)
        return;

    /* Here we allocate a single memory block for each required
     * ProceduralLeaf node. We do this from a single lane to ensure
     * the allocation is contiguous. */
    uint leaf_base_offset = 0;
    uint n_leafs = sub_group_reduce_add(leaf_start);
    if (get_local_id(0) == 0)
        leaf_base_offset = allocate_procedural_leaves(bvh_base, n_leafs);
    leaf_base_offset = sub_group_broadcast(leaf_base_offset, 0);

    /* Compute the leaf offset for each lane. */
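    /* The inclusive scan turns leaf_start into a per-lane leaf index: for
     * leaf_start = [1,0,1,0,0,1] the scan gives [1,1,2,2,2,3], so subtracting 1
     * selects the 0-based leaf each lane writes into. */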
    uint leaf_offset = leaf_base_offset + sub_group_scan_inclusive_add(leaf_start) - 1;

    struct ProceduralLeaf *pleaf = ((global struct ProceduralLeaf *)(bvh_mem)) + leaf_offset;

    /* write the procedural leaf headers (one per leaf, by its closing lane) */
    if (leaf_end)
    {
        pleaf->leafDesc.shaderIndex_geomMask = 0xFF000000 | (geomID & 0x00FFFFFF); // FIXME: use accessor function. Future extensions may have shaderIndex != geomID
        pleaf->leafDesc.geomIndex_flags = geomID | (geomFlags << 30); // FIXME: use setter function
        pleaf->DW1 = 0xFFFFFFF0 | (leaf_prim + 1); // low bits carry the leaf's primitive count (leaf_prim + 1 on its last lane); remaining bits left set
    }
    /* write the procedural leaf primIDs */
    pleaf->_primIndex[leaf_prim] = PRIMREF_primID0(&primref[primrefID]);

    /* update leaf node offset inside parent node */
    if (get_local_id(0) == 0)
    {
        QBVH6Node_set_offset(qnode, pleaf);
        QBVH6Node_set_type(qnode, NODE_TYPE_PROCEDURAL);
    }

    /* Let parent node children point to proper procedural leaf block
     * and primitive. */
    qnode->childData[get_local_id(0)] = leaf_start_next | (leaf_prim << 2);
}

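/*
 * primref_to_procedurals: builds procedural (AABB) leaves. For each primitive,
 * primref_index holds the primref ID plus a back-pointer (the index of the
 * parent QBVH node); consecutive primitives with the same back-pointer belong
 * to the same parent and are emitted together through create_procedural_leaf().
 */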
GRL_ANNOTATE_IGC_DO_NOT_SPILL
GRL_ANNOTATE_BIG_REG_REQ
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16))) void kernel
primref_to_procedurals(global struct Globals *globals,
                       global struct AABB *primref,
                       global char *primref_index,
                       global char *bvh_mem,
                       global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc,
                       const uint stride,
                       const uint offset)
{
    global struct BVHBase *bvh = (global struct BVHBase *)bvh_mem;

    const uint numPrimitives = globals->numPrimitives;
    uint startID = get_group_id(0) * get_local_size(0);
    uint endID   = min((uint)(startID + get_local_size(0)), numPrimitives);

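    /* Locate the back-pointer stored alongside each primref index: with an
     * 8-byte stride it is interleaved at byte 4 of each record, otherwise it
     * lives in a second array of the same stride that follows the indices. */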
    uint offset1 = stride * globals->numPrimitives;
    if (stride == 8)
        offset1 = 4;

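    /* Snap this work group's [startID, endID) range forward to back-pointer
     * boundaries so that every run of primitives sharing a parent node is
     * handled by exactly one work group. */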
    uint prev_start_back_pointer = startID == 0 ? -1 : *(uint *)(primref_index + (startID-1) * stride + offset1);
    /* start at leaf start */
    while (startID < numPrimitives)
    {
        const uint back_pointer = *(uint *)(primref_index + startID * stride + offset1);
        if (back_pointer != prev_start_back_pointer)
            break;
        startID++;
    }

    uint prev_end_back_pointer = *(uint *)(primref_index + (endID-1) * stride + offset1);
    /* end at next leaf start */
    while (endID < numPrimitives)
    {
        const uint back_pointer = *(uint *)(primref_index + endID * stride + offset1);
        if (back_pointer != prev_end_back_pointer)
            break;
        endID++;
    }

    local uint procedurals[16];

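    /* Each iteration consumes one leaf's worth of primitives. Adding
     * get_local_id(0) to both sides of the loop bound keeps the trip count
     * identical across the sub group (lid differs by exactly the lane id), so
     * the sub-group operations below stay in convergent control flow. */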
    for (uint lid = startID + get_local_id(0); lid < endID + get_local_id(0);)
    {
        /* load leaf start points and back_pointer */
        const uint primrefID = *(uint *)(primref_index + lid * stride + offset);
        uint back_pointer = *(uint *)(primref_index + lid * stride + offset1);
        uint prev_back_pointer = get_local_id(0) == 0 ? -1 : *(uint *)(primref_index + (lid-1) * stride + offset1);

        const uint leaf_start = back_pointer != prev_back_pointer;
        uint leaf_start_back_pointer = sub_group_broadcast(back_pointer, 0);

        /* compute number of primitives inside the leaf starting at lid */
        const uint leaf_id = sub_group_scan_inclusive_add(leaf_start);
        uint numPrimitives = 0;
        if (back_pointer == leaf_start_back_pointer && lid < endID)
            numPrimitives = sub_group_reduce_add(1);
        numPrimitives = sub_group_broadcast(numPrimitives, 0);

        procedurals[get_local_id(0)] = primrefID;

        struct QBVHNodeN *qnode = (struct QBVHNodeN *)bvh_mem + back_pointer;

        create_procedural_leaf(globals, primref, procedurals, numPrimitives, qnode, bvh_mem, geomDesc);

        lid += numPrimitives;
    }
}

GRL_INLINE void create_HW_instance_leaf(
    global struct BVHBase* bvh,
    global const struct GRL_RAYTRACING_INSTANCE_DESC* instDesc,
    uint dstLeafId,
    uint instanceIndex,
    uint rootNodeByteOffset,
    uint instanceMask)
{
    /* convert DXR instance to instance leaf node */
    global struct HwInstanceLeaf* leaves = (__global struct HwInstanceLeaf*)BVHBase_quadLeaves(bvh);
    HwInstanceLeaf_Constructor(&leaves[dstLeafId], instDesc, instanceIndex, rootNodeByteOffset, instanceMask);
}

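/*
 * create_HW_instance_nodes: one work item per instance. Reads the primref
 * selected for this leaf slot and converts the referenced
 * GRL_RAYTRACING_INSTANCE_DESC into a hardware instance leaf.
 */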
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16)))
void kernel create_HW_instance_nodes(
    global const struct Globals *globals,
    global char *primref_index,
    global struct AABB *primref,
    global struct BVHBase *bvh,
    global struct GRL_RAYTRACING_INSTANCE_DESC *src_instances,
    uint32_t stride,
    uint32_t offset)
{
    uint dstLeafId = get_group_id(0) * MAX_HW_SIMD_WIDTH + get_sub_group_local_id();
    uint num_prims = globals->numPrimitives;
    if (dstLeafId >= num_prims)
        return;
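    // The first work item advances the allocator end once for all leaves; the
    // factor of 2 is consistent with each hardware instance leaf spanning two
    // 64-byte BVH blocks.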
    if (dstLeafId == 0)
        bvh->instanceLeafEnd += 2 * num_prims;

    /* get instance ID */
    const uint primrefID = *(uint *)(primref_index + dstLeafId * stride + offset);
    const uint instIndex = PRIMREF_instanceIndex(&primref[primrefID]);
    const uint rootByteOffset = PRIMREF_instanceRootNodeOffset(&primref[primrefID]);
    const uint instMask = PRIMREF_instanceMask(&primref[primrefID]);
    create_HW_instance_leaf(bvh, &src_instances[instIndex], dstLeafId, instIndex, rootByteOffset, instMask);
}

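/*
 * create_HW_instance_nodes_pointers: same as create_HW_instance_nodes, but the
 * instance descriptors are supplied as an array of pointers rather than a
 * contiguous descriptor array.
 */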
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16)))
void kernel create_HW_instance_nodes_pointers(
    global const struct Globals *globals,
    global char *primref_index,
    global struct AABB *primref,
    global struct BVHBase *bvh,
    global void *instances_in,
    uint32_t stride,
    uint32_t offset)
{
    uint dstLeafId = get_group_id(0) * MAX_HW_SIMD_WIDTH + get_sub_group_local_id();
    uint num_prims = globals->numPrimitives;
    if (dstLeafId >= num_prims)
        return;
    if (dstLeafId == 0)
        bvh->instanceLeafEnd += 2 * num_prims;

    global const struct GRL_RAYTRACING_INSTANCE_DESC **instances =
        (global const struct GRL_RAYTRACING_INSTANCE_DESC **)instances_in;

    /* get instance ID */
    const uint primrefID = *(uint *)(primref_index + dstLeafId * stride + offset);
    const uint instIndex = PRIMREF_instanceIndex(&primref[primrefID]);
    const uint rootByteOffset = PRIMREF_instanceRootNodeOffset(&primref[primrefID]);
    const uint instMask = PRIMREF_instanceMask(&primref[primrefID]);
    create_HW_instance_leaf(bvh, instances[instIndex], dstLeafId, instIndex, rootByteOffset, instMask);
}