//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

#include "bvh_build_refit.h"
#include "api_interface.h"
#include "common.h"
11
12
13
14
15
16#if 0
17GRL_ANNOTATE_IGC_DO_NOT_SPILL
18__attribute__( (reqd_work_group_size( 16, 1, 1 )) )
19void kernel
20update_instance_leaves( global struct BVHBase* bvh,
21    uint64_t dxrInstancesArray,
22    uint64_t dxrInstancesPtr,
23    global struct AABB3f* instance_aabb_scratch
24)
25{
26    uint num_leaves = BVHBase_GetNumHWInstanceLeaves( bvh );
27    uint id = get_local_id( 0 ) + get_local_size( 0 ) * get_group_id( 0 );
28    if ( id >= num_leaves )
29        return;
30
31    global struct GRL_RAYTRACING_INSTANCE_DESC* instancesArray =
32        (global struct GRL_RAYTRACING_INSTANCE_DESC*)dxrInstancesArray;
33    global struct GRL_RAYTRACING_INSTANCE_DESC** instancesPtrArray =
34        (global struct GRL_RAYTRACING_INSTANCE_DESC**)dxrInstancesPtr;
35
36    global struct HwInstanceLeaf* leafs = (global struct HwInstanceLeaf*) BVHBase_GetHWInstanceLeaves( bvh );
37
38    /* iterate over all children of the instance node and get their bounds */
39
40    uint32_t instanceIdx = HwInstanceLeafPart1_getInstanceIndex( &leafs[id] );
41    global struct GRL_RAYTRACING_INSTANCE_DESC* instance = NULL;
42    if ( dxrInstancesArray != NULL )
43        instance = &instancesArray[instanceIdx];
44    else
45        instance = instancesPtrArray[instanceIdx];
46
47    struct AffineSpace3f xfm = AffineSpace3f_load_row_major( instance->Transform );
48    global struct BVHBase* instanceBvh = (global struct BVHBase*)instance->AccelerationStructure;
49    struct AABB3f newSubtreeBounds = instanceBvh->Meta.bounds;
50    struct AABB3f bbox = AABB3f_transform( xfm, newSubtreeBounds ); // JDB TODO:  Use faster abs-matrix method
51
52    const bool valid_min = isfinite( bbox.lower[0] ) && isfinite( bbox.lower[1] ) && isfinite( bbox.lower[2] );
53    const bool valid_max = isfinite( bbox.upper[0] ) && isfinite( bbox.upper[1] ) && isfinite( bbox.upper[2] );
54
55    uint mask = GRL_get_InstanceMask(instance);
56
57    uint offset = instanceBvh->rootNodeOffset;
58    if ( !valid_min || !valid_max )
59    {
60        bbox.lower[0] = xfm.p.x;
61        bbox.lower[1] = xfm.p.y;
62        bbox.lower[2] = xfm.p.z;
63        bbox.upper[0] = xfm.p.x;
64        bbox.upper[1] = xfm.p.y;
65        bbox.upper[2] = xfm.p.z;
66        offset = NO_NODE_OFFSET;
67        mask = 0;
68    }
69
70    instance_aabb_scratch[id] = bbox;
71
72    HwInstanceLeaf_Constructor( &leafs[id], instance, instanceIdx, offset, mask ); // TODO: No instance opening for refittable BVH
73}
74#endif
75
76
77GRL_ANNOTATE_IGC_DO_NOT_SPILL
78__attribute__((reqd_work_group_size(16, 1, 1)))
79void kernel
80update_instance_leaves(global struct BVHBase* bvh,
81    uint64_t dxrInstancesArray,
82    uint64_t dxrInstancesPtr,
83    global struct AABB3f* instance_aabb_scratch
84)
85{
86    uint num_leaves = BVHBase_GetNumHWInstanceLeaves(bvh);
87    uint id = get_local_id(0) + get_local_size(0) * get_group_id(0);
88    if (id >= num_leaves)
89        return;
90
91    DO_update_instance_leaves(
92        bvh,
93        dxrInstancesArray,
94        dxrInstancesPtr,
95        instance_aabb_scratch,
96        id,
97        0 );
98}
99
100GRL_ANNOTATE_IGC_DO_NOT_SPILL
101__attribute__((reqd_work_group_size(16, 1, 1)))
102void kernel
103update_instance_leaves_indirect(global struct BVHBase* bvh,
104    uint64_t dxrInstancesArray,
105    uint64_t dxrInstancesPtr,
106    global struct AABB3f* instance_aabb_scratch,
107    global struct IndirectBuildRangeInfo* indirect_data)
108{
109    uint num_leaves = BVHBase_GetNumHWInstanceLeaves(bvh);
110    uint id = get_local_id(0) + get_local_size(0) * get_group_id(0);
111    if (id >= num_leaves)
112        return;
113
114    DO_update_instance_leaves(
115        bvh,
116        dxrInstancesArray + indirect_data->primitiveOffset,
117        dxrInstancesPtr,
118        instance_aabb_scratch,
119        id,
120        0 );
121}
122
123#if 0
124/*
125
126  This kernel refit a BVH. The algorithm iterates over all BVH nodes
127  to find all leaf nodes, which is where refitting starts. For these
128  leaf nodes bounds get recalculated and then propagates up the tree.
129
130  One kernel instance considers a range of inner nodes as startpoints.
131 */
132 GRL_ANNOTATE_IGC_DO_NOT_SPILL
133__attribute__((reqd_work_group_size(8, 1, 1))) void kernel refit(
134    global struct BVHBase *bvh,
135    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray,
136    global struct AABB3f* instance_leaf_aabbs )
137{
138    /* here we temporarily store the bounds for the children of a node */
139    struct AABB childrenAABB[BVH_NODE_N6];
140
141    /* get pointer to inner nodes and back pointers */
142    global struct QBVHNodeN *inner_nodes = BVHBase_rootNode(bvh);
143    BackPointers* backPointers = BVHBase_GetBackPointers(bvh);
144
145    /* construct range of nodes that each work group will process */
146    const uint numInnerNodes = BVHBase_numNodes(bvh);
147    const uint startID = (get_group_id(0) + 0) * numInnerNodes / get_num_groups(0);
148    const uint endID = (get_group_id(0) + 1) * numInnerNodes / get_num_groups(0);
149
150    /* each workgroup iterates over its range of nodes */
151    for (uint i = startID + get_local_id(0); i < endID; i += get_local_size(0))
152    {
153        global struct QBVHNodeN* curNode = &inner_nodes[i];
154        uint numChildren = refit_bottom(bvh, geosArray,
155                                 instance_leaf_aabbs,
156                                 curNode,
157                                 childrenAABB,
158                                 *InnerNode_GetBackPointer(backPointers, i));
159        if (numChildren != 0)
160        {
161            /* update bounds of node */
162            QBVHNodeN_setBounds(curNode, childrenAABB, numChildren);
163
164            /* refit upper parts of the BVH */
165            // TODO: this will not gonna work for mixed nodes
166            refit_bottom_up(curNode, bvh, childrenAABB, numChildren);
167        }
168    }
169}
170
171
172GRL_ANNOTATE_IGC_DO_NOT_SPILL
173__attribute__((reqd_work_group_size(8, 1, 1)))
174void kernel Find_refit_treelets(
175    global struct BVHBase* bvh,
176    global TreeletNodeData* treelets,
177    global uint* scratchStartpoints,
178    global uint* startpointAlloc)
179{
180    find_refit_treelets(bvh,
181                        treelets,
182                        scratchStartpoints,
183                        startpointAlloc);
184}
185
186GRL_ANNOTATE_IGC_DO_NOT_SPILL
187__attribute__((reqd_work_group_size(16, 1, 1)))
188void kernel Assign_refit_startpoints_to_treelets(
189    global struct BVHBase* bvh,
190    global TreeletNodeData* treelets,
191    global uint* scratchStartpoints)
192{
193    assign_refit_startpoints_to_treelets(bvh, treelets, scratchStartpoints);
194}
195
196GRL_ANNOTATE_IGC_DO_NOT_SPILL
197__attribute__((reqd_work_group_size(128, 1, 1)))
198__attribute__((intel_reqd_sub_group_size(16)))
199void kernel Finalize_treelets_in_groups(
200    global struct BVHBase* bvh,
201    global uint* scratchStartpoints )
202{
203    local uint depths[FINALIZE_TREELETS_SLM_DEPTHS_SPACE];
204
205    finalize_treelets_in_groups(bvh, scratchStartpoints, depths);
206}
207
208
209GRL_ANNOTATE_IGC_DO_NOT_SPILL
210__attribute__((reqd_work_group_size(256, 1, 1)))
211__attribute__((intel_reqd_sub_group_size(16)))
212void kernel Refit_quads_tree_per_group(global SquashedInput* psqinputs)
213{
214    uint group_id = get_group_id(0);
215    SquashedInput sqinput = psqinputs[group_id];
216    global struct BVHBase* bvh = sqinput.pBvh;
217    uint numLeaves = BVHBase_GetNumQuads(bvh);
218    global QuadLeaf* leafs = (global QuadLeaf*)BVHBase_GetQuadLeaves(bvh);
219
220    global void* input = sqinput.pInput;
221    global struct AABB* bbox_scratch = sqinput.bbox_scratch;
222
223    uint leafsIndexOffset = bvh->quadLeafStart - BVH_ROOT_NODE_OFFSET / 64;
224    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray = (global GRL_RAYTRACING_GEOMETRY_DESC*) input;
225    uint id = get_local_id(0);
226
227    for (uint leaf_id = id; leaf_id < numLeaves; leaf_id += get_local_size(0))
228    {
229        struct AABB theAABB;
230        refit_bottom_child_quad(leafs + leaf_id, geosArray, &theAABB);
231        theAABB.lower.w = as_float(0xABBADEFFu);
232        bbox_scratch[leafsIndexOffset + leaf_id] = theAABB;
233    }
234}
235
236
237
238GRL_ANNOTATE_IGC_DO_NOT_SPILL
239__attribute__((reqd_work_group_size(32, 1, 1)))
240__attribute__((intel_reqd_sub_group_size(16)))
241void kernel Refit_quads(
242    global struct BVHBase* bvh,
243    global void* input,
244    global struct AABB* bbox_scratch,
245    uint numGroupsExecuted,
246    global SquashedInputGroupDesc* sqinput)
247{
248    uint numLeafs = BVHBase_GetNumQuads(bvh);
249    if (numLeafs == 0) return;
250    global QuadLeaf* leafs = (global QuadLeaf*)BVHBase_GetQuadLeaves(bvh);
251
252    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray = (global GRL_RAYTRACING_GEOMETRY_DESC*) input;
253    uint leafsIndexOffset = bvh->quadLeafStart - BVH_ROOT_NODE_OFFSET / 64;
254
255    uint numLeafsPerGr = (numLeafs + (numGroupsExecuted - 1)) / numGroupsExecuted;
256
257    uint id_start = get_group_id(0) * numLeafsPerGr + get_local_id(0);
258    uint id_end = min(id_start + numLeafsPerGr, numLeafs);
259    for (uint id = id_start; id < id_end; id+= get_local_size(0))
260    {
261        struct AABB theAABB;
262        refit_bottom_child_quad(leafs + id, geosArray, &theAABB);
263        theAABB.lower.w = as_float(0xABBADEFFu);
264        bbox_scratch[leafsIndexOffset + id] = theAABB;
265    }
266
267    if (get_group_id(0) == 0 && get_local_id(0) < 16)
268    {
269
270        uint groupnr;
271        uint treeletCnt = *BVHBase_GetRefitTreeletCntPtr(bvh);
272        if (get_sub_group_local_id() == 0) {
273            groupnr = atomic_add_global(&sqinput->totalNumGroups, treeletCnt);
274        }
275        groupnr = sub_group_broadcast(groupnr, 0);
276        for (uint subtree = get_sub_group_local_id(); subtree < treeletCnt; subtree += get_sub_group_size())
277        {
278            uint gr = groupnr + subtree;
279            //printf("tree %llx, treelet %d/%d, grId %d, numStartpoints %d\n",  bvh, subtree,treeletCnt, gr, BVHBase_GetRefitTreeletDescs(bvh)[subtree].numStartpoints);
280            sqinput[gr].bvh = (qword)bvh;
281            sqinput[gr].scratch = (qword)bbox_scratch;
282            sqinput[gr].groupInTree = subtree;
283        }
284        //if (get_local_id(0)==0 && treeletCnt > 1)
285        //{
286        //    printf("tree %llx, tip treelet %d/%d = numStartpoints %d depth %d\n", bvh, treeletCnt, treeletCnt, BVHBase_GetRefitTreeletDescs(bvh)[treeletCnt].numStartpoints, BVHBase_GetRefitTreeletDescs(bvh)[treeletCnt].maxDepth);
287        //}
288    }
289}
290
291
292GRL_ANNOTATE_IGC_DO_NOT_SPILL
293__attribute__((reqd_work_group_size(256, 1, 1)))
294__attribute__((intel_reqd_sub_group_size(16)))
295void kernel
296Refit_tree_per_group_quad(
297    global SquashedInput* psqinputs)
298{
299    uint group_id = get_group_id(0);
300    SquashedInput sqinput = psqinputs[group_id];
301    global struct BVHBase* bvh = sqinput.pBvh;
302    global struct AABB* bbox_scratch = sqinput.bbox_scratch;
303    global void* pInput = sqinput.pInput;
304    local Treelet_by_single_group_locals loc;
305
306    if (*BVHBase_GetRefitTreeletCntPtr(bvh) == 0)
307        return;
308
309#if REFIT_DEBUG_CHECKS
310    uint bottoms_cnt = *BVHBase_GetRefitTreeletCntPtr(bvh);
311    if (bottoms_cnt != 1) {
312        if (get_local_id(0) == 0)
313        {
314            printf("Error: this tree has more than 1 treelets!\n");
315        }
316        return;
317    }
318#endif
319
320    /* get pointer to inner nodes and back pointers */
321    uniform global struct QBVHNodeN* inner_nodes = BVHBase_rootNode(bvh);
322
323    // uniform per group
324    uniform RefitTreelet* pTrltDsc = BVHBase_GetRefitTreeletDescs(bvh);
325
326    uint numLeafs = bvh->quadLeafCur - bvh->quadLeafStart;
327
328    if (numLeafs == 0) { return; }
329
330    uint numLeafsByOneThread = (numLeafs + (get_local_size(0) - 1)) / get_local_size(0);
331
332    update_quads(bvh, pInput, bbox_scratch, get_local_id(0), numLeafsByOneThread);
333
334    mem_fence_workgroup_default(); work_group_barrier(0);
335
336    RefitTreelet trltDsc = *pTrltDsc;
337
338    refit_treelet_by_single_group(
339        bbox_scratch,
340        &loc,
341        bvh,
342        trltDsc,
343        false,
344        true);
345
346    if (trltDsc.maxDepth > 0)
347    {
348        mem_fence_workgroup_default(); work_group_barrier(0);
349        post_refit_encode_qnode_tree_per_group(bbox_scratch,bvh);
350    }
351}
352
353
354GRL_ANNOTATE_IGC_DO_NOT_SPILL
355__attribute__((reqd_work_group_size(256, 1, 1)))
356__attribute__((intel_reqd_sub_group_size(16)))
357void kernel
358Refit_treelet_per_group(
359    global SquashedInputGroupDesc* sqinput)
360{
361    uint group_id = get_group_id(0);
362    global struct AABB*    bbox_scratch = (global struct AABB* )sqinput[group_id].scratch;
363    global struct BVHBase* bvh          = (global struct BVHBase* )sqinput[group_id].bvh;
364    group_id                            = sqinput[group_id].groupInTree;
365
366    /* get pointer to inner nodes and back pointers */
367    uniform global struct QBVHNodeN* inner_nodes = BVHBase_rootNode(bvh);
368
369    uint bottoms_cnt = *BVHBase_GetRefitTreeletCntPtr(bvh);
370
371    // uniform per group
372    uniform RefitTreelet* pTrltDsc = BVHBase_GetRefitTreeletDescs(bvh);
373
374    bool should_we_process_treetip = true;
375    local Treelet_by_single_group_locals loc;
376    local bool* l_should_we_process_treetip = (local bool*)&loc;
377#if REFIT_VERBOSE_LOG
378    if (group_id != 0) return;
379#endif
380
381    if (bottoms_cnt > 1)
382    {
383#if REFIT_VERBOSE_LOG
384        for (; group_id < bottoms_cnt; group_id++)
385        {
386            if (get_local_id(0) == 0) { printf("\n ====== treelet %d ====== \n", group_id); }
387            work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, memory_scope_device);
388#endif
389            bool rootProcThread = refit_treelet_by_single_group(
390                bbox_scratch,
391                &loc,
392                bvh,
393                pTrltDsc[group_id],
394                true,
395                false);
396
397            // we have to make last group that finishes go up and process the treetip
398            if (rootProcThread)
399            {
400
401                mem_fence_gpu_invalidate();
402                uint finished_cnt = atomic_inc_global((global uint*) & bvh->refitTreeletCnt2);
403                should_we_process_treetip = finished_cnt + 1 == bottoms_cnt;
404
405                * l_should_we_process_treetip = should_we_process_treetip;
406
407                if (should_we_process_treetip) mem_fence_gpu_invalidate();
408            }
409#if REFIT_VERBOSE_LOG
410        }
411#endif
412        work_group_barrier(CLK_LOCAL_MEM_FENCE, memory_scope_work_group);
413
414        should_we_process_treetip = *l_should_we_process_treetip;
415    }
416
417    if (should_we_process_treetip)
418    {
419        //this group will process treetip
420        if (get_local_id(0) == 0) { bvh->refitTreeletCnt2 = 0; }
421        if (bottoms_cnt == 1) { bottoms_cnt = 0; }
422        refit_treelet_by_single_group(
423            bbox_scratch,
424            &loc,
425            bvh,
426            pTrltDsc[bottoms_cnt],
427            true,
428            true);
429    }
430}
431
432/*
433  This kernel refit a BVH. The algorithm iterates over all BVH nodes
434  to find all leaf nodes, which is where refitting starts. For these
435  leaf nodes bounds get recalculated and then propagates up the tree.
436
437  One kernel instance considers exactly one inner_node startpoint.
438  not range of inner nodes.
439 */
440 GRL_ANNOTATE_IGC_DO_NOT_SPILL
441__attribute__((reqd_work_group_size(8, 1, 1))) void kernel
442Refit_per_one_startpoint(
443    global struct BVHBase* bvh,
444    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray,
445    global struct AABB3f* instance_leaf_aabbs )
446{
447    /* here we temporarily store the bounds for the children of a node */
448    struct AABB childrenAABB[BVH_NODE_N6];
449
450    /* get pointer to inner nodes and back pointers */
451    global struct QBVHNodeN* inner_nodes = BVHBase_rootNode(bvh);
452    BackPointers* backPointers = BVHBase_GetBackPointers(bvh);
453
454    /* get the inner node that we will consider as a bottom startpoint */
455    const uint numInnerNodes = BVHBase_numNodes(bvh);
456    const uint innerNodeIdx = (get_group_id(0) + 0) * get_local_size(0) + get_local_id(0);
457
458    if (innerNodeIdx >= numInnerNodes) return;
459
460    global struct QBVHNodeN* curNode = &inner_nodes[innerNodeIdx];
461    uint numChildren = refit_bottom(
462        bvh,
463        geosArray,
464        instance_leaf_aabbs,
465        curNode,
466        childrenAABB,
467        *InnerNode_GetBackPointer(backPointers, innerNodeIdx));
468
469    if (numChildren != 0)
470    {
471        /* update bounds of node */
472        QBVHNodeN_setBounds(curNode, childrenAABB, numChildren);
473
474        /* refit upper parts of the BVH */
475        /* TODO: this will not gonna work for mixed nodes */
476        refit_bottom_up(curNode, bvh, childrenAABB, numChildren);
477    }
478}
479
480#endif
481
482GRL_ANNOTATE_IGC_DO_NOT_SPILL
483__attribute__((reqd_work_group_size(SG_REFIT_WG_SIZE, 1, 1))) void kernel
484Refit_indirect_sg(
485    global struct BVHBase* bvh,
486    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray,
487    global struct AABB3f* instance_leaf_aabbs)
488{
489    DO_Refit_per_one_startpoint_sg(bvh, geosArray, instance_leaf_aabbs, 0);
490
491}
492