• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//
2// Copyright (C) 2009-2021 Intel Corporation
3//
4// SPDX-License-Identifier: MIT
5//
6//
7
8#include "api_interface.h"
9#include "common.h"
10
11#define GRID_SIZE 1024
12
13/*
14  This presplit item contains for each primitive a number of splits to
15  perform (priority) and the primref index.
16 */
17
/* Per-primitive work item for the presplit passes: pairs a primref index
   with its splitting priority (0 means "do not split"). */
struct PresplitItem
{
    unsigned int index;  /* index into the primref array */
    float priority;      /* splitting priority; proportional share of the split budget */
};
23
24/*
25
26  This function splits a line v0->v1 at position pos in dimension dim
27  and merges the bounds for the left and right line segments into
28  lbounds and rbounds.
29
30 */
31
/* Clips the segment v0->v1 against the axis-aligned plane at `pos` in
   dimension `dim`, extending `lbounds` with the portion at coordinates
   <= pos and `rbounds` with the portion >= pos. Only v0 itself is
   classified here: callers pass each triangle edge in winding order, so
   every vertex shows up exactly once as v0. */
GRL_INLINE void splitLine(const uint dim,
                      const float pos,
                      const float4 v0,
                      const float4 v1,
                      struct AABB *lbounds,
                      struct AABB *rbounds)
{
    const float d0 = v0[dim];
    const float d1 = v1[dim];

    /* v0 extends whichever side it lies on; a vertex exactly on the
       plane extends both sides */
    if (d0 <= pos)
        AABB_extend_point(lbounds, v0);
    if (d0 >= pos)
        AABB_extend_point(rbounds, v0);

    /* a strictly crossing edge contributes its plane intersection to both sides */
    const bool crosses = (d0 < pos && pos < d1) || (d1 < pos && pos < d0);
    if (crosses)
    {
        const float t = (pos - d0) / (d1 - d0);
        const float4 clip = t * (v1 - v0) + v0;
        AABB_extend_point(lbounds, clip);
        AABB_extend_point(rbounds, clip);
    }
}
59
60/*
61
62  This function splits a clipped triangle v0,v1,v2 with bounds prim at
63  position pos in dimension dim and merges the bounds for the left and
64  right clipped triangle fragments into lbounds and rbounds.
65
66 */
67
/* Splits the already-clipped triangle v0,v1,v2 (with clipped bounds `prim`)
   at position `pos` in dimension `dim`, accumulating the left and right
   fragment bounds into `lbounds` and `rbounds`. Callers must pass lbounds/
   rbounds initialized (splitQuadPrimRef AABB_init's them). */
GRL_INLINE void splitTriangle(struct AABB *prim,
                          const uint dim,
                          const float pos,
                          const float4 v0,
                          const float4 v1,
                          const float4 v2,
                          struct AABB *lbounds,
                          struct AABB *rbounds)
{
    /* clip each triangle edge; each vertex is classified once as the
       first endpoint of exactly one edge */
    splitLine(dim, pos, v0, v1, lbounds, rbounds);
    splitLine(dim, pos, v1, v2, lbounds, rbounds);
    splitLine(dim, pos, v2, v0, lbounds, rbounds);

    /* the triangle itself was clipped already, thus clip against triangle bounds */
    AABB_intersect(lbounds, prim);
    AABB_intersect(rbounds, prim);
}
86
/* Computes the splitting priority of a primref (a pair of triangles).
   The priority is the half-area of the primref AABB, boosted by the ratio
   of box area to actual triangle area (capped at 4x) so that primitives
   whose box badly over-estimates their geometry are split first. */
float calculate_priority(struct AABB *prim, global GRL_RAYTRACING_GEOMETRY_DESC *geom)
{
    /* projected area of the pair's first triangle */
    const uint3 t0 = GRL_load_triangle(geom, PRIMREF_primID0(prim));
    const float4 p0 = GRL_load_vertex(geom, t0.x);
    const float4 p1 = GRL_load_vertex(geom, t0.y);
    const float4 p2 = GRL_load_vertex(geom, t0.z);

    /* projected area of the pair's second triangle */
    const uint3 t1 = GRL_load_triangle(geom, PRIMREF_primID1(prim));
    const float4 q0 = GRL_load_vertex(geom, t1.x);
    const float4 q1 = GRL_load_vertex(geom, t1.y);
    const float4 q2 = GRL_load_vertex(geom, t1.z);

    const float area_tris = areaProjectedTriangle(p0, p1, p2) + areaProjectedTriangle(q0, q1, q2);

    /* base priority is the AABB half-area; prefer primitives with a
       large potential SAH gain (box much larger than its triangles) */
    const float area_aabb = AABB_halfArea(prim);
    const float area_ratio = min(4.0f, area_aabb / max(1E-12f, area_tris));

    /* NOTE(review): a disabled heuristic used to zero the priority of
       primitives smaller than half a grid cell (GRID_SIZE) — see history. */
    return area_aabb * area_ratio;
}
122
123/*
124
125  This kernel calculates for each primitive an estimated splitting priority.
126
127 */
128
/* Computes for each primref an estimated splitting priority and stores it,
   together with the primref index, into the presplit[] array. Also latches
   numOriginalPrimitives (executed by global work item 0 only).

   FIX: the original left presplit[i] uninitialized for geometry that is
   neither triangle nor procedural, yet the priority_sum kernel reads
   presplit[i].priority for every i < numPrimitives. Every entry is now
   written with a valid (zero) priority. */
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1))) void kernel compute_num_presplits(global struct Globals *globals,
                                                                                                 global struct BVHBase* bvh_base,
                                                                                                 global struct AABB *primref,
                                                                                                 global struct PresplitItem *presplit,
                                                                                                 global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc)
{
    //assert(sizeof(PresplitItem) == sizeof_PresplitItem);

    /* calculate the range of primitives each work group should process */
    const uint numPrimitives = globals->numPrimitives;
    const uint startID = (get_group_id(0) + 0) * numPrimitives / get_num_groups(0);
    const uint endID = (get_group_id(0) + 1) * numPrimitives / get_num_groups(0);

    /* NOTE(review): the scene-size computation from bvh_base->Meta.bounds was
       removed — it was only consumed by a heuristic that is commented out in
       calculate_priority(). Restore it together with that heuristic. */

    /* each work group iterates over its range of primitives */
    for (uint i = startID + get_local_id(0); i < endID; i += get_local_size(0))
    {
        const uint geomID = PRIMREF_geomID(&primref[i]);

        /* splitting heuristic for triangles */
        if (GRL_is_triangle(&geomDesc[geomID]))
        {
            presplit[i].index = i;
            presplit[i].priority = calculate_priority(&primref[i], &geomDesc[geomID]);
        }

        /* splitting of procedurals is not supported */
        else if (GRL_is_procedural(&geomDesc[geomID]))
        {
            presplit[i].index = i;
            presplit[i].priority = 0.0f;
        }

        /* unknown geometry type: still publish a valid zero-priority entry so
           later passes (priority_sum, sorting) never read uninitialized memory */
        else
        {
            presplit[i].index = i;
            presplit[i].priority = 0.0f;
        }
    }

    /* the very first global work item records the pre-split primitive count */
    if (get_local_id(0) + get_group_id(0)*get_local_size(0) == 0)
        globals->numOriginalPrimitives = globals->numPrimitives;
}
175
176/*
177
178  This kernel computes the sum of all priorities.
179
180 */
181
/* Sums all presplit priorities across a single work group (the kernel is
   expected to be launched with one work group) and stores the total in
   globals->presplitPrioritySum, which perform_presplits later uses to
   convert a priority into a share of the split budget. */
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_WORKGROUP_SIZE, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16))) void kernel
priority_sum(global struct Globals *globals,
             global struct PresplitItem *presplit,
             uint numPrimitivesToSplit)
{
    /* partition the N priorities into one contiguous block per work item */
    const uint N = globals->numPrimitives;
    const uint j = get_local_id(0);
    const uint J = get_local_size(0);
    const uint BLOCKSIZE = (N + J - 1) / J;
    const uint start = min((j + 0) * BLOCKSIZE, N);
    const uint end = min((j + 1) * BLOCKSIZE, N);

    /* per-work-item partial sum */
    float prioritySum = 0;
    for (uint i = start; i < end; i++)
        prioritySum += presplit[i].priority;

    /* reduce returns the same total to all work items, so the (redundant)
       store below is written identically by each of them */
    prioritySum = work_group_reduce_add(prioritySum);
    globals->presplitPrioritySum = prioritySum;

/* Disabled experiment: iteratively shrink the priority sum by 5% while the
   implied split count stays within the budget, to use the budget more fully. */
#if 0
  work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);

  float scale = 1.0f;
  for (uint i = 0; i < 10; i++)
  {
    //if (j == 0)
    //printf("prioritySum = %f\n",scale*prioritySum);

    uint numSplits = 0;
    for (uint i = start; i < end; i++)
      numSplits += presplit[i].priority / (scale*prioritySum)*numPrimitivesToSplit;

    numSplits = work_group_reduce_add(numSplits);

    if (numSplits > numPrimitivesToSplit)
      break;

    //if (j == 0)
    //  printf("numSplits = %i (%i)\n",numSplits,numPrimitivesToSplit);

    globals->presplitPrioritySum = scale * prioritySum;
    scale -= 0.05f;
  }
#endif
}
229
/* Restores the max-heap property (ordered by AABB half-area) after the root
   of `array` has been replaced: sifts the root down until it dominates both
   of its children. */
GRL_INLINE void heapify_down(struct AABB *array, uint size)
{
    uint node = 0;  /* start at the root */

    while (true)
    {
        const int left = 2 * node + 1;
        const int right = 2 * node + 2;

        /* find the largest of the node and its existing children */
        int largest = node;
        if (left < size && AABB_halfArea(&array[left]) > AABB_halfArea(&array[largest]))
            largest = left;
        if (right < size && AABB_halfArea(&array[right]) > AABB_halfArea(&array[largest]))
            largest = right;

        /* heap property holds once the node dominates both children */
        if (largest == (int)node)
            break;

        /* otherwise swap and continue sifting down along the larger child */
        struct AABB tmp = array[node];
        array[node] = array[largest];
        array[largest] = tmp;
        node = largest;
    }
}
273
/* Restores the max-heap property (ordered by AABB half-area) after an
   element has been appended at index `cur_node_id`: bubbles it towards the
   root while it is not dominated by its parent. */
GRL_INLINE void heapify_up(struct AABB *array, uint cur_node_id)
{
    while (cur_node_id != 0)
    {
        const uint parent_id = (cur_node_id - 1) / 2;

        /* parent already dominates -> done */
        if (AABB_halfArea(&array[parent_id]) > AABB_halfArea(&array[cur_node_id]))
            break;

        /* swap with the parent and continue upwards */
        struct AABB tmp = array[cur_node_id];
        array[cur_node_id] = array[parent_id];
        array[parent_id] = tmp;
        cur_node_id = parent_id;
    }
}
303
304/* splits a quad primref */
/* Splits a quad primref (a pair of triangles) at world-space position
   `fsplit` in dimension `dim`, producing the left and right fragment
   primrefs. The payload stored in the .w components (geomID/primID pair)
   is copied to both halves. */
GRL_INLINE void splitQuadPrimRef(global GRL_RAYTRACING_GEOMETRY_DESC *geom,
                      struct AABB *cur, uint dim, float fsplit,
                      struct AABB *left, struct AABB *right)
{
    AABB_init(left);
    AABB_init(right);

    /* split the pair's first triangle, accumulating fragment bounds */
    const uint3 triA = GRL_load_triangle(geom, PRIMREF_primID0(cur));
    splitTriangle(cur, dim, fsplit,
                  GRL_load_vertex(geom, triA.x),
                  GRL_load_vertex(geom, triA.y),
                  GRL_load_vertex(geom, triA.z),
                  left, right);

    /* split the pair's second triangle */
    const uint3 triB = GRL_load_triangle(geom, PRIMREF_primID1(cur));
    splitTriangle(cur, dim, fsplit,
                  GRL_load_vertex(geom, triB.x),
                  GRL_load_vertex(geom, triB.y),
                  GRL_load_vertex(geom, triB.z),
                  left, right);

    /* both halves inherit the PrimRef payload carried in the w lanes */
    left->lower.w = cur->lower.w;
    left->upper.w = cur->upper.w;
    right->lower.w = cur->lower.w;
    right->upper.w = cur->upper.w;
}
335
336/*
337
338  This kernel performs the actual pre-splitting. It selects split
339  locations based on an implicit octree over the scene.
340
341 */
342
343#define USE_HEAP 0
344#define HEAP_SIZE 32u
345
/* Performs the actual pre-splitting. Each primref with a positive priority
   receives a share of the split budget proportional to
   priority / presplitPrioritySum; split positions are chosen on an implicit
   octree over the scene (via morton codes of the GRID_SIZE^3 grid cells the
   primitive's box touches). New fragments are appended to primref[] past
   numPrimitives using an atomic counter, and the centroid bounds are
   extended to cover all produced fragments. */
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
//__attribute__((intel_reqd_sub_group_size(16)))
void kernel
perform_presplits(global struct Globals *globals,
                  global struct BVHBase* bvh_base,
                  global struct AABB *primref,
                  global struct PresplitItem *presplit,
                  global char *bvh_mem,
                  global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc,
                  uint numPrimitivesToSplit)
{
    /* calculate the range of primitives each work group should process;
       only the top `numPrimitivesToSplit` entries (presplit[] is assumed
       sorted ascending by priority, see the ID comment below) are visited */
    const uint numPrimitives = globals->numPrimitives;
    int pstart = globals->numOriginalPrimitives - numPrimitivesToSplit;
    pstart = max(0, pstart);
    const uint numPrimitivesToProcess = globals->numPrimitives - pstart;
    const uint startID = (get_group_id(0) + 0) * numPrimitivesToProcess / get_num_groups(0);
    const uint endID = (get_group_id(0) + 1) * numPrimitivesToProcess / get_num_groups(0);

    /* calculates the 3D grid: a cube of GRID_SIZE cells per axis over the
       scene bounds, sized by the largest scene extent */
    float4 grid_base;
    grid_base.xyz = AABB3f_load_lower( &bvh_base->Meta.bounds );
    grid_base.w = 0;

    float4 grid_extend;
    grid_extend.xyz = AABB3f_size(&bvh_base->Meta.bounds);
    grid_extend.w=0;

    /* broadcast the maximum extent to all lanes (scalar->vector assignment) */
    grid_extend = max(grid_extend.x, max(grid_extend.y, grid_extend.z));
    /* select() yields 0 where the extent is zero, avoiding a division by zero */
    const float4 grid_scale = select(GRID_SIZE / grid_extend, 0.0f, grid_extend == 0.0f);
    const float inv_grid_size = 1.0f / GRID_SIZE;

    /* we have to update centroid bounds */
    struct AABB centroidBounds;
    AABB_init(&centroidBounds);

    /* initialize heap (per work item, in private memory) */
    struct AABB heap[HEAP_SIZE];
    uint heap_size = 0;

    /* each work group iterates over its range of primitives */
    for (uint j = startID + get_local_id(0); j < endID; j += get_local_size(0))
    {
        /* array is in ascending order */
        //const uint ID = numPrimitives-1-j;
        const uint ID = pstart + j;
        const float prob = presplit[ID].priority;
        const uint i = presplit[ID].index;
        const uint geomID = PRIMREF_geomID(&primref[i]);

        /* do not split primitives with low splitting priority */
        if (prob <= 0.0f)
            continue;

        /* we support splitting only for triangles */
        if (!GRL_is_triangle(&geomDesc[geomID]))
            continue;

        /* compute number of split primitives to produce: the primitive's
           proportional share of the global split budget, capped at HEAP_SIZE */
        uint numSplitPrims = prob / globals->presplitPrioritySum * numPrimitivesToSplit;
        numSplitPrims = min(HEAP_SIZE, numSplitPrims);

        /* stop if not splits have to get performed */
        if (numSplitPrims <= 1)
            continue;

        /* add primref to heap */
        heap[0] = primref[i];
        heap_size = 1;
        uint heap_pos = 0;  /* round-robin cursor used in the !USE_HEAP mode */

        /* iterate until all splits are done */
        uint prims = 1;
        uint last_heap_size = heap_size;
        while (prims < numSplitPrims)
        {
            /* map the primitive bounds to the grid; the +/-0.2f epsilon
               shrinks the mapped box slightly (presumably to avoid picking
               splits at cell boundaries the box only grazes — TODO confirm) */
            const float4 lower = heap[heap_pos].lower;
            const float4 upper = heap[heap_pos].upper;
            const float4 glower = (lower - grid_base) * grid_scale + 0.2f;
            const float4 gupper = (upper - grid_base) * grid_scale - 0.2f;
            uint4 ilower = convert_uint4_rtz(glower);
            uint4 iupper = convert_uint4_rtz(gupper);

            /* this ignores dimensions that are empty */
            if (glower.x >= gupper.x)
                iupper.x = ilower.x;
            if (glower.y >= gupper.y)
                iupper.y = ilower.y;
            if (glower.z >= gupper.z)
                iupper.z = ilower.z;

            /* Now we compute a morton code for the lower and upper grid
       * coordinates. */
            const uint lower_code = bitInterleave3D(ilower);
            const uint upper_code = bitInterleave3D(iupper);

            /* if all bits are equal then we cannot split (box fits one octree cell) */
            if (lower_code == upper_code)
            {
#if !USE_HEAP
                /* round-robin mode: consume one unit of budget and move to the
                   next heap slot so an unsplittable fragment cannot stall the loop */
                prims++; // !!!!!!!

                heap_pos++;
                if (heap_pos == last_heap_size)
                {
                    heap_pos = 0;
                    last_heap_size = heap_size;
                }
                continue;
#else
                if (heap_size == 1)
                    break;

                /* flush the unsplittable fragment to the primref array and
                   continue with the remaining heap entries */
                const uint offset = numPrimitives + atomic_add(&globals->numSplittedPrimitives, 1);
                primref[offset] = heap[heap_pos];

                presplit[offset].index = offset;
                presplit[offset].priority = calculate_priority(&heap[heap_pos], &geomDesc[geomID]);

                heap[0] = heap[--heap_size];
                heapify_down(heap, heap_size);
                continue;
#endif
            }

            /* We find the bit position of the first differing bit from the
       * top down. This bit indicates a split position inside an
       * implicit octree. */
            const uint diff = 31 - clz(lower_code ^ upper_code);

            /* compute octree level and dimension to perform the split in
               (morton bits are interleaved x/y/z, hence /3 and %3) */
            const uint level = diff / 3;
            const uint dim = diff % 3;

            /* now we compute the grid position of the split */
            const uint isplit = iupper[dim] & ~((1 << level) - 1);

            /* compute world space position of split */
            const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend[dim];

            /* split primref into left and right part */
            struct AABB left, right;
            splitQuadPrimRef(&geomDesc[geomID], &heap[heap_pos], dim, fsplit, &left, &right);
            prims++;

            /* update centroid bounds */
            AABB_extend_point(&centroidBounds, AABB_centroid2(&left));
            AABB_extend_point(&centroidBounds, AABB_centroid2(&right));

#if !USE_HEAP
            /* round-robin mode: left replaces the current slot, right is
               appended; heap_size never exceeds numSplitPrims <= HEAP_SIZE */
            heap[heap_pos] = left;
            heap[heap_size] = right;
            heap_size++;

            heap_pos++;
            if (heap_pos == last_heap_size)
            {
                heap_pos = 0;
                last_heap_size = heap_size;
            }
#else
            /* heap mode: always split the largest fragment next */

            /* insert left element into heap */
            heap[0] = left;
            heapify_down(heap, heap_size);

            /* insert right element into heap */
            heap[heap_size] = right;
            heapify_up(heap, heap_size);

            heap_size++;
#endif
        }

        /* copy primities to primref array: fragment 0 replaces the
           original primref in place, the rest are appended atomically */
        primref[i] = heap[0];

        presplit[ID].index = i;
        presplit[ID].priority = calculate_priority(&heap[0], &geomDesc[geomID]);

        for (uint k = 1; k < heap_size; k++)
        {
            const uint offset = numPrimitives + atomic_add(&globals->numSplittedPrimitives, 1);
            primref[offset] = heap[k];

            presplit[offset].index = offset;
            presplit[offset].priority = calculate_priority(&heap[k], &geomDesc[geomID]);
        }
    }

    /* merge centroid bounds into global bounds */
    centroidBounds = AABB_sub_group_reduce(&centroidBounds);
    if (get_sub_group_local_id() == 0)
        AABB_global_atomic_merge(&globals->centroidBounds, &centroidBounds);

    work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);

    /* update number of primitives on finish (last work group to finish) */
    if (Globals_OnFinish(globals))
    {
        globals->numPrimitives = globals->numPrimitives + globals->numSplittedPrimitives;
        globals->numSplittedPrimitives = 0;

        /* update first build record */ // FIXME: should be done in builder itself
        /* NOTE(review): quadLeafStart appears to be in 64-byte units — confirm */
        global struct BuildRecord *record = (global struct BuildRecord *)(bvh_mem + bvh_base->quadLeafStart*64);
        record->end = globals->numPrimitives;
    }
}
557