• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//
2// Copyright (C) 2009-2021 Intel Corporation
3//
4// SPDX-License-Identifier: MIT
5//
6//
7
8#include "api_interface.h"
9#include "common.h"
10#include "instance.h"
11
12#include "bvh_build_primref.h"
13
14//#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable
15//int sub_group_non_uniform_any(int predicate);
16
17#define WINDOW_SIZE 16
18
19/* Representation of two merged triangles. */
20struct QuadIndices
21{
22    uint primID0, primID1;
23    uint v0, v1, v2, v3;
24};
25
26/*
27
28  This function calculates a PrimRef from a merged quad and writes
29  this PrimRef to memory.
30
31 */
32GRL_INLINE void create_prim_ref(const uint geomID,
33                            const struct QuadIndices quad,
34                            global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc,
35                            struct AABB *geometryBounds,
36                            struct AABB *centroidBounds,
37                            global uint *numPrimitives,
38                            global struct AABB *primref)
39{
40
41    /* load quad vertices */
42    const float4 vtx0 = GRL_load_vertex(geomDesc, quad.v0); // FIXME: these multiple load_vertex calls should get merged
43    const float4 vtx1 = GRL_load_vertex(geomDesc, quad.v1);
44    const float4 vtx2 = GRL_load_vertex(geomDesc, quad.v2);
45    const float4 vtx3 = GRL_load_vertex(geomDesc, quad.v3);
46
47    /* calculate bounds for quad */
48    float4 lower = min(min(vtx0, vtx1), min(vtx2, vtx3));
49    float4 upper = max(max(vtx0, vtx1), max(vtx2, vtx3));
50
51    /* extend geometry and centroid bounds */
52    const float4 centroid2 = lower + upper;
53    AABB_extendlu(geometryBounds, lower, upper);
54    AABB_extendlu(centroidBounds, centroid2, centroid2);
55
56    PrimRef ref;
57    PRIMREF_setAABB( &ref, lower.xyz, upper.xyz );
58    PRIMREF_setQuadMetaData( &ref, quad.primID0, quad.primID1, geomID, GRL_get_Flags( geomDesc ) );
59
60    /* store primref to memory */
61    const uint offset = atomic_add_global(numPrimitives, 1);
62    primref[offset] = ref;
63}
64
65/*
66
67  This function calculates a PrimRef from a procedural and writes
68  this PrimRef to memory.
69
70 */
71GRL_INLINE void create_prim_ref_procedural(global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc,
72                                       const uint geomID,
73                                       const uint primID,
74                                       struct AABB *geometryBounds,
75                                       struct AABB *centroidBounds,
76                                       global uint *numPrimitives,
77                                       global struct AABB *primref)
78{
79    /* load aabb from memory */
80    struct GRL_RAYTRACING_AABB aabb = GRL_load_aabb(&geomDesc[geomID], primID);
81
82    /* extend geometry and centroid bounds */
83    float4 lower = (float4)(aabb.MinX, aabb.MinY, aabb.MinZ, 0.0f);
84    float4 upper = (float4)(aabb.MaxX, aabb.MaxY, aabb.MaxZ, 0.0f);
85    const float4 centroid2 = lower + upper;
86    AABB_extendlu(geometryBounds, lower, upper);
87    AABB_extendlu(centroidBounds, centroid2, centroid2);
88
89    /* encode geomID, primID */
90    uint geomFlags = GRL_get_Flags(&geomDesc[geomID]);
91
92    PrimRef ref;
93    PRIMREF_setAABB( &ref, lower.xyz, upper.xyz );
94    PRIMREF_setProceduralMetaData( &ref, geomID, primID, geomFlags );
95
96    /* store primref to memory */
97    const uint offset = atomic_add_global(numPrimitives, 1);
98    primref[offset] = ref;
99}
100
101/*
102
103   This function performs a binary search to calculate the geomID and
104   primID of the i'th primitive of the scene. For the search a
105   prefix_sum array is used that stores for each location j the sum of
106   the number of primitives of all meshes k with k<j.
107
108*/
109
110struct GeomPrimID
111{
112    uint geomID, primID;
113};
114
115struct GeomPrimID binary_search_geomID_primID(global uint *prefix_sum, const uint prefix_sum_size, const uint i)
116{
117    uint l = 0;
118    uint r = prefix_sum_size;
119    uint k = 0;
120
121    while (r - l > 1)
122    {
123        const uint m = (l + r) / 2;
124        k = prefix_sum[m];
125        if (k <= i)
126        {
127            l = m;
128        }
129        else if (i < k)
130        {
131            r = m;
132        }
133    }
134
135    struct GeomPrimID id;
136    id.geomID = l;
137    id.primID = i - prefix_sum[l];
138    return id;
139}
140
141/*
142
143  Checks if a vertex contains only finite floating point numbers.
144
145 */
146
147GRL_INLINE bool isfinite_vertex(float4 vtx)
148{
149    return isfinite(vtx.x) && isfinite(vtx.y) && isfinite(vtx.z);
150}
151
152
153/*
154  Create primrefs from array of instance descriptors.
155 */
156 GRL_ANNOTATE_IGC_DO_NOT_SPILL
157__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
158__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH))) void kernel
159primrefs_from_DXR_instances(global struct Globals *globals,
160                            global struct BVHBase* bvh,
161                            global __const struct GRL_RAYTRACING_INSTANCE_DESC* instances,
162                            uint numInstances,
163                            global struct AABB *primrefs,
164                            uint allowUpdate)
165{
166    const uint instanceIndex = get_sub_group_local_id() + get_group_id(0) * MAX_HW_SIMD_WIDTH;
167    if (instanceIndex < numInstances)
168    {
169        global __const struct GRL_RAYTRACING_INSTANCE_DESC* instance = instances + instanceIndex;
170
171        primrefs_from_instances(
172            globals,
173            bvh,
174            instance,
175            instanceIndex,
176            primrefs,
177            0,
178            allowUpdate);
179    }
180}
181
182/*
183  Create primrefs from array of instance descriptors.
184 */
185 GRL_ANNOTATE_IGC_DO_NOT_SPILL
186__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
187void kernel
188primrefs_from_DXR_instances_indirect(global struct Globals *globals,
189                            global struct BVHBase* bvh,
190                            global __const struct GRL_RAYTRACING_INSTANCE_DESC* instances,
191                            global struct IndirectBuildRangeInfo* indirect_data,
192                            global struct AABB *primrefs,
193                            uint allowUpdate)
194{
195    // TODO: On DG2, we have 8 dwords of 'inline data' which can be pushed
196    // directly to the kernel. THe rest of the kernel args are pulled using
197    // loads from memory. It may be more efficient to put 'numInstances' and
198    // 'allowUpdate' into 'globals'
199
200    const uint instanceIndex =  get_local_id(0) + get_group_id(0) * MAX_HW_SIMD_WIDTH;
201
202    if (instanceIndex < indirect_data->primitiveCount)
203    {
204        instances = (global __const struct GRL_RAYTRACING_INSTANCE_DESC*)
205            (((global char*)instances) + indirect_data->primitiveOffset);
206        global __const struct GRL_RAYTRACING_INSTANCE_DESC* instance = instances + instanceIndex;
207        primrefs_from_instances(
208            globals,
209            bvh,
210            instance,
211            instanceIndex,
212            primrefs,
213            0,
214            allowUpdate);
215    }
216}
217
218/*
219  Create primrefs from array of pointers to instance descriptors.
220 */
221 GRL_ANNOTATE_IGC_DO_NOT_SPILL
222__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
223__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH))) void kernel
224primrefs_from_DXR_instances_pointers(global struct Globals *globals,
225                                     global struct BVHBase* bvh,
226                                     global void *instances_in,
227                                     uint numInstances,
228                                     global struct AABB *primrefs,
229                                     uint allowUpdate)
230{
231    global const struct GRL_RAYTRACING_INSTANCE_DESC **instances =
232        (global const struct GRL_RAYTRACING_INSTANCE_DESC **)instances_in;
233
234    const uint instanceIndex = get_sub_group_local_id() + get_group_id(0) * MAX_HW_SIMD_WIDTH;
235    if (instanceIndex < numInstances)
236    {
237        global __const struct GRL_RAYTRACING_INSTANCE_DESC* instance = instances[instanceIndex];
238
239        primrefs_from_instances(
240            globals,
241            bvh,
242            instance,
243            instanceIndex,
244            primrefs,
245            0,
246            allowUpdate);
247    }
248}
249
250/*
251  Create primrefs from array of pointers to instance descriptors.
252 */
253 GRL_ANNOTATE_IGC_DO_NOT_SPILL
254__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
255void kernel
256primrefs_from_DXR_instances_pointers_indirect(global struct Globals *globals,
257                                              global struct BVHBase* bvh,
258                                              global void *instances_in,
259                                              global struct AABB *primrefs,
260                                              global struct IndirectBuildRangeInfo* indirect_data,
261                                              uint allowUpdate)
262{
263    global const struct GRL_RAYTRACING_INSTANCE_DESC **instances =
264        (global const struct GRL_RAYTRACING_INSTANCE_DESC **)instances_in;
265
266    const uint instanceIndex = get_local_id(0) + get_group_id(0) * MAX_HW_SIMD_WIDTH;
267
268    if (instanceIndex < indirect_data->primitiveCount)
269    {
270        instances = (global const struct GRL_RAYTRACING_INSTANCE_DESC**)
271            (((global char*)instances) + indirect_data->primitiveOffset);
272        global __const struct GRL_RAYTRACING_INSTANCE_DESC* instance = instances[instanceIndex];
273
274        primrefs_from_instances(
275            globals,
276            bvh,
277            instance,
278            instanceIndex,
279            primrefs,
280            0,
281            allowUpdate);
282    }
283}
284
285
286///////////////////////////////////////////////////////////////////////////////////////////
287///////////////////////////////////////////////////////////////////////////////////////////
288///////////////////////////////////////////////////////////////////////////////////////////
289///////////////////////////////////////////////////////////////////////////////////////////
290
291bool can_pair( uint3 a, uint3 b )
292{
293    bool match0 = any( a.xxx == b.xyz ) ? 1 : 0;
294    bool match1 = any( a.yyy == b.xyz ) ? 1 : 0;
295    bool match2 = any( a.zzz == b.xyz ) ? 1 : 0;
296    return (match0 + match1 + match2) >= 2;
297}
298
299void reduce_bounds(
300    float3 lower,
301    float3 upper,
302    global struct Globals* globals,
303    global struct BVHBase* bvh )
304{
305
306    // reduce centroid bounds... make sure to exclude lanes with invalid AABBs
307    float3 cent = lower + upper;
308    float3 cent_lower = select( (float3)(INFINITY, INFINITY, INFINITY), cent, lower <= upper);
309    float3 cent_upper = select(-(float3)(INFINITY, INFINITY, INFINITY), cent, lower <= upper);
310
311    // reduce geo bounds
312    AABB3f_atomic_merge_global_sub_group_lu( &bvh->Meta.bounds, lower, upper );
313    AABB_global_atomic_merge_sub_group_lu(&globals->centroidBounds, cent_lower, cent_upper );
314}
315
316
317struct TriState
318{
319    bool valid;
320    uint prim_index;
321    uint pairing;
322    uint3 indices;
323    float3 lower;
324    float3 upper;
325};
326
327#define NOT_PAIRED 0xffffffff
328
329void load_triangle_data(uniform global char* index_buffer,
330                        uniform const uint index_format,
331                        uniform global char* vertex_buffer,
332                        uniform const uint vertex_format,
333                        uniform const uint vertex_stride,
334                        uniform global float* transform_buffer,
335                        uniform uint total_vert_count,
336                        struct TriState* state,
337                        float4* v)
338{
339        state->indices = GRL_load_indices_from_buffer(index_buffer, index_format, state->prim_index );
340
341        const uint last_vertex = total_vert_count - 1;
342        const uint x = min(state->indices.x, last_vertex);
343        const uint y = min(state->indices.y, last_vertex);
344        const uint z = min(state->indices.z, last_vertex);
345
346        GRL_load_triangle_vertices(vertex_buffer, vertex_format, vertex_stride, transform_buffer, x, y, z, v);
347}
348
349struct TriState load_triangle( uniform global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc,
350                        uniform uint base,
351                        uniform uint num_prims,
352                        uniform uint total_vert_count )
353{
354
355    struct TriState state;
356    state.pairing        = NOT_PAIRED;
357    state.valid          = false;
358    state.prim_index     = base + get_sub_group_local_id();
359    state.lower = (float3)(INFINITY, INFINITY, INFINITY);
360    state.upper = -(float3)(INFINITY, INFINITY, INFINITY);
361
362    if (state.prim_index < num_prims)
363    {
364        state.valid = true;
365        float4 v[3];
366        load_triangle_data((global char*)geomDesc->Desc.Triangles.pIndexBuffer,
367                        geomDesc->Desc.Triangles.IndexFormat,
368                        (global char*)geomDesc->Desc.Triangles.pVertexBuffer,
369                        geomDesc->Desc.Triangles.VertexFormat,
370                        geomDesc->Desc.Triangles.VertexBufferByteStride,
371                        (global float*)geomDesc->Desc.Triangles.pTransformBuffer,
372                        total_vert_count,
373                        &state,
374                        v);
375
376        if (state.indices.x >= total_vert_count || state.indices.y >= total_vert_count || state.indices.z >= total_vert_count ||
377            !isfinite_vertex(v[0]) || !isfinite_vertex(v[1]) || !isfinite_vertex(v[2]) ||
378            state.indices.x == state.indices.y || state.indices.x == state.indices.z || state.indices.y == state.indices.z)
379        {
380            state.valid = false;
381        }
382        else
383        {
384            state.lower.xyz = min(v[2].xyz, min(v[1].xyz, v[0].xyz));
385            state.upper.xyz = max(v[2].xyz, max(v[1].xyz, v[0].xyz));
386        }
387    }
388    return state;
389}
390
391void broadcast_triangles_local( struct TriState* state  )
392{
393    varying uint my_prim    = state->prim_index;
394    varying uint my_pairing = state->pairing;
395    varying float3 my_lower = state->lower;
396    varying float3 my_upper = state->upper;
397    varying bool valid      = state->valid;
398    varying uint3 indices   = state->indices;
399
400    for (uniform uint broadcast_lane = 0; broadcast_lane < get_sub_group_size(); broadcast_lane++)
401    {
402        // don't broadcast invalid prims
403        if ( !sub_group_broadcast( valid, broadcast_lane ) )
404            continue;
405
406        uint broadcast_pairing = sub_group_broadcast(my_pairing, broadcast_lane);
407        uint broadcast_prim = sub_group_broadcast(my_prim, broadcast_lane);
408
409        if (broadcast_pairing == NOT_PAIRED)
410        {
411            // if the broadcast prim is not paired already, all unpaired lanes attempt to pair with it
412            bool pairable = false;
413            uint3 other_indices = sub_group_broadcast_uint3( indices, broadcast_lane );
414            if (broadcast_prim != my_prim && my_pairing == NOT_PAIRED && valid )
415            {
416                pairable = can_pair( indices, other_indices );
417            }
418
419
420            uint pairable_lane = ctz(intel_sub_group_ballot(pairable));
421            if (valid && pairable_lane < get_sub_group_size())
422            {
423                // pair the broadcast primitive with the first lane that can accept it
424                float3 broadcast_lower = sub_group_broadcast_float3(my_lower.xyz, broadcast_lane);
425                float3 broadcast_upper = sub_group_broadcast_float3(my_upper.xyz, broadcast_lane);
426                if (get_sub_group_local_id() == pairable_lane)
427                {
428                    my_pairing = broadcast_prim;
429                    my_lower.xyz = min(my_lower.xyz, broadcast_lower);
430                    my_upper.xyz = max(my_upper.xyz, broadcast_upper);
431                }
432
433                // pair the broadcast primitive with the same that was paired to it
434                uint pairable_prim = sub_group_broadcast(my_pairing, pairable_lane);
435                if (get_sub_group_local_id() == broadcast_lane)
436                {
437                    my_pairing = pairable_prim;
438                }
439            }
440        }
441        else
442        {
443            //
444            // if this lane was already paired with the broadcasting tri
445            //   in an earlier loop iteration, then record the pairing in this lane's registers
446            float3 broadcast_lower = sub_group_broadcast_float3(my_lower.xyz, broadcast_lane);
447            float3 broadcast_upper = sub_group_broadcast_float3(my_upper.xyz, broadcast_lane);
448            if (broadcast_pairing == my_prim)
449            {
450                my_pairing = broadcast_prim;
451                my_lower.xyz = min(my_lower.xyz, broadcast_lower);
452                my_upper.xyz = max(my_upper.xyz, broadcast_upper);
453            }
454        }
455    }
456
457    state->pairing = my_pairing;
458    state->lower = my_lower;
459    state->upper = my_upper;
460}
461
462
463void broadcast_triangles_nonlocal(struct TriState* state, const struct TriState* other )
464{
465    varying uint my_prim = state->prim_index;
466    varying uint my_pairing = state->pairing;
467    varying float3 my_lower = state->lower;
468    varying float3 my_upper = state->upper;
469    varying bool valid = state->valid;
470    varying uint3 indices = state->indices;
471
472    for (uniform uint broadcast_lane = 0; broadcast_lane < get_sub_group_size(); broadcast_lane++)
473    {
474        // don't broadcast invalid prims
475        if (!sub_group_broadcast(other->valid, broadcast_lane))
476            continue;
477
478        uint broadcast_pairing = sub_group_broadcast(other->pairing, broadcast_lane);
479        uint broadcast_prim = sub_group_broadcast(other->prim_index, broadcast_lane);
480
481        if (broadcast_pairing == NOT_PAIRED)
482        {
483            // if the broadcast prim is not paired already, all unpaired lanes attempt to pair with it
484            bool pairable = false;
485            if ( my_pairing == NOT_PAIRED && valid )
486            {
487                uint3 other_indices = sub_group_broadcast_uint3(other->indices, broadcast_lane);
488                pairable = can_pair(indices, other_indices);
489            }
490
491            // pair the broadcast primitive with the first lane that can accept it
492            uint pairable_mask = intel_sub_group_ballot(pairable);
493            if (valid && (ctz(pairable_mask) == get_sub_group_local_id()))
494            {
495                my_pairing = broadcast_prim;
496                my_lower.xyz = min(my_lower.xyz, sub_group_broadcast_float3(other->lower.xyz, broadcast_lane));
497                my_upper.xyz = max(my_upper.xyz, sub_group_broadcast_float3(other->upper.xyz, broadcast_lane));
498            }
499        }
500
501    }
502
503    state->pairing = my_pairing;
504    state->lower = my_lower;
505    state->upper = my_upper;
506}
507
508GRL_INLINE void do_triangles_to_primrefs(
509    global struct Globals*               globals,
510    global struct BVHBase*               bvh,
511    global struct AABB*                  primref,
512    global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc,
513    uint                                 geomID_and_flags,
514    const uint                           num_prims)
515{
516    uint geomID             = geomID_and_flags & 0x00ffffff;
517    uint geom_flags         = geomID_and_flags >> 24;
518    uint prim_base          = get_group_id(0) * get_local_size(0);
519    uint total_vert_count   = GRL_get_triangles_VertexCount(geomDesc);
520
521    struct TriState tri = load_triangle( geomDesc, prim_base, num_prims, total_vert_count );
522    broadcast_triangles_local( &tri );
523
524
525    // we will produce output if the lane creates a triangle (my_pairing == NOT_PAIRED)
526    // or for the lane corresponding to the larger of two triangles
527    bool will_write = (tri.pairing > tri.prim_index) && tri.valid;
528    uint write_mask = intel_sub_group_ballot(will_write);
529    uint write_offs = subgroup_bit_prefix_exclusive( write_mask );
530    uint write_count = popcount(write_mask);
531
532    // allocate space in primref buffer
533    uint write_base;
534    if( get_sub_group_local_id() == 0 )
535        write_base = atomic_add_global( &globals->numPrimitives, write_count );
536    write_offs += sub_group_broadcast( write_base, 0 );
537
538    uint primID0 = tri.prim_index;
539    uint primID1 = (tri.pairing != NOT_PAIRED) ? tri.pairing : tri.prim_index;
540
541    if (will_write)
542    {
543        PrimRef ref;
544        PRIMREF_setAABB(&ref, tri.lower.xyz, tri.upper.xyz);
545        PRIMREF_setQuadMetaData(&ref, primID0, primID1, geomID, geom_flags);
546        uint8 val = (uint8)(
547            as_uint(ref.lower.x), as_uint(ref.lower.y), as_uint(ref.lower.z), as_uint(ref.lower.w),
548            as_uint(ref.upper.x), as_uint(ref.upper.y), as_uint(ref.upper.z), as_uint(ref.upper.w));
549        store_uint8_L1WB_L3WB((global uint8*)(primref + write_offs), 0, val);
550    }
551
552    reduce_bounds( tri.lower, tri.upper, globals, bvh );
553}
554
555
556GRL_ANNOTATE_IGC_DO_NOT_SPILL
557__attribute__((reqd_work_group_size(16, 1, 1)))
558__attribute__((intel_reqd_sub_group_size(16))) void kernel
559triangles_to_primrefs(
560    global struct Globals* globals,
561    global struct BVHBase* bvh,
562    global struct AABB* primref,
563    global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc,
564    uint geomID_and_flags,
565    uint num_prims
566    )
567{
568    do_triangles_to_primrefs(globals, bvh, primref, geomDesc, geomID_and_flags, num_prims);
569}
570
571GRL_ANNOTATE_IGC_DO_NOT_SPILL
572__attribute__((reqd_work_group_size(16, 1, 1)))
573void kernel
574triangles_to_primrefs_indirect(
575    global struct Globals*                globals,
576    global struct BVHBase*                bvh,
577    global struct AABB*                   primref,
578    global GRL_RAYTRACING_GEOMETRY_DESC*  geomDesc,
579    global struct IndirectBuildRangeInfo* indirect_data,
580    uint                                  geomID_and_flags)
581{
582    const uint num_prims = indirect_data->primitiveCount;
583    do_triangles_to_primrefs(globals, bvh, primref, geomDesc, geomID_and_flags, num_prims);
584}
585
586GRL_INLINE void do_procedurals_to_primrefs(
587    global struct Globals* globals,
588    global struct BVHBase* bvh,
589    global struct AABB* primref,
590    global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc,
591    uint geomID_and_flags,
592    const uint num_prims)
593{
594    uint geomID    = geomID_and_flags & 0x00ffffff;
595    uint geomFlags = geomID_and_flags >> 24;
596
597    uint primID   = get_group_id(0) * get_local_size(0) + get_sub_group_local_id();
598
599    bool create_primref = false;
600    float3 lower =  (float3)(INFINITY, INFINITY, INFINITY);
601    float3 upper = -(float3)(INFINITY, INFINITY, INFINITY);
602    if (primID < num_prims)
603    {
604        /* check if procedural is valid */
605        struct GRL_RAYTRACING_AABB aabb = GRL_load_aabb(geomDesc, primID);
606        const bool valid_min = isfinite(aabb.MinX) && isfinite(aabb.MinY) && isfinite(aabb.MinZ);
607        const bool valid_max = isfinite(aabb.MaxX) && isfinite(aabb.MaxY) && isfinite(aabb.MaxZ);
608        if (valid_min & valid_max)
609        {
610            /* load aabb from memory */
611            float3 l = (float3)(aabb.MinX, aabb.MinY, aabb.MinZ);
612            float3 u = (float3)(aabb.MaxX, aabb.MaxY, aabb.MaxZ);
613
614            // convert degenerate boxes to points at the box centroid
615            lower = min( l, u );
616            upper = max( l, u );
617
618            create_primref = true;
619        }
620    }
621
622    uint write_mask = intel_sub_group_ballot(create_primref);
623    uint write_offs = subgroup_bit_prefix_exclusive(write_mask);
624    uint write_count = popcount(write_mask);
625
626    // allocate space in primref buffer
627    uint write_base;
628    if (get_sub_group_local_id() == 0)
629        write_base = atomic_add_global(&globals->numPrimitives, write_count);
630    write_offs += sub_group_broadcast(write_base, 0);
631
632    // write the primref
633    if (create_primref)
634    {
635        PrimRef ref;
636        PRIMREF_setAABB(&ref, lower.xyz, upper.xyz);
637        PRIMREF_setProceduralMetaData(&ref, geomID, primID, geomFlags);
638        primref[write_offs] = ref;
639    }
640
641    reduce_bounds(lower, upper, globals, bvh);
642
643}
644
645GRL_ANNOTATE_IGC_DO_NOT_SPILL
646__attribute__((reqd_work_group_size(16, 1, 1)))
647__attribute__((intel_reqd_sub_group_size(16))) void kernel
648procedurals_to_primrefs(
649    global struct Globals* globals,
650    global struct BVHBase* bvh,
651    global struct AABB* primref,
652    global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc,
653    uint geomID_and_flags,
654    uint num_prims
655    )
656{
657    do_procedurals_to_primrefs(globals, bvh, primref, geomDesc, geomID_and_flags, num_prims);
658}
659
660GRL_ANNOTATE_IGC_DO_NOT_SPILL
661__attribute__((reqd_work_group_size(16, 1, 1)))
662void kernel
663procedurals_to_primrefs_indirect(
664    global struct Globals* globals,
665    global struct BVHBase* bvh,
666    global struct AABB* primref,
667    global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc,
668    global const struct IndirectBuildRangeInfo* indirect_data,
669    uint geomID_and_flags
670    )
671{
672    const uint num_prims = indirect_data->primitiveCount;
673    do_procedurals_to_primrefs(globals, bvh, primref, geomDesc, geomID_and_flags, num_prims);
674}
675