1// 2// Copyright (C) 2009-2021 Intel Corporation 3// 4// SPDX-License-Identifier: MIT 5// 6// 7 8#include "api_interface.h" 9#include "common.h" 10#include "instance.h" 11 12#include "bvh_build_primref.h" 13 14//#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable 15//int sub_group_non_uniform_any(int predicate); 16 17#define WINDOW_SIZE 16 18 19/* Representation of two merged triangles. */ 20struct QuadIndices 21{ 22 uint primID0, primID1; 23 uint v0, v1, v2, v3; 24}; 25 26/* 27 28 This function calculates a PrimRef from a merged quad and writes 29 this PrimRef to memory. 30 31 */ 32GRL_INLINE void create_prim_ref(const uint geomID, 33 const struct QuadIndices quad, 34 global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, 35 struct AABB *geometryBounds, 36 struct AABB *centroidBounds, 37 global uint *numPrimitives, 38 global struct AABB *primref) 39{ 40 41 /* load quad vertices */ 42 const float4 vtx0 = GRL_load_vertex(geomDesc, quad.v0); // FIXME: these multiple load_vertex calls should get merged 43 const float4 vtx1 = GRL_load_vertex(geomDesc, quad.v1); 44 const float4 vtx2 = GRL_load_vertex(geomDesc, quad.v2); 45 const float4 vtx3 = GRL_load_vertex(geomDesc, quad.v3); 46 47 /* calculate bounds for quad */ 48 float4 lower = min(min(vtx0, vtx1), min(vtx2, vtx3)); 49 float4 upper = max(max(vtx0, vtx1), max(vtx2, vtx3)); 50 51 /* extend geometry and centroid bounds */ 52 const float4 centroid2 = lower + upper; 53 AABB_extendlu(geometryBounds, lower, upper); 54 AABB_extendlu(centroidBounds, centroid2, centroid2); 55 56 PrimRef ref; 57 PRIMREF_setAABB( &ref, lower.xyz, upper.xyz ); 58 PRIMREF_setQuadMetaData( &ref, quad.primID0, quad.primID1, geomID, GRL_get_Flags( geomDesc ) ); 59 60 /* store primref to memory */ 61 const uint offset = atomic_add_global(numPrimitives, 1); 62 primref[offset] = ref; 63} 64 65/* 66 67 This function calculates a PrimRef from a procedural and writes 68 this PrimRef to memory. 69 70 */ 71GRL_INLINE void create_prim_ref_procedural(global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, 72 const uint geomID, 73 const uint primID, 74 struct AABB *geometryBounds, 75 struct AABB *centroidBounds, 76 global uint *numPrimitives, 77 global struct AABB *primref) 78{ 79 /* load aabb from memory */ 80 struct GRL_RAYTRACING_AABB aabb = GRL_load_aabb(&geomDesc[geomID], primID); 81 82 /* extend geometry and centroid bounds */ 83 float4 lower = (float4)(aabb.MinX, aabb.MinY, aabb.MinZ, 0.0f); 84 float4 upper = (float4)(aabb.MaxX, aabb.MaxY, aabb.MaxZ, 0.0f); 85 const float4 centroid2 = lower + upper; 86 AABB_extendlu(geometryBounds, lower, upper); 87 AABB_extendlu(centroidBounds, centroid2, centroid2); 88 89 /* encode geomID, primID */ 90 uint geomFlags = GRL_get_Flags(&geomDesc[geomID]); 91 92 PrimRef ref; 93 PRIMREF_setAABB( &ref, lower.xyz, upper.xyz ); 94 PRIMREF_setProceduralMetaData( &ref, geomID, primID, geomFlags ); 95 96 /* store primref to memory */ 97 const uint offset = atomic_add_global(numPrimitives, 1); 98 primref[offset] = ref; 99} 100 101/* 102 103 This function performs a binary search to calculate the geomID and 104 primID of the i'th primitive of the scene. For the search a 105 prefix_sum array is used that stores for each location j the sum of 106 the number of primitives of all meshes k with k<j. 107 108*/ 109 110struct GeomPrimID 111{ 112 uint geomID, primID; 113}; 114 115struct GeomPrimID binary_search_geomID_primID(global uint *prefix_sum, const uint prefix_sum_size, const uint i) 116{ 117 uint l = 0; 118 uint r = prefix_sum_size; 119 uint k = 0; 120 121 while (r - l > 1) 122 { 123 const uint m = (l + r) / 2; 124 k = prefix_sum[m]; 125 if (k <= i) 126 { 127 l = m; 128 } 129 else if (i < k) 130 { 131 r = m; 132 } 133 } 134 135 struct GeomPrimID id; 136 id.geomID = l; 137 id.primID = i - prefix_sum[l]; 138 return id; 139} 140 141/* 142 143 Checks if a vertex contains only finite floating point numbers. 144 145 */ 146 147GRL_INLINE bool isfinite_vertex(float4 vtx) 148{ 149 return isfinite(vtx.x) && isfinite(vtx.y) && isfinite(vtx.z); 150} 151 152 153/* 154 Create primrefs from array of instance descriptors. 155 */ 156 GRL_ANNOTATE_IGC_DO_NOT_SPILL 157__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1))) 158__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH))) void kernel 159primrefs_from_DXR_instances(global struct Globals *globals, 160 global struct BVHBase* bvh, 161 global __const struct GRL_RAYTRACING_INSTANCE_DESC* instances, 162 uint numInstances, 163 global struct AABB *primrefs, 164 uint allowUpdate) 165{ 166 const uint instanceIndex = get_sub_group_local_id() + get_group_id(0) * MAX_HW_SIMD_WIDTH; 167 if (instanceIndex < numInstances) 168 { 169 global __const struct GRL_RAYTRACING_INSTANCE_DESC* instance = instances + instanceIndex; 170 171 primrefs_from_instances( 172 globals, 173 bvh, 174 instance, 175 instanceIndex, 176 primrefs, 177 0, 178 allowUpdate); 179 } 180} 181 182/* 183 Create primrefs from array of instance descriptors. 184 */ 185 GRL_ANNOTATE_IGC_DO_NOT_SPILL 186__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1))) 187void kernel 188primrefs_from_DXR_instances_indirect(global struct Globals *globals, 189 global struct BVHBase* bvh, 190 global __const struct GRL_RAYTRACING_INSTANCE_DESC* instances, 191 global struct IndirectBuildRangeInfo* indirect_data, 192 global struct AABB *primrefs, 193 uint allowUpdate) 194{ 195 // TODO: On DG2, we have 8 dwords of 'inline data' which can be pushed 196 // directly to the kernel. THe rest of the kernel args are pulled using 197 // loads from memory. It may be more efficient to put 'numInstances' and 198 // 'allowUpdate' into 'globals' 199 200 const uint instanceIndex = get_local_id(0) + get_group_id(0) * MAX_HW_SIMD_WIDTH; 201 202 if (instanceIndex < indirect_data->primitiveCount) 203 { 204 instances = (global __const struct GRL_RAYTRACING_INSTANCE_DESC*) 205 (((global char*)instances) + indirect_data->primitiveOffset); 206 global __const struct GRL_RAYTRACING_INSTANCE_DESC* instance = instances + instanceIndex; 207 primrefs_from_instances( 208 globals, 209 bvh, 210 instance, 211 instanceIndex, 212 primrefs, 213 0, 214 allowUpdate); 215 } 216} 217 218/* 219 Create primrefs from array of pointers to instance descriptors. 220 */ 221 GRL_ANNOTATE_IGC_DO_NOT_SPILL 222__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1))) 223__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH))) void kernel 224primrefs_from_DXR_instances_pointers(global struct Globals *globals, 225 global struct BVHBase* bvh, 226 global void *instances_in, 227 uint numInstances, 228 global struct AABB *primrefs, 229 uint allowUpdate) 230{ 231 global const struct GRL_RAYTRACING_INSTANCE_DESC **instances = 232 (global const struct GRL_RAYTRACING_INSTANCE_DESC **)instances_in; 233 234 const uint instanceIndex = get_sub_group_local_id() + get_group_id(0) * MAX_HW_SIMD_WIDTH; 235 if (instanceIndex < numInstances) 236 { 237 global __const struct GRL_RAYTRACING_INSTANCE_DESC* instance = instances[instanceIndex]; 238 239 primrefs_from_instances( 240 globals, 241 bvh, 242 instance, 243 instanceIndex, 244 primrefs, 245 0, 246 allowUpdate); 247 } 248} 249 250/* 251 Create primrefs from array of pointers to instance descriptors. 252 */ 253 GRL_ANNOTATE_IGC_DO_NOT_SPILL 254__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1))) 255void kernel 256primrefs_from_DXR_instances_pointers_indirect(global struct Globals *globals, 257 global struct BVHBase* bvh, 258 global void *instances_in, 259 global struct AABB *primrefs, 260 global struct IndirectBuildRangeInfo* indirect_data, 261 uint allowUpdate) 262{ 263 global const struct GRL_RAYTRACING_INSTANCE_DESC **instances = 264 (global const struct GRL_RAYTRACING_INSTANCE_DESC **)instances_in; 265 266 const uint instanceIndex = get_local_id(0) + get_group_id(0) * MAX_HW_SIMD_WIDTH; 267 268 if (instanceIndex < indirect_data->primitiveCount) 269 { 270 instances = (global const struct GRL_RAYTRACING_INSTANCE_DESC**) 271 (((global char*)instances) + indirect_data->primitiveOffset); 272 global __const struct GRL_RAYTRACING_INSTANCE_DESC* instance = instances[instanceIndex]; 273 274 primrefs_from_instances( 275 globals, 276 bvh, 277 instance, 278 instanceIndex, 279 primrefs, 280 0, 281 allowUpdate); 282 } 283} 284 285 286/////////////////////////////////////////////////////////////////////////////////////////// 287/////////////////////////////////////////////////////////////////////////////////////////// 288/////////////////////////////////////////////////////////////////////////////////////////// 289/////////////////////////////////////////////////////////////////////////////////////////// 290 291bool can_pair( uint3 a, uint3 b ) 292{ 293 bool match0 = any( a.xxx == b.xyz ) ? 1 : 0; 294 bool match1 = any( a.yyy == b.xyz ) ? 1 : 0; 295 bool match2 = any( a.zzz == b.xyz ) ? 1 : 0; 296 return (match0 + match1 + match2) >= 2; 297} 298 299void reduce_bounds( 300 float3 lower, 301 float3 upper, 302 global struct Globals* globals, 303 global struct BVHBase* bvh ) 304{ 305 306 // reduce centroid bounds... make sure to exclude lanes with invalid AABBs 307 float3 cent = lower + upper; 308 float3 cent_lower = select( (float3)(INFINITY, INFINITY, INFINITY), cent, lower <= upper); 309 float3 cent_upper = select(-(float3)(INFINITY, INFINITY, INFINITY), cent, lower <= upper); 310 311 // reduce geo bounds 312 AABB3f_atomic_merge_global_sub_group_lu( &bvh->Meta.bounds, lower, upper ); 313 AABB_global_atomic_merge_sub_group_lu(&globals->centroidBounds, cent_lower, cent_upper ); 314} 315 316 317struct TriState 318{ 319 bool valid; 320 uint prim_index; 321 uint pairing; 322 uint3 indices; 323 float3 lower; 324 float3 upper; 325}; 326 327#define NOT_PAIRED 0xffffffff 328 329void load_triangle_data(uniform global char* index_buffer, 330 uniform const uint index_format, 331 uniform global char* vertex_buffer, 332 uniform const uint vertex_format, 333 uniform const uint vertex_stride, 334 uniform global float* transform_buffer, 335 uniform uint total_vert_count, 336 struct TriState* state, 337 float4* v) 338{ 339 state->indices = GRL_load_indices_from_buffer(index_buffer, index_format, state->prim_index ); 340 341 const uint last_vertex = total_vert_count - 1; 342 const uint x = min(state->indices.x, last_vertex); 343 const uint y = min(state->indices.y, last_vertex); 344 const uint z = min(state->indices.z, last_vertex); 345 346 GRL_load_triangle_vertices(vertex_buffer, vertex_format, vertex_stride, transform_buffer, x, y, z, v); 347} 348 349struct TriState load_triangle( uniform global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, 350 uniform uint base, 351 uniform uint num_prims, 352 uniform uint total_vert_count ) 353{ 354 355 struct TriState state; 356 state.pairing = NOT_PAIRED; 357 state.valid = false; 358 state.prim_index = base + get_sub_group_local_id(); 359 state.lower = (float3)(INFINITY, INFINITY, INFINITY); 360 state.upper = -(float3)(INFINITY, INFINITY, INFINITY); 361 362 if (state.prim_index < num_prims) 363 { 364 state.valid = true; 365 float4 v[3]; 366 load_triangle_data((global char*)geomDesc->Desc.Triangles.pIndexBuffer, 367 geomDesc->Desc.Triangles.IndexFormat, 368 (global char*)geomDesc->Desc.Triangles.pVertexBuffer, 369 geomDesc->Desc.Triangles.VertexFormat, 370 geomDesc->Desc.Triangles.VertexBufferByteStride, 371 (global float*)geomDesc->Desc.Triangles.pTransformBuffer, 372 total_vert_count, 373 &state, 374 v); 375 376 if (state.indices.x >= total_vert_count || state.indices.y >= total_vert_count || state.indices.z >= total_vert_count || 377 !isfinite_vertex(v[0]) || !isfinite_vertex(v[1]) || !isfinite_vertex(v[2]) || 378 state.indices.x == state.indices.y || state.indices.x == state.indices.z || state.indices.y == state.indices.z) 379 { 380 state.valid = false; 381 } 382 else 383 { 384 state.lower.xyz = min(v[2].xyz, min(v[1].xyz, v[0].xyz)); 385 state.upper.xyz = max(v[2].xyz, max(v[1].xyz, v[0].xyz)); 386 } 387 } 388 return state; 389} 390 391void broadcast_triangles_local( struct TriState* state ) 392{ 393 varying uint my_prim = state->prim_index; 394 varying uint my_pairing = state->pairing; 395 varying float3 my_lower = state->lower; 396 varying float3 my_upper = state->upper; 397 varying bool valid = state->valid; 398 varying uint3 indices = state->indices; 399 400 for (uniform uint broadcast_lane = 0; broadcast_lane < get_sub_group_size(); broadcast_lane++) 401 { 402 // don't broadcast invalid prims 403 if ( !sub_group_broadcast( valid, broadcast_lane ) ) 404 continue; 405 406 uint broadcast_pairing = sub_group_broadcast(my_pairing, broadcast_lane); 407 uint broadcast_prim = sub_group_broadcast(my_prim, broadcast_lane); 408 409 if (broadcast_pairing == NOT_PAIRED) 410 { 411 // if the broadcast prim is not paired already, all unpaired lanes attempt to pair with it 412 bool pairable = false; 413 uint3 other_indices = sub_group_broadcast_uint3( indices, broadcast_lane ); 414 if (broadcast_prim != my_prim && my_pairing == NOT_PAIRED && valid ) 415 { 416 pairable = can_pair( indices, other_indices ); 417 } 418 419 420 uint pairable_lane = ctz(intel_sub_group_ballot(pairable)); 421 if (valid && pairable_lane < get_sub_group_size()) 422 { 423 // pair the broadcast primitive with the first lane that can accept it 424 float3 broadcast_lower = sub_group_broadcast_float3(my_lower.xyz, broadcast_lane); 425 float3 broadcast_upper = sub_group_broadcast_float3(my_upper.xyz, broadcast_lane); 426 if (get_sub_group_local_id() == pairable_lane) 427 { 428 my_pairing = broadcast_prim; 429 my_lower.xyz = min(my_lower.xyz, broadcast_lower); 430 my_upper.xyz = max(my_upper.xyz, broadcast_upper); 431 } 432 433 // pair the broadcast primitive with the same that was paired to it 434 uint pairable_prim = sub_group_broadcast(my_pairing, pairable_lane); 435 if (get_sub_group_local_id() == broadcast_lane) 436 { 437 my_pairing = pairable_prim; 438 } 439 } 440 } 441 else 442 { 443 // 444 // if this lane was already paired with the broadcasting tri 445 // in an earlier loop iteration, then record the pairing in this lane's registers 446 float3 broadcast_lower = sub_group_broadcast_float3(my_lower.xyz, broadcast_lane); 447 float3 broadcast_upper = sub_group_broadcast_float3(my_upper.xyz, broadcast_lane); 448 if (broadcast_pairing == my_prim) 449 { 450 my_pairing = broadcast_prim; 451 my_lower.xyz = min(my_lower.xyz, broadcast_lower); 452 my_upper.xyz = max(my_upper.xyz, broadcast_upper); 453 } 454 } 455 } 456 457 state->pairing = my_pairing; 458 state->lower = my_lower; 459 state->upper = my_upper; 460} 461 462 463void broadcast_triangles_nonlocal(struct TriState* state, const struct TriState* other ) 464{ 465 varying uint my_prim = state->prim_index; 466 varying uint my_pairing = state->pairing; 467 varying float3 my_lower = state->lower; 468 varying float3 my_upper = state->upper; 469 varying bool valid = state->valid; 470 varying uint3 indices = state->indices; 471 472 for (uniform uint broadcast_lane = 0; broadcast_lane < get_sub_group_size(); broadcast_lane++) 473 { 474 // don't broadcast invalid prims 475 if (!sub_group_broadcast(other->valid, broadcast_lane)) 476 continue; 477 478 uint broadcast_pairing = sub_group_broadcast(other->pairing, broadcast_lane); 479 uint broadcast_prim = sub_group_broadcast(other->prim_index, broadcast_lane); 480 481 if (broadcast_pairing == NOT_PAIRED) 482 { 483 // if the broadcast prim is not paired already, all unpaired lanes attempt to pair with it 484 bool pairable = false; 485 if ( my_pairing == NOT_PAIRED && valid ) 486 { 487 uint3 other_indices = sub_group_broadcast_uint3(other->indices, broadcast_lane); 488 pairable = can_pair(indices, other_indices); 489 } 490 491 // pair the broadcast primitive with the first lane that can accept it 492 uint pairable_mask = intel_sub_group_ballot(pairable); 493 if (valid && (ctz(pairable_mask) == get_sub_group_local_id())) 494 { 495 my_pairing = broadcast_prim; 496 my_lower.xyz = min(my_lower.xyz, sub_group_broadcast_float3(other->lower.xyz, broadcast_lane)); 497 my_upper.xyz = max(my_upper.xyz, sub_group_broadcast_float3(other->upper.xyz, broadcast_lane)); 498 } 499 } 500 501 } 502 503 state->pairing = my_pairing; 504 state->lower = my_lower; 505 state->upper = my_upper; 506} 507 508GRL_INLINE void do_triangles_to_primrefs( 509 global struct Globals* globals, 510 global struct BVHBase* bvh, 511 global struct AABB* primref, 512 global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, 513 uint geomID_and_flags, 514 const uint num_prims) 515{ 516 uint geomID = geomID_and_flags & 0x00ffffff; 517 uint geom_flags = geomID_and_flags >> 24; 518 uint prim_base = get_group_id(0) * get_local_size(0); 519 uint total_vert_count = GRL_get_triangles_VertexCount(geomDesc); 520 521 struct TriState tri = load_triangle( geomDesc, prim_base, num_prims, total_vert_count ); 522 broadcast_triangles_local( &tri ); 523 524 525 // we will produce output if the lane creates a triangle (my_pairing == NOT_PAIRED) 526 // or for the lane corresponding to the larger of two triangles 527 bool will_write = (tri.pairing > tri.prim_index) && tri.valid; 528 uint write_mask = intel_sub_group_ballot(will_write); 529 uint write_offs = subgroup_bit_prefix_exclusive( write_mask ); 530 uint write_count = popcount(write_mask); 531 532 // allocate space in primref buffer 533 uint write_base; 534 if( get_sub_group_local_id() == 0 ) 535 write_base = atomic_add_global( &globals->numPrimitives, write_count ); 536 write_offs += sub_group_broadcast( write_base, 0 ); 537 538 uint primID0 = tri.prim_index; 539 uint primID1 = (tri.pairing != NOT_PAIRED) ? tri.pairing : tri.prim_index; 540 541 if (will_write) 542 { 543 PrimRef ref; 544 PRIMREF_setAABB(&ref, tri.lower.xyz, tri.upper.xyz); 545 PRIMREF_setQuadMetaData(&ref, primID0, primID1, geomID, geom_flags); 546 uint8 val = (uint8)( 547 as_uint(ref.lower.x), as_uint(ref.lower.y), as_uint(ref.lower.z), as_uint(ref.lower.w), 548 as_uint(ref.upper.x), as_uint(ref.upper.y), as_uint(ref.upper.z), as_uint(ref.upper.w)); 549 store_uint8_L1WB_L3WB((global uint8*)(primref + write_offs), 0, val); 550 } 551 552 reduce_bounds( tri.lower, tri.upper, globals, bvh ); 553} 554 555 556GRL_ANNOTATE_IGC_DO_NOT_SPILL 557__attribute__((reqd_work_group_size(16, 1, 1))) 558__attribute__((intel_reqd_sub_group_size(16))) void kernel 559triangles_to_primrefs( 560 global struct Globals* globals, 561 global struct BVHBase* bvh, 562 global struct AABB* primref, 563 global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, 564 uint geomID_and_flags, 565 uint num_prims 566 ) 567{ 568 do_triangles_to_primrefs(globals, bvh, primref, geomDesc, geomID_and_flags, num_prims); 569} 570 571GRL_ANNOTATE_IGC_DO_NOT_SPILL 572__attribute__((reqd_work_group_size(16, 1, 1))) 573void kernel 574triangles_to_primrefs_indirect( 575 global struct Globals* globals, 576 global struct BVHBase* bvh, 577 global struct AABB* primref, 578 global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, 579 global struct IndirectBuildRangeInfo* indirect_data, 580 uint geomID_and_flags) 581{ 582 const uint num_prims = indirect_data->primitiveCount; 583 do_triangles_to_primrefs(globals, bvh, primref, geomDesc, geomID_and_flags, num_prims); 584} 585 586GRL_INLINE void do_procedurals_to_primrefs( 587 global struct Globals* globals, 588 global struct BVHBase* bvh, 589 global struct AABB* primref, 590 global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, 591 uint geomID_and_flags, 592 const uint num_prims) 593{ 594 uint geomID = geomID_and_flags & 0x00ffffff; 595 uint geomFlags = geomID_and_flags >> 24; 596 597 uint primID = get_group_id(0) * get_local_size(0) + get_sub_group_local_id(); 598 599 bool create_primref = false; 600 float3 lower = (float3)(INFINITY, INFINITY, INFINITY); 601 float3 upper = -(float3)(INFINITY, INFINITY, INFINITY); 602 if (primID < num_prims) 603 { 604 /* check if procedural is valid */ 605 struct GRL_RAYTRACING_AABB aabb = GRL_load_aabb(geomDesc, primID); 606 const bool valid_min = isfinite(aabb.MinX) && isfinite(aabb.MinY) && isfinite(aabb.MinZ); 607 const bool valid_max = isfinite(aabb.MaxX) && isfinite(aabb.MaxY) && isfinite(aabb.MaxZ); 608 if (valid_min & valid_max) 609 { 610 /* load aabb from memory */ 611 float3 l = (float3)(aabb.MinX, aabb.MinY, aabb.MinZ); 612 float3 u = (float3)(aabb.MaxX, aabb.MaxY, aabb.MaxZ); 613 614 // convert degenerate boxes to points at the box centroid 615 lower = min( l, u ); 616 upper = max( l, u ); 617 618 create_primref = true; 619 } 620 } 621 622 uint write_mask = intel_sub_group_ballot(create_primref); 623 uint write_offs = subgroup_bit_prefix_exclusive(write_mask); 624 uint write_count = popcount(write_mask); 625 626 // allocate space in primref buffer 627 uint write_base; 628 if (get_sub_group_local_id() == 0) 629 write_base = atomic_add_global(&globals->numPrimitives, write_count); 630 write_offs += sub_group_broadcast(write_base, 0); 631 632 // write the primref 633 if (create_primref) 634 { 635 PrimRef ref; 636 PRIMREF_setAABB(&ref, lower.xyz, upper.xyz); 637 PRIMREF_setProceduralMetaData(&ref, geomID, primID, geomFlags); 638 primref[write_offs] = ref; 639 } 640 641 reduce_bounds(lower, upper, globals, bvh); 642 643} 644 645GRL_ANNOTATE_IGC_DO_NOT_SPILL 646__attribute__((reqd_work_group_size(16, 1, 1))) 647__attribute__((intel_reqd_sub_group_size(16))) void kernel 648procedurals_to_primrefs( 649 global struct Globals* globals, 650 global struct BVHBase* bvh, 651 global struct AABB* primref, 652 global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, 653 uint geomID_and_flags, 654 uint num_prims 655 ) 656{ 657 do_procedurals_to_primrefs(globals, bvh, primref, geomDesc, geomID_and_flags, num_prims); 658} 659 660GRL_ANNOTATE_IGC_DO_NOT_SPILL 661__attribute__((reqd_work_group_size(16, 1, 1))) 662void kernel 663procedurals_to_primrefs_indirect( 664 global struct Globals* globals, 665 global struct BVHBase* bvh, 666 global struct AABB* primref, 667 global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, 668 global const struct IndirectBuildRangeInfo* indirect_data, 669 uint geomID_and_flags 670 ) 671{ 672 const uint num_prims = indirect_data->primitiveCount; 673 do_procedurals_to_primrefs(globals, bvh, primref, geomDesc, geomID_and_flags, num_prims); 674} 675