1// 2// Copyright (C) 2009-2021 Intel Corporation 3// 4// SPDX-License-Identifier: MIT 5// 6// 7 8#include "api_interface.h" 9#include "common.h" 10 11#define GRID_SIZE 1024 12 13/* 14 This presplit item contains for each primitive a number of splits to 15 perform (priority) and the primref index. 16 */ 17 18struct PresplitItem 19{ 20 unsigned int index; 21 float priority; 22}; 23 24/* 25 26 This function splits a line v0->v1 at position pos in dimension dim 27 and merges the bounds for the left and right line segments into 28 lbounds and rbounds. 29 30 */ 31 32GRL_INLINE void splitLine(const uint dim, 33 const float pos, 34 const float4 v0, 35 const float4 v1, 36 struct AABB *lbounds, 37 struct AABB *rbounds) 38{ 39 const float v0d = v0[dim]; 40 const float v1d = v1[dim]; 41 42 /* this point is on left side */ 43 if (v0d <= pos) 44 AABB_extend_point(lbounds, v0); 45 46 /* this point is on right side */ 47 if (v0d >= pos) 48 AABB_extend_point(rbounds, v0); 49 50 /* the edge crosses the splitting location */ 51 if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) 52 { 53 const float f = (pos - v0d) / (v1d - v0d); 54 const float4 c = f * (v1 - v0) + v0; 55 AABB_extend_point(lbounds, c); 56 AABB_extend_point(rbounds, c); 57 } 58} 59 60/* 61 62 This function splits a clipped triangle v0,v1,v2 with bounds prim at 63 position pos in dimension dim and merges the bounds for the left and 64 right clipped triangle fragments into lbounds and rbounds. 
 */

GRL_INLINE void splitTriangle(struct AABB *prim,
                              const uint dim,
                              const float pos,
                              const float4 v0,
                              const float4 v1,
                              const float4 v2,
                              struct AABB *lbounds,
                              struct AABB *rbounds)
{
    /* clip each triangle edge against the split plane */
    splitLine(dim, pos, v0, v1, lbounds, rbounds);
    splitLine(dim, pos, v1, v2, lbounds, rbounds);
    splitLine(dim, pos, v2, v0, lbounds, rbounds);

    /* the triangle itself was clipped already, thus clip against triangle bounds */
    AABB_intersect(lbounds, prim);
    AABB_intersect(rbounds, prim);
}

/*
   Estimates the splitting priority of a quad primref: the AABB area,
   scaled up (capped at 4x) when the box is large relative to the
   projected area of the two contained triangles — i.e. where splitting
   promises the largest SAH gain.
 */
float calculate_priority(struct AABB *prim, global GRL_RAYTRACING_GEOMETRY_DESC *geom)
{
    /* calculate projected area of first triangle */
    const uint primID0 = PRIMREF_primID0(prim);
    const uint3 tri0 = GRL_load_triangle(geom, primID0);
    const float4 av0 = GRL_load_vertex(geom, tri0.x);
    const float4 av1 = GRL_load_vertex(geom, tri0.y);
    const float4 av2 = GRL_load_vertex(geom, tri0.z);
    const float area_tri0 = areaProjectedTriangle(av0, av1, av2);

    /* calculate projected area of second triangle */
    const uint primID1 = PRIMREF_primID1(prim);
    const uint3 tri1 = GRL_load_triangle(geom, primID1);
    const float4 bv0 = GRL_load_vertex(geom, tri1.x);
    const float4 bv1 = GRL_load_vertex(geom, tri1.y);
    const float4 bv2 = GRL_load_vertex(geom, tri1.z);
    const float area_tri1 = areaProjectedTriangle(bv0, bv1, bv2);

    /* as priority we use the AABB area */
    const float area_aabb = AABB_halfArea(prim);
    float priority = area_aabb;

    /* prefer triangles with a large potential SAH gain.
       The max(1E-12f, ...) guards against division by zero for
       degenerate triangles. */
    const float area_tris = area_tri0 + area_tri1;
    const float area_ratio = min(4.0f, area_aabb / max(1E-12f, area_tris));
    priority *= area_ratio;

    /* ignore too small primitives */
    //const float4 size = AABB_size(prim);
    //const float max_size = max(size.x,max(size.y,size.z));
    //if (max_size < 0.5f*max_scene_size/GRID_SIZE)
    //    priority = 0.0f;

    return priority;
}

/*

   This kernel calculates for each primitive an estimated splitting priority.

 */

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1))) void kernel compute_num_presplits(global struct Globals *globals,
                                                                                                 global struct BVHBase* bvh_base,
                                                                                                 global struct AABB *primref,
                                                                                                 global struct PresplitItem *presplit,
                                                                                                 global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc)
{
    //assert(sizeof(PresplitItem) == sizeof_PresplitItem);

    /* calculate the range of primitives each work group should process */
    const uint numPrimitives = globals->numPrimitives;
    const uint startID = (get_group_id(0) + 0) * numPrimitives / get_num_groups(0);
    const uint endID = (get_group_id(0) + 1) * numPrimitives / get_num_groups(0);

    /* get scene bounding box size */
    /* NOTE(review): max_scene_size is currently unused here; it relates to
       the disabled small-primitive filter in calculate_priority. */
    const float3 scene_size = AABB3f_size(&bvh_base->Meta.bounds);
    const float max_scene_size = max(scene_size.x, max(scene_size.y, scene_size.z));

    /* each work group iterates over its range of primitives */
    for (uint i = startID + get_local_id(0); i < endID; i += get_local_size(0))
    {
        const uint geomID = PRIMREF_geomID(&primref[i]);

        /* splitting heuristic for triangles */
        if (GRL_is_triangle(&geomDesc[geomID]))
        {
            presplit[i].index = i;
            presplit[i].priority = calculate_priority(&primref[i], &geomDesc[geomID]);
        }

        /* splitting of procedurals is not supported: priority 0 means
           "never split" */
        else if (GRL_is_procedural(&geomDesc[geomID]))
        {
            presplit[i].index = i;
            presplit[i].priority = 0.0f;
        }

        else
        {
            //assert(false);
        }
    }

    /* the first work item of the whole dispatch snapshots the original
       primitive count before splitting grows it */
    if (get_local_id(0) + get_group_id(0)*get_local_size(0) == 0)
        globals->numOriginalPrimitives = globals->numPrimitives;
}

/*

   This kernel computes the sum of all priorities.

 */

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_WORKGROUP_SIZE, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16))) void kernel
priority_sum(global struct Globals *globals,
             global struct PresplitItem *presplit,
             uint numPrimitivesToSplit)
{
    /* partition the presplit array into one contiguous block per work item */
    const uint N = globals->numPrimitives;
    const uint j = get_local_id(0);
    const uint J = get_local_size(0);
    const uint BLOCKSIZE = (N + J - 1) / J;
    const uint start = min((j + 0) * BLOCKSIZE, N);
    const uint end = min((j + 1) * BLOCKSIZE, N);

    /* per-work-item partial sum ... */
    float prioritySum = 0;
    for (uint i = start; i < end; i++)
        prioritySum += presplit[i].priority;

    /* ... reduced across the work group; every work item receives the
       same total, so the (redundant) global store is safe */
    prioritySum = work_group_reduce_add(prioritySum);
    globals->presplitPrioritySum = prioritySum;

#if 0
    work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);

    float scale = 1.0f;
    for (uint i = 0; i < 10; i++)
    {
        //if (j == 0)
        //printf("prioritySum = %f\n",scale*prioritySum);

        uint numSplits = 0;
        for (uint i = start; i < end; i++)
            numSplits += presplit[i].priority / (scale*prioritySum)*numPrimitivesToSplit;

        numSplits = work_group_reduce_add(numSplits);

        if (numSplits > numPrimitivesToSplit)
            break;

        //if (j == 0)
        //    printf("numSplits = %i (%i)\n",numSplits,numPrimitivesToSplit);

        globals->presplitPrioritySum = scale * prioritySum;
        scale -= 0.05f;
    }
#endif
}

/* Restores the max-heap property (keyed on AABB half-area) by sifting
   the root element down. 'size' is the number of valid heap entries. */
GRL_INLINE void heapify_down(struct AABB *array, uint size)
{
    /* we start at the root */
    uint cur_node_id = 0;
    struct AABB *cur_node = array;

    while (true)
    {
        int larger_node_id = cur_node_id;
        struct AABB *larger_node = cur_node;

        /* check if left child is largest */
        const int left_node_id = 2 * cur_node_id + 1;
        struct AABB *left_node = &array[left_node_id];
        if (left_node_id < size && AABB_halfArea(left_node) > AABB_halfArea(larger_node))
        {
            larger_node_id = left_node_id;
            larger_node = left_node;
        }

        /* check if right child is largest */
        const int right_node_id = 2 * cur_node_id + 2;
        struct AABB *right_node = &array[right_node_id];
        if (right_node_id < size && AABB_halfArea(right_node) > AABB_halfArea(larger_node))
        {
            larger_node_id = right_node_id;
            larger_node = right_node;
        }

        /* if current node is largest heap property is fulfilled and we are done */
        if (larger_node_id == cur_node_id)
            break;

        /* otherwise we swap cur and largest */
        struct AABB tmp = *cur_node;
        *cur_node = *larger_node;
        *larger_node = tmp;

        /* we continue downwards with the largest node */
        cur_node_id = larger_node_id;
        cur_node = larger_node;
    }
}

/* Restores the max-heap property (keyed on AABB half-area) by sifting
   the element at cur_node_id up towards the root. */
GRL_INLINE void heapify_up(struct AABB *array, uint cur_node_id)
{
    /* stop if we start at the root */
    if (cur_node_id == 0)
        return;

    struct AABB *cur_node = &array[cur_node_id];

    /* we loop until we reach the root node */
    while (cur_node_id)
    {
        /* get parent node */
        uint parent_node_id = (cur_node_id - 1) / 2;
        struct AABB *parent_node = &array[parent_node_id];

        /* if parent is larger than current we fulfill the heap property and can terminate */
        if (AABB_halfArea(parent_node) > AABB_halfArea(cur_node))
            break;

        /* otherwise we swap cur and parent */
        struct AABB tmp = *cur_node;
        *cur_node = *parent_node;
        *parent_node = tmp;

        /* and continue upwards */
        cur_node_id = parent_node_id;
        cur_node = parent_node;
    }
}

/* Splits a quad primref 'cur' (two triangles) at world-space position
   fsplit in dimension dim, producing the clipped left and right
   fragment bounds with the original PrimRef payload preserved. */
GRL_INLINE void splitQuadPrimRef(global GRL_RAYTRACING_GEOMETRY_DESC *geom,
                                 struct AABB *cur, uint dim, float fsplit,
                                 struct AABB *left, struct AABB *right)
{
    /* left and right bounds to compute */
    AABB_init(left);
    AABB_init(right);

    /* load first triangle and split it */
    const uint primID0 = PRIMREF_primID0(cur);
    const uint3 tri0 = GRL_load_triangle(geom, primID0);
    const float4 av0 = GRL_load_vertex(geom, tri0.x);
    const float4 av1 = GRL_load_vertex(geom, tri0.y);
    const float4 av2 = GRL_load_vertex(geom, tri0.z);
    splitTriangle(cur, dim, fsplit, av0, av1, av2, left, right);

    /* load second triangle and split it */
    const uint primID1 = PRIMREF_primID1(cur);
    const uint3 tri1 = GRL_load_triangle(geom, primID1);
    const float4 bv0 = GRL_load_vertex(geom, tri1.x);
    const float4 bv1 = GRL_load_vertex(geom, tri1.y);
    const float4 bv2 = GRL_load_vertex(geom, tri1.z);
    splitTriangle(cur, dim, fsplit, bv0, bv1, bv2, left, right);

    /* copy the PrimRef payload (stored in the .w lanes) into left and right */
    left->lower.w = cur->lower.w;
    left->upper.w = cur->upper.w;
    right->lower.w = cur->lower.w;
    right->upper.w = cur->upper.w;
}

/*

   This kernel performs the actual pre-splitting. It selects split
   locations based on an implicit octree over the scene.
 */

#define USE_HEAP 0
#define HEAP_SIZE 32u

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
//__attribute__((intel_reqd_sub_group_size(16)))
void kernel
perform_presplits(global struct Globals *globals,
                  global struct BVHBase* bvh_base,
                  global struct AABB *primref,
                  global struct PresplitItem *presplit,
                  global char *bvh_mem,
                  global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc,
                  uint numPrimitivesToSplit)
{
    /* calculate the range of primitives each work group should process;
       only the last numPrimitivesToSplit entries (highest priority after
       sorting) are considered */
    const uint numPrimitives = globals->numPrimitives;
    int pstart = globals->numOriginalPrimitives - numPrimitivesToSplit;
    pstart = max(0, pstart);
    const uint numPrimitivesToProcess = globals->numPrimitives - pstart;
    const uint startID = (get_group_id(0) + 0) * numPrimitivesToProcess / get_num_groups(0);
    const uint endID = (get_group_id(0) + 1) * numPrimitivesToProcess / get_num_groups(0);

    /* calculates the 3D grid (a cube of GRID_SIZE^3 cells over the
       scene bounds; extent is made uniform across dimensions) */
    float4 grid_base;
    grid_base.xyz = AABB3f_load_lower( &bvh_base->Meta.bounds );
    grid_base.w = 0;

    float4 grid_extend;
    grid_extend.xyz = AABB3f_size(&bvh_base->Meta.bounds);
    grid_extend.w=0;

    grid_extend = max(grid_extend.x, max(grid_extend.y, grid_extend.z));
    /* scale is forced to 0 for a degenerate (empty) scene extent */
    const float4 grid_scale = select(GRID_SIZE / grid_extend, 0.0f, grid_extend == 0.0f);
    const float inv_grid_size = 1.0f / GRID_SIZE;

    /* we have to update centroid bounds since splitting creates new centroids */
    struct AABB centroidBounds;
    AABB_init(&centroidBounds);

    /* initialize per-work-item heap of primitive fragments */
    struct AABB heap[HEAP_SIZE];
    uint heap_size = 0;

    /* each work group iterates over its range of primitives */
    for (uint j = startID + get_local_id(0); j < endID; j += get_local_size(0))
    {
        /* array is in ascending order */
        //const uint ID = numPrimitives-1-j;
        const uint ID = pstart + j;
        const float prob = presplit[ID].priority;
        const uint i = presplit[ID].index;
        const uint geomID = PRIMREF_geomID(&primref[i]);

        /* do not split primitives with low splitting priority */
        if (prob <= 0.0f)
            continue;

        /* we support splitting only for triangles */
        if (!GRL_is_triangle(&geomDesc[geomID]))
            continue;

        /* compute number of split primitives to produce, proportional to
           this primitive's share of the total priority */
        uint numSplitPrims = prob / globals->presplitPrioritySum * numPrimitivesToSplit;
        numSplitPrims = min(HEAP_SIZE, numSplitPrims);

        /* stop if no splits have to be performed */
        if (numSplitPrims <= 1)
            continue;

        /* add primref to heap */
        heap[0] = primref[i];
        heap_size = 1;
        uint heap_pos = 0;

        /* iterate until all splits are done */
        uint prims = 1;
        uint last_heap_size = heap_size;
        while (prims < numSplitPrims)
        {
            /* map the primitive bounds to the grid; the +/-0.2f nudges the
               bounds inward to avoid splitting at cell boundaries the box
               barely touches */
            const float4 lower = heap[heap_pos].lower;
            const float4 upper = heap[heap_pos].upper;
            const float4 glower = (lower - grid_base) * grid_scale + 0.2f;
            const float4 gupper = (upper - grid_base) * grid_scale - 0.2f;
            uint4 ilower = convert_uint4_rtz(glower);
            uint4 iupper = convert_uint4_rtz(gupper);

            /* this ignores dimensions that are empty */
            if (glower.x >= gupper.x)
                iupper.x = ilower.x;
            if (glower.y >= gupper.y)
                iupper.y = ilower.y;
            if (glower.z >= gupper.z)
                iupper.z = ilower.z;

            /* Now we compute a morton code for the lower and upper grid
             * coordinates. */
            const uint lower_code = bitInterleave3D(ilower);
            const uint upper_code = bitInterleave3D(iupper);

            /* if all bits are equal then we cannot split */
            if (lower_code == upper_code)
            {
#if !USE_HEAP
                /* count the unsplittable fragment as produced and advance
                   round-robin to the next fragment */
                prims++; // !!!!!!!

                heap_pos++;
                if (heap_pos == last_heap_size)
                {
                    heap_pos = 0;
                    last_heap_size = heap_size;
                }
                continue;
#else
                if (heap_size == 1)
                    break;

                /* emit the unsplittable fragment as a new primref */
                const uint offset = numPrimitives + atomic_add(&globals->numSplittedPrimitives, 1);
                primref[offset] = heap[heap_pos];

                presplit[offset].index = offset;
                presplit[offset].priority = calculate_priority(&heap[heap_pos], &geomDesc[geomID]);

                heap[0] = heap[--heap_size];
                heapify_down(heap, heap_size);
                continue;
#endif
            }

            /* We find the bit position of the first differing bit from the
             * top down. This bit indicates a split position inside an
             * implicit octree. */
            const uint diff = 31 - clz(lower_code ^ upper_code);

            /* compute octree level and dimension to perform the split in */
            const uint level = diff / 3;
            const uint dim = diff % 3;

            /* now we compute the grid position of the split */
            const uint isplit = iupper[dim] & ~((1 << level) - 1);

            /* compute world space position of split */
            const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend[dim];

            /* split primref into left and right part */
            struct AABB left, right;
            splitQuadPrimRef(&geomDesc[geomID], &heap[heap_pos], dim, fsplit, &left, &right);
            prims++;

            /* update centroid bounds */
            AABB_extend_point(&centroidBounds, AABB_centroid2(&left));
            AABB_extend_point(&centroidBounds, AABB_centroid2(&right));

#if !USE_HEAP
            /* breadth-first: replace current fragment with left, append
               right, and advance round-robin */
            heap[heap_pos] = left;
            heap[heap_size] = right;
            heap_size++;

            heap_pos++;
            if (heap_pos == last_heap_size)
            {
                heap_pos = 0;
                last_heap_size = heap_size;
            }
#else
            /* largest-first: insert left element into heap */
            heap[0] = left;
            heapify_down(heap, heap_size);

            /* insert right element into heap */
            heap[heap_size] = right;
            heapify_up(heap, heap_size);

            heap_size++;
#endif
        }

        /* copy primitives to primref array: the first fragment reuses the
           original slot, the remaining ones are appended atomically */
        primref[i] = heap[0];

        presplit[ID].index = i;
        presplit[ID].priority = calculate_priority(&heap[0], &geomDesc[geomID]);

        for (uint k = 1; k < heap_size; k++)
        {
            const uint offset = numPrimitives + atomic_add(&globals->numSplittedPrimitives, 1);
            primref[offset] = heap[k];

            presplit[offset].index = offset;
            presplit[offset].priority = calculate_priority(&heap[k], &geomDesc[geomID]);
        }
    }

    /* merge centroid bounds into global bounds */
    centroidBounds = AABB_sub_group_reduce(&centroidBounds);
    if (get_sub_group_local_id() == 0)
        AABB_global_atomic_merge(&globals->centroidBounds, &centroidBounds);

    work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);

    /* update number of primitives on finish (last work group to complete) */
    if (Globals_OnFinish(globals))
    {
        globals->numPrimitives = globals->numPrimitives + globals->numSplittedPrimitives;
        globals->numSplittedPrimitives = 0;

        /* update first build record */ // FIXME: should be done in builder itself
        global struct BuildRecord *record = (global struct BuildRecord *)(bvh_mem + bvh_base->quadLeafStart*64);
        record->end = globals->numPrimitives;
    }
}