//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

#include "bvh_build_refit.h"
#include "api_interface.h"
#include "common.h"


#if 0
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(16, 1, 1)))
void kernel
update_instance_leaves( global struct BVHBase* bvh,
                        uint64_t dxrInstancesArray,
                        uint64_t dxrInstancesPtr,
                        global struct AABB3f* instance_aabb_scratch )
{
    uint num_leaves = BVHBase_GetNumHWInstanceLeaves( bvh );
    uint id = get_local_id( 0 ) + get_local_size( 0 ) * get_group_id( 0 );
    if ( id >= num_leaves )
        return;

    global struct GRL_RAYTRACING_INSTANCE_DESC* instancesArray =
        (global struct GRL_RAYTRACING_INSTANCE_DESC*)dxrInstancesArray;
    global struct GRL_RAYTRACING_INSTANCE_DESC** instancesPtrArray =
        (global struct GRL_RAYTRACING_INSTANCE_DESC**)dxrInstancesPtr;

    global struct HwInstanceLeaf* leafs = (global struct HwInstanceLeaf*)BVHBase_GetHWInstanceLeaves( bvh );

    /* iterate over all children of the instance node and get their bounds */
    uint32_t instanceIdx = HwInstanceLeafPart1_getInstanceIndex( &leafs[id] );
    global struct GRL_RAYTRACING_INSTANCE_DESC* instance = NULL;
    if ( dxrInstancesArray != NULL )
        instance = &instancesArray[instanceIdx];
    else
        instance = instancesPtrArray[instanceIdx];

    struct AffineSpace3f xfm = AffineSpace3f_load_row_major( instance->Transform );
    global struct BVHBase* instanceBvh = (global struct BVHBase*)instance->AccelerationStructure;
    struct AABB3f newSubtreeBounds = instanceBvh->Meta.bounds;
    struct AABB3f bbox = AABB3f_transform( xfm, newSubtreeBounds ); // JDB TODO: Use faster abs-matrix method

    const bool valid_min = isfinite( bbox.lower[0] ) && isfinite( bbox.lower[1] ) && isfinite( bbox.lower[2] );
    const bool valid_max = isfinite( bbox.upper[0] ) && isfinite( bbox.upper[1] ) && isfinite( bbox.upper[2] );

    uint mask = GRL_get_InstanceMask( instance );

    uint offset = instanceBvh->rootNodeOffset;
    if ( !valid_min || !valid_max )
    {
        bbox.lower[0] = xfm.p.x;
        bbox.lower[1] = xfm.p.y;
        bbox.lower[2] = xfm.p.z;
        bbox.upper[0] = xfm.p.x;
        bbox.upper[1] = xfm.p.y;
        bbox.upper[2] = xfm.p.z;
        offset = NO_NODE_OFFSET;
        mask = 0;
    }

    instance_aabb_scratch[id] = bbox;

    HwInstanceLeaf_Constructor( &leafs[id], instance, instanceIdx, offset, mask ); // TODO: No instance opening for refittable BVH
}
#endif


GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(16, 1, 1)))
void kernel
update_instance_leaves(global struct BVHBase* bvh,
                       uint64_t dxrInstancesArray,
                       uint64_t dxrInstancesPtr,
                       global struct AABB3f* instance_aabb_scratch)
{
    uint num_leaves = BVHBase_GetNumHWInstanceLeaves(bvh);
    uint id = get_local_id(0) + get_local_size(0) * get_group_id(0);
    if (id >= num_leaves)
        return;

    DO_update_instance_leaves(
        bvh,
        dxrInstancesArray,
        dxrInstancesPtr,
        instance_aabb_scratch,
        id,
        0 );
}
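
/*
 * Dispatch note (illustrative; the host-side enqueue lives outside this
 * file): with the required work-group size of 16, the launch is expected
 * to round the leaf count up to a multiple of 16. For example,
 * num_leaves = 1000 -> ceil(1000 / 16) = 63 groups (1008 work-items),
 * and the `id >= num_leaves` guard above retires the 8 surplus work-items.
 */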

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(16, 1, 1)))
void kernel
update_instance_leaves_indirect(global struct BVHBase* bvh,
                                uint64_t dxrInstancesArray,
                                uint64_t dxrInstancesPtr,
                                global struct AABB3f* instance_aabb_scratch,
                                global struct IndirectBuildRangeInfo* indirect_data)
{
    uint num_leaves = BVHBase_GetNumHWInstanceLeaves(bvh);
    uint id = get_local_id(0) + get_local_size(0) * get_group_id(0);
    if (id >= num_leaves)
        return;

    DO_update_instance_leaves(
        bvh,
        dxrInstancesArray + indirect_data->primitiveOffset,
        dxrInstancesPtr,
        instance_aabb_scratch,
        id,
        0 );
}

#if 0
/*
   This kernel refits a BVH. The algorithm iterates over all BVH nodes
   to find the leaf nodes, which is where refitting starts. For these
   leaf nodes the bounds are recalculated and then propagated up the tree.

   One kernel instance considers a range of inner nodes as startpoints.
 */
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(8, 1, 1))) void kernel refit(
    global struct BVHBase* bvh,
    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray,
    global struct AABB3f* instance_leaf_aabbs )
{
    /* here we temporarily store the bounds for the children of a node */
    struct AABB childrenAABB[BVH_NODE_N6];

    /* get pointer to inner nodes and back pointers */
    global struct QBVHNodeN* inner_nodes = BVHBase_rootNode(bvh);
    BackPointers* backPointers = BVHBase_GetBackPointers(bvh);

    /* construct range of nodes that each work group will process */
    const uint numInnerNodes = BVHBase_numNodes(bvh);
    const uint startID = (get_group_id(0) + 0) * numInnerNodes / get_num_groups(0);
    const uint endID   = (get_group_id(0) + 1) * numInnerNodes / get_num_groups(0);

    /* each workgroup iterates over its range of nodes */
    for (uint i = startID + get_local_id(0); i < endID; i += get_local_size(0))
    {
        global struct QBVHNodeN* curNode = &inner_nodes[i];
        uint numChildren = refit_bottom(bvh, geosArray,
                                        instance_leaf_aabbs,
                                        curNode,
                                        childrenAABB,
                                        *InnerNode_GetBackPointer(backPointers, i));
        if (numChildren != 0)
        {
            /* update bounds of node */
            QBVHNodeN_setBounds(curNode, childrenAABB, numChildren);

            /* refit upper parts of the BVH */
            // TODO: this is not going to work for mixed nodes
            refit_bottom_up(curNode, bvh, childrenAABB, numChildren);
        }
    }
}


GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(8, 1, 1)))
void kernel Find_refit_treelets(
    global struct BVHBase* bvh,
    global TreeletNodeData* treelets,
    global uint* scratchStartpoints,
    global uint* startpointAlloc)
{
    find_refit_treelets(bvh,
        treelets,
        scratchStartpoints,
        startpointAlloc);
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(16, 1, 1)))
void kernel Assign_refit_startpoints_to_treelets(
    global struct BVHBase* bvh,
    global TreeletNodeData* treelets,
    global uint* scratchStartpoints)
{
    assign_refit_startpoints_to_treelets(bvh, treelets, scratchStartpoints);
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(128, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16)))
void kernel Finalize_treelets_in_groups(
    global struct BVHBase* bvh,
    global uint* scratchStartpoints )
{
    local uint depths[FINALIZE_TREELETS_SLM_DEPTHS_SPACE];

    finalize_treelets_in_groups(bvh, scratchStartpoints, depths);
}
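
/*
 * Pipeline note (a reading of the wrappers above, not a spec): the treelet
 * path splits refit preparation into three kernels expected to run in
 * sequence --
 *   1. Find_refit_treelets                  - carves the BVH into treelets
 *                                             and reserves startpoint storage,
 *   2. Assign_refit_startpoints_to_treelets - buckets bottom-up startpoints
 *                                             into their owning treelets,
 *   3. Finalize_treelets_in_groups          - finalizes the per-treelet
 *                                             startpoint lists, staging depth
 *                                             data in SLM.
 * The actual logic lives in bvh_build_refit.h; these wrappers mainly pin the
 * work-group and sub-group sizes for the functions they call.
 */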

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(256, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16)))
void kernel Refit_quads_tree_per_group(global SquashedInput* psqinputs)
{
    uint group_id = get_group_id(0);
    SquashedInput sqinput = psqinputs[group_id];
    global struct BVHBase* bvh = sqinput.pBvh;
    uint numLeaves = BVHBase_GetNumQuads(bvh);
    global QuadLeaf* leafs = (global QuadLeaf*)BVHBase_GetQuadLeaves(bvh);

    global void* input = sqinput.pInput;
    global struct AABB* bbox_scratch = sqinput.bbox_scratch;

    uint leafsIndexOffset = bvh->quadLeafStart - BVH_ROOT_NODE_OFFSET / 64;
    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray = (global GRL_RAYTRACING_GEOMETRY_DESC*)input;
    uint id = get_local_id(0);

    for (uint leaf_id = id; leaf_id < numLeaves; leaf_id += get_local_size(0))
    {
        struct AABB theAABB;
        refit_bottom_child_quad(leafs + leaf_id, geosArray, &theAABB);
        theAABB.lower.w = as_float(0xABBADEFFu);
        bbox_scratch[leafsIndexOffset + leaf_id] = theAABB;
    }
}


GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(32, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16)))
void kernel Refit_quads(
    global struct BVHBase* bvh,
    global void* input,
    global struct AABB* bbox_scratch,
    uint numGroupsExecuted,
    global SquashedInputGroupDesc* sqinput)
{
    uint numLeafs = BVHBase_GetNumQuads(bvh);
    if (numLeafs == 0) return;
    global QuadLeaf* leafs = (global QuadLeaf*)BVHBase_GetQuadLeaves(bvh);

    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray = (global GRL_RAYTRACING_GEOMETRY_DESC*)input;
    uint leafsIndexOffset = bvh->quadLeafStart - BVH_ROOT_NODE_OFFSET / 64;

    uint numLeafsPerGr = (numLeafs + (numGroupsExecuted - 1)) / numGroupsExecuted;

    uint id_start = get_group_id(0) * numLeafsPerGr + get_local_id(0);
    uint id_end = min(id_start + numLeafsPerGr, numLeafs);
    for (uint id = id_start; id < id_end; id += get_local_size(0))
    {
        struct AABB theAABB;
        refit_bottom_child_quad(leafs + id, geosArray, &theAABB);
        theAABB.lower.w = as_float(0xABBADEFFu);
        bbox_scratch[leafsIndexOffset + id] = theAABB;
    }

    if (get_group_id(0) == 0 && get_local_id(0) < 16)
    {
        uint groupnr;
        uint treeletCnt = *BVHBase_GetRefitTreeletCntPtr(bvh);
        if (get_sub_group_local_id() == 0) {
            groupnr = atomic_add_global(&sqinput->totalNumGroups, treeletCnt);
        }
        groupnr = sub_group_broadcast(groupnr, 0);
        for (uint subtree = get_sub_group_local_id(); subtree < treeletCnt; subtree += get_sub_group_size())
        {
            uint gr = groupnr + subtree;
            //printf("tree %llx, treelet %d/%d, grId %d, numStartpoints %d\n", bvh, subtree, treeletCnt, gr, BVHBase_GetRefitTreeletDescs(bvh)[subtree].numStartpoints);
            sqinput[gr].bvh = (qword)bvh;
            sqinput[gr].scratch = (qword)bbox_scratch;
            sqinput[gr].groupInTree = subtree;
        }
        //if (get_local_id(0) == 0 && treeletCnt > 1)
        //{
        //    printf("tree %llx, tip treelet %d/%d = numStartpoints %d depth %d\n", bvh, treeletCnt, treeletCnt, BVHBase_GetRefitTreeletDescs(bvh)[treeletCnt].numStartpoints, BVHBase_GetRefitTreeletDescs(bvh)[treeletCnt].maxDepth);
        //}
    }
}
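
/*
 * Indexing note (an assumption from the arithmetic above, not a spec):
 * bbox_scratch appears to hold one AABB per 64-byte BVH node slot, indexed
 * relative to the root node, so a quad leaf lands at
 * quadLeafStart - BVH_ROOT_NODE_OFFSET / 64 + leaf index (the division
 * binds tighter than the subtraction). The per-group split in Refit_quads
 * is a plain ceiling division: e.g. numLeafs = 1000 and
 * numGroupsExecuted = 48 give numLeafsPerGr = (1000 + 47) / 48 = 21, with
 * min() trimming the range of the last group.
 */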

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(256, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16)))
void kernel
Refit_tree_per_group_quad(
    global SquashedInput* psqinputs)
{
    uint group_id = get_group_id(0);
    SquashedInput sqinput = psqinputs[group_id];
    global struct BVHBase* bvh = sqinput.pBvh;
    global struct AABB* bbox_scratch = sqinput.bbox_scratch;
    global void* pInput = sqinput.pInput;
    local Treelet_by_single_group_locals loc;

    if (*BVHBase_GetRefitTreeletCntPtr(bvh) == 0)
        return;

#if REFIT_DEBUG_CHECKS
    uint bottoms_cnt = *BVHBase_GetRefitTreeletCntPtr(bvh);
    if (bottoms_cnt != 1) {
        if (get_local_id(0) == 0)
        {
            printf("Error: this tree has more than 1 treelet!\n");
        }
        return;
    }
#endif

    /* get pointer to inner nodes and back pointers */
    uniform global struct QBVHNodeN* inner_nodes = BVHBase_rootNode(bvh);

    // uniform per group
    uniform RefitTreelet* pTrltDsc = BVHBase_GetRefitTreeletDescs(bvh);

    uint numLeafs = bvh->quadLeafCur - bvh->quadLeafStart;

    if (numLeafs == 0) { return; }

    uint numLeafsByOneThread = (numLeafs + (get_local_size(0) - 1)) / get_local_size(0);

    update_quads(bvh, pInput, bbox_scratch, get_local_id(0), numLeafsByOneThread);

    mem_fence_workgroup_default(); work_group_barrier(0);

    RefitTreelet trltDsc = *pTrltDsc;

    refit_treelet_by_single_group(
        bbox_scratch,
        &loc,
        bvh,
        trltDsc,
        false,
        true);

    if (trltDsc.maxDepth > 0)
    {
        mem_fence_workgroup_default(); work_group_barrier(0);
        post_refit_encode_qnode_tree_per_group(bbox_scratch, bvh);
    }
}


GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(256, 1, 1)))
__attribute__((intel_reqd_sub_group_size(16)))
void kernel
Refit_treelet_per_group(
    global SquashedInputGroupDesc* sqinput)
{
    uint group_id = get_group_id(0);
    global struct AABB* bbox_scratch = (global struct AABB*)sqinput[group_id].scratch;
    global struct BVHBase* bvh = (global struct BVHBase*)sqinput[group_id].bvh;
    group_id = sqinput[group_id].groupInTree;

    /* get pointer to inner nodes and back pointers */
    uniform global struct QBVHNodeN* inner_nodes = BVHBase_rootNode(bvh);

    uint bottoms_cnt = *BVHBase_GetRefitTreeletCntPtr(bvh);

    // uniform per group
    uniform RefitTreelet* pTrltDsc = BVHBase_GetRefitTreeletDescs(bvh);

    bool should_we_process_treetip = true;
    local Treelet_by_single_group_locals loc;
    local bool* l_should_we_process_treetip = (local bool*)&loc;
#if REFIT_VERBOSE_LOG
    if (group_id != 0) return;
#endif

    if (bottoms_cnt > 1)
    {
#if REFIT_VERBOSE_LOG
        for (; group_id < bottoms_cnt; group_id++)
        {
            if (get_local_id(0) == 0) { printf("\n ====== treelet %d ====== \n", group_id); }
            work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, memory_scope_device);
#endif
        bool rootProcThread = refit_treelet_by_single_group(
            bbox_scratch,
            &loc,
            bvh,
            pTrltDsc[group_id],
            true,
            false);

        // the last group to finish has to go up and process the treetip
        if (rootProcThread)
        {
            mem_fence_gpu_invalidate();
            uint finished_cnt = atomic_inc_global((global uint*)&bvh->refitTreeletCnt2);
            should_we_process_treetip = finished_cnt + 1 == bottoms_cnt;

            *l_should_we_process_treetip = should_we_process_treetip;

            if (should_we_process_treetip) mem_fence_gpu_invalidate();
        }
#if REFIT_VERBOSE_LOG
        }
#endif
        work_group_barrier(CLK_LOCAL_MEM_FENCE, memory_scope_work_group);

        should_we_process_treetip = *l_should_we_process_treetip;
    }

    if (should_we_process_treetip)
    {
        // this group will process the treetip
        if (get_local_id(0) == 0) { bvh->refitTreeletCnt2 = 0; }
        if (bottoms_cnt == 1) { bottoms_cnt = 0; }
        refit_treelet_by_single_group(
            bbox_scratch,
            &loc,
            bvh,
            pTrltDsc[bottoms_cnt],
            true,
            true);
    }
}
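
/*
 * Illustrative sketch (kept disabled; not part of GRL): the "last group
 * through the gate" idiom used by Refit_treelet_per_group above, reduced to
 * its core. The kernel name and the `counter`/`tail_done` parameters are
 * hypothetical; `counter` is assumed zeroed before launch, and the standard
 * OpenCL atomic_inc stands in for GRL's atomic_inc_global wrapper.
 */
#if 0
void kernel last_group_gate_example(global uint* counter, global uint* tail_done, uint num_groups)
{
    // ... per-group bottom-up work would go here ...

    local bool is_last;
    if (get_local_id(0) == 0)
    {
        // The group whose increment brings the counter to num_groups is the
        // last one to finish; only that group runs the tail phase.
        is_last = (atomic_inc(counter) + 1 == num_groups);
    }
    work_group_barrier(CLK_LOCAL_MEM_FENCE);

    if (is_last)
    {
        if (get_local_id(0) == 0)
        {
            *counter = 0;      // reset the gate for the next launch
            *tail_done = 1;    // stand-in for the treetip refit
        }
    }
}
#endif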

/*
   This kernel refits a BVH. The algorithm iterates over all BVH nodes
   to find the leaf nodes, which is where refitting starts. For these
   leaf nodes the bounds are recalculated and then propagated up the tree.

   One kernel instance considers exactly one inner node as a startpoint,
   not a range of inner nodes.
 */
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(8, 1, 1))) void kernel
Refit_per_one_startpoint(
    global struct BVHBase* bvh,
    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray,
    global struct AABB3f* instance_leaf_aabbs )
{
    /* here we temporarily store the bounds for the children of a node */
    struct AABB childrenAABB[BVH_NODE_N6];

    /* get pointer to inner nodes and back pointers */
    global struct QBVHNodeN* inner_nodes = BVHBase_rootNode(bvh);
    BackPointers* backPointers = BVHBase_GetBackPointers(bvh);

    /* get the inner node that we will consider as a bottom startpoint */
    const uint numInnerNodes = BVHBase_numNodes(bvh);
    const uint innerNodeIdx = (get_group_id(0) + 0) * get_local_size(0) + get_local_id(0);

    if (innerNodeIdx >= numInnerNodes) return;

    global struct QBVHNodeN* curNode = &inner_nodes[innerNodeIdx];
    uint numChildren = refit_bottom(
        bvh,
        geosArray,
        instance_leaf_aabbs,
        curNode,
        childrenAABB,
        *InnerNode_GetBackPointer(backPointers, innerNodeIdx));

    if (numChildren != 0)
    {
        /* update bounds of node */
        QBVHNodeN_setBounds(curNode, childrenAABB, numChildren);

        /* refit upper parts of the BVH */
        /* TODO: this is not going to work for mixed nodes */
        refit_bottom_up(curNode, bvh, childrenAABB, numChildren);
    }
}

#endif

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(SG_REFIT_WG_SIZE, 1, 1))) void kernel
Refit_indirect_sg(
    global struct BVHBase* bvh,
    global GRL_RAYTRACING_GEOMETRY_DESC* geosArray,
    global struct AABB3f* instance_leaf_aabbs)
{
    DO_Refit_per_one_startpoint_sg(bvh, geosArray, instance_leaf_aabbs, 0);
}
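
/*
 * Note: with the #if 0 blocks above excluded, this file compiles to exactly
 * three kernels -- update_instance_leaves, update_instance_leaves_indirect,
 * and Refit_indirect_sg. The refit logic they rely on is shared through
 * DO_update_instance_leaves and DO_Refit_per_one_startpoint_sg in
 * bvh_build_refit.h.
 */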