//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

// Metakernels for the binned-SAH BVH builder.
//
// This file declares the kernels used by the builder and defines three
// metakernels that sequence them:
//   - single_pass_binsah      : one 1x1x1 dispatch of the "DFS" kernel
//   - new_sah_build           : one build, driven by a primitive count in memory
//   - new_sah_build_batchable : many builds, driven by a shared Scheduler struct

module new_sah_builder;

// Kernels whose source is bvh_build_BFS.cl. Each entry binds the name used by
// the metakernels below to the kernel function named in kernelFunction.
kernel_module bfs_kernels ("bvh_build_BFS.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_kernel_BinnedSAH_BFS_pass1_initial < kernelFunction="BFS_pass1_initial" > ;
    kernel opencl_build_kernel_BinnedSAH_BFS_pass1_indexed < kernelFunction="BFS_pass1_indexed" > ;
    kernel opencl_build_kernel_BinnedSAH_BFS_pass2_initial < kernelFunction="BFS_pass2_initial" > ;
    kernel opencl_build_kernel_BinnedSAH_BFS_pass2_indexed < kernelFunction="BFS_pass2_indexed" > ;

    kernel opencl_build_kernel_BinnedSAH_DFS < kernelFunction="DFS" >;
    // kernel opencl_build_kernel_BinnedSAH_BuildQNodes < kernelFunction="build_qnodes" >;
    kernel opencl_build_kernel_BinnedSAH_BuildQNodes_Kickoff < kernelFunction="build_qnodes_pc_kickoff" >;
    kernel opencl_build_kernel_BinnedSAH_BuildQNodes_Amplify < kernelFunction="build_qnodes_pc_amplify" >;
    kernel opencl_build_kernel_BinnedSAH_begin < kernelFunction = "begin" >;
    kernel opencl_build_kernel_BinnedSAH_scheduler < kernelFunction = "scheduler" >;

    // "batch" variants, used only by new_sah_build_batchable
    kernel opencl_build_kernel_BinnedSAH_BFS_pass1_initial_batch < kernelFunction="BFS_pass1_initial_batchable" >;
    kernel opencl_build_kernel_BinnedSAH_BFS_pass1_indexed_batch < kernelFunction="BFS_pass1_indexed_batchable" >;
    kernel opencl_build_kernel_BinnedSAH_BFS_pass2_initial_batch < kernelFunction="BFS_pass2_initial_batchable" >;
    kernel opencl_build_kernel_BinnedSAH_BFS_pass2_indexed_batch < kernelFunction="BFS_pass2_indexed_batchable" >;

    kernel opencl_build_kernel_BinnedSAH_categorize_builds_and_init_scheduler < kernelFunction="categorize_builds_and_init_scheduler" >;
    kernel opencl_build_kernel_BinnedSAH_begin_batched < kernelFunction="begin_batchable" >;

    kernel opencl_build_kernel_BinnedSAH_qnode_init_scheduler_batched < kernelFunction="build_qnodes_init_scheduler_batched" >;
    kernel opencl_build_kernel_BinnedSAH_qnode_begin_batched < kernelFunction="build_qnodes_begin_batchable" >;
    kernel opencl_build_kernel_BinnedSAH_qnode_scheduler < kernelFunction="build_qnodes_scheduler" >;
    kernel opencl_build_kernel_BinnedSAH_BuildQNodes_Amplify_batch < kernelFunction="build_qnodes_pc_amplify_batched" >;

    kernel opencl_build_kernel_BinnedSAH_BuildQnodes_TryToFillGRB_batched < kernelFunction="build_qnodes_try_to_fill_grb_batched" >;

}

// Kernels whose source is bvh_build_DFS.cl.
kernel opencl_build_kernel_DFS_single_wg < source="bvh_build_DFS.cl", kernelFunction="DFS_single_wg" >
kernel opencl_build_kernel_DFS_trivial < source="bvh_build_DFS.cl", kernelFunction="DFS_trivial" >
kernel opencl_build_kernel_DFS_single_wg_batch < source="bvh_build_DFS.cl", kernelFunction="DFS_single_wg_batchable" >
kernel opencl_build_kernel_DFS_trivial_batch < source="bvh_build_DFS.cl", kernelFunction="DFS_trivial_batchable" >

// Dispatched by the metakernel of the same name further down.
kernel single_pass_binsah < source="bvh_build_DFS.cl", kernelFunction="DFS" >


// new_sah_build routes a build with at most DFS_MIN_PRIMREFS primitives to the
// DFS_trivial kernel, and one with at most DFS_MAX_PRIMREFS primitives to the
// DFS_single_wg kernel. Anything larger takes the full BFS/DFS path.
const DFS_MIN_PRIMREFS = 6;
const DFS_MAX_PRIMREFS = 256;
// NOTE(review): not referenced anywhere in this file. Presumably log2 of the
// BFS workgroup size of 512 that new_sah_build hard-codes (as +511 and a
// shift by 8 then 1) -- confirm before relying on it.
const BFS_WG_SIZE_SHIFT = 9;



// Scheduler state shared between the kernels and the metakernels.
//
// The metakernels read adjacent dword pairs with a single load_qword and then
// use the .lo / .hi halves of the register, so the order of the fields inside
// each pair must not change.
struct Scheduler
{
    // Pair read from the start of the struct: .lo = BFS WG count, .hi = DFS WG count.
    // num_dfs_wgs is also polled by semaphore_wait in the build loops.
    dword num_bfs_wgs;
    dword num_dfs_wgs;

    // Semaphore dword: written by postsync stores, polled by semaphore_wait.
    dword scheduler_postsync;
    // new_sah_build clears scheduler_postsync with a store_qword, which also covers this dword.
    dword _pad1;

    // Pair read by new_sah_build_batchable: .lo sizes the trivial dispatch, .hi the single-WG dispatch.
    dword num_trivial_builds;
    dword num_single_builds;

    // Pair read by new_sah_build_batchable (see the comments at each load for
    // how the two halves are interpreted in the build and qnode phases).
    dword batched_build_wg_count;
    dword batched_build_loop_mask;

};


// Arguments of new_sah_build. The qword fields are used as addresses; apart
// from the ones called out below, the metakernel only forwards them to kernels.
struct SAHBuildArgs
{
    qword p_num_primitives;         // a dword primitive count is loaded from here
    qword p_qnode_child_buffer;
    qword p_scheduler;              // base address of a Scheduler struct
    qword p_sah_globals;
    qword p_globals;
    qword p_primref_buffer;
    qword p_primref_index_buffers;
    qword p_bvh_base;
    qword p_bvh2;
    qword p_root_buffer_counters;   // qnode loop: loads a qword at +0, stores qwords at +8 and +16
    dword sah_build_flags;
    dword leaf_size;
    dword leaf_type;
    dword max_internal_nodes;       // not referenced by the metakernel in this file
};


// Launches a single 1x1x1 dispatch of the single_pass_binsah kernel, passing
// all five arguments through unchanged.
metakernel single_pass_binsah(
    qword build_globals,
    qword bvh_buffer,
    qword build_primref_buffer,
    qword build_primref_index_buffers,
    dword alloc_backpointers )
{

    dispatch single_pass_binsah(1, 1, 1) args(
        build_globals,
        bvh_buffer,
        build_primref_buffer,
        build_primref_index_buffers,
        alloc_backpointers
    );

}



// Builds one BVH.
//
// Loads the primitive count from build_args.p_num_primitives and picks a path:
//   count <= DFS_MIN_PRIMREFS : one DFS_trivial workgroup
//   count <= DFS_MAX_PRIMREFS : one DFS_single_wg workgroup
//   otherwise                 : begin -> BFS pass1/pass2 -> scheduler loop
//                               (DFS + indexed BFS passes) -> qnode build loop
//
// Kernel-to-kernel ordering on the full path uses the scheduler_postsync dword
// as a semaphore (postsync store + semaphore_wait) and control( wait_idle ).
metakernel new_sah_build( SAHBuildArgs build_args )
{
    // REG0 holds the primitive count here; the build loop and the qnode loop
    // later re-alias REG0 for other values.
    define REG_num_prims REG0;

    {
        define C_MIN_PRIMREFS REG1;
        define C_MAX_PRIMREFS REG2;
        define REG_dispatch_trivial REG3;
        define REG_dispatch_single_wg REG4;

        REG_num_prims = load_dword( build_args.p_num_primitives );
        C_MIN_PRIMREFS = DFS_MIN_PRIMREFS;
        C_MAX_PRIMREFS = DFS_MAX_PRIMREFS;

        REG_dispatch_trivial = REG_num_prims <= C_MIN_PRIMREFS;
        REG_dispatch_single_wg = REG_num_prims <= C_MAX_PRIMREFS;

        // the trivial test comes first, so the single-WG path only sees
        // counts above DFS_MIN_PRIMREFS
        goto l_dispatch_trivial if(REG_dispatch_trivial.lo);
        goto l_dispatch_single_wg if(REG_dispatch_single_wg.lo);
        goto l_full_build;
    }

l_dispatch_trivial:
    {
        dispatch opencl_build_kernel_DFS_trivial (1,1,1)
            args( build_args.p_globals,
                  build_args.p_bvh_base,
                  build_args.p_primref_buffer,
                  build_args.p_primref_index_buffers,
                  build_args.sah_build_flags
                );

        control( wait_idle );
        goto l_done;
    }

l_dispatch_single_wg:
    {
        dispatch opencl_build_kernel_DFS_single_wg (1,1,1)
            args( build_args.p_globals,
                  build_args.p_bvh_base,
                  build_args.p_primref_buffer,
                  build_args.p_primref_index_buffers,
                  build_args.sah_build_flags
                );

        control( wait_idle );
        goto l_done;
    }


l_full_build:


    {
        // addresses inside the Scheduler struct
        define p_scheduler build_args.p_scheduler;
        define p_num_dfs_wgs build_args.p_scheduler + offsetof(Scheduler.num_dfs_wgs);
        define p_scheduler_postsync (build_args.p_scheduler + offsetof(Scheduler.scheduler_postsync) );
        define C_0 REG1;
        define C_8 REG2;
        C_8 = 8;
        C_0 = 0;


        //
        // Init pass
        //
        store_dword( p_scheduler_postsync, C_0.lo );

        // compute number of BFS WGs from prim-count
        // NOTE: This code uses a hardcoded WG size of 512 for BFS.
        //       If the BFS WG size ever changes, the constants below must be updated.
        //       The shift by 9 is split into 8 and 1 because the DG2 shifter only supports POW2 shifts.
        {
            define REG_scheduler_postsync REG3;
            define C_511 REG4;
            define C_1 REG5;

            REG_scheduler_postsync = p_scheduler_postsync;
            C_511 = 511;
            C_1 = 1;

            store_qword( REG_scheduler_postsync, C_0 ); // initialize scheduler semaphore

            // REG_num_prims = (num_prims + 511) >> 9
            REG_num_prims = REG_num_prims + C_511;
            REG_num_prims = REG_num_prims >> C_8;
            REG_num_prims = REG_num_prims >> C_1;

            // dimensions consumed by the dispatch_indirect calls below
            DISPATCHDIM_X = REG_num_prims.lo;
            DISPATCHDIM_Y = 1;
            DISPATCHDIM_Z = 1;

            control( cs_store_fence ); // commit the semaphore write

            // launch scheduler init kernel
            dispatch opencl_build_kernel_BinnedSAH_begin (1,1,1)
                args(
                    build_args.p_scheduler,
                    build_args.leaf_size,
                    build_args.leaf_type,
                    build_args.p_primref_index_buffers,
                    build_args.p_primref_buffer,
                    build_args.p_bvh2,
                    build_args.p_bvh_base,
                    build_args.p_globals,
                    build_args.p_sah_globals,
                    build_args.p_qnode_child_buffer,
                    build_args.sah_build_flags
                )
                postsync store_dword( p_scheduler_postsync, 1 );

            // wait on init kernel
            semaphore_wait while( *p_scheduler_postsync != 1 );

            // launch initial BFS pass1
            dispatch_indirect opencl_build_kernel_BinnedSAH_BFS_pass1_initial
                args( build_args.p_scheduler,
                      build_args.p_sah_globals)
                postsync store_dword( p_scheduler_postsync, 0 );

            // wait on BFS pass1
            semaphore_wait while( *p_scheduler_postsync != 0 );

            // launch BFS pass2
            dispatch_indirect opencl_build_kernel_BinnedSAH_BFS_pass2_initial
                args( build_args.p_scheduler,
                      build_args.p_sah_globals )
                postsync store_dword( p_scheduler_postsync, 1 );
        }

        // after BFS pass 2 we drop into a scheduling loop

        l_build_loop:
        {
            // every path into this label leaves a BFS pass2 in flight whose
            // postsync writes 1
            semaphore_wait while( *p_scheduler_postsync != 1 );

            {
                dispatch opencl_build_kernel_BinnedSAH_scheduler(1,1,1)
                    args( build_args.p_scheduler, build_args.p_sah_globals )
                    postsync store_dword( p_scheduler_postsync, 0 );

                // wait on the scheduler
                semaphore_wait while( *p_scheduler_postsync != 0 );
            }

            // load and process the scheduler results
            // (REG0..REG3 are re-aliased here; the values they held above are no longer needed)
            define REG_wg_counts REG0;
            define REG_num_bfs_wgs REG0.lo;
            define REG_num_dfs_wgs REG0.hi;
            define REG_loop_break REG1;
            define REG_p_scheduler REG2;
            {
                // one qword from the start of Scheduler: lo = num_bfs_wgs, hi = num_dfs_wgs
                REG_p_scheduler = p_scheduler;
                REG_wg_counts = load_qword( REG_p_scheduler );

                define C_MASK_LO REG3 ;
                C_MASK_LO = 0xffffffff;

                // REG_loop_break = (num_bfs_wgs == 0)
                REG_loop_break = REG_wg_counts & C_MASK_LO;
                REG_loop_break = REG_loop_break == 0;
            }

            // dispatch new DFS WGs (no postsync; completion is covered by the
            // wait_idle at l_build_qnodes)
            DISPATCHDIM_X = REG_num_dfs_wgs;
            dispatch_indirect opencl_build_kernel_BinnedSAH_DFS
                args( p_scheduler,
                      build_args.p_sah_globals );

            // jump out if there are no bfs WGs
            goto l_build_qnodes if (REG_loop_break);

            // dispatch new BFS1 WGs
            DISPATCHDIM_X = REG_num_bfs_wgs;
            dispatch_indirect opencl_build_kernel_BinnedSAH_BFS_pass1_indexed
                args( p_scheduler,
                      build_args.p_sah_globals )
                postsync store_dword( p_scheduler_postsync, 2 );

            semaphore_wait while( *p_scheduler_postsync != 2 );

            // dispatch new BFS2 WGs
            dispatch_indirect opencl_build_kernel_BinnedSAH_BFS_pass2_indexed
                args( p_scheduler,
                      build_args.p_sah_globals )
                postsync store_dword( p_scheduler_postsync, 1 );

            //goto l_build_loop if not(REG_num_dfs_wgs); //TODO: maybe add some logic to do "bool have_dfs_work" which will add some cycles but may be faster than checking semaphore

            // wait until all upcoming DFS WGs have finished launching
            // so that the scheduler can refill the launch array
            // TODO_OPT: Look at replacing this with a ring buffer so that scheduler stalls instead (and only rarely)
            semaphore_wait while( *p_num_dfs_wgs != 0 );


            goto l_build_loop;
        }
    }

l_build_qnodes:

    control( wait_idle );

    // P/C qnode build

    dispatch opencl_build_kernel_BinnedSAH_BuildQNodes_Kickoff (1,1,1)
        args( build_args.p_sah_globals,
              build_args.p_qnode_child_buffer,
              build_args.sah_build_flags );

    {
        define p_pc_counters ( build_args.p_root_buffer_counters );

        define REG_addr REG0;
        define REG_produced REG1;
        define REG_consumed REG2;
        define REG_have_work REG3;
        define REG_wg_count REG4;
        define C_8 REG5;
        define C_16 REG6;
        define C_1 REG7;
        C_1 = 1;
        C_8 = 8;
        C_16 = 16;
        REG_addr = build_args.p_root_buffer_counters; // HINT: should we use REG_addr or just pass separate arguments to metakernel to avoid add/sub from address

        REG_consumed = 0;

        // Each iteration: read the produced counter, and if it is ahead of
        // REG_consumed, publish the [consumed, produced) range and launch an
        // amplify pass sized from the difference.
        l_qnode_loop:

            control( wait_idle ); // wait for previous pass

            // load counters and compute number of wgs to respawn
            // (REG_addr: base -> base+8)
            REG_produced = load_qword( REG_addr ); REG_addr = REG_addr + C_8;
            REG_wg_count = REG_produced - REG_consumed;
            REG_have_work = REG_wg_count > 0;

            goto l_done if not(REG_have_work.lo);

            // save REG_consumed as a starting position in p_qnode_child_buffer
            // (stored at base+8; REG_addr: base+8 -> base+16)
            store_qword(REG_addr, REG_consumed); REG_addr = REG_addr + C_8;

            // save REG_produced as ending position in p_qnode_child_buffer
            // (stored at base+16; REG_addr is then rewound to base)
            store_qword(REG_addr, REG_produced); REG_addr = REG_addr - C_16;

            REG_consumed = REG_consumed + REG_wg_count; // update consumed for next iteration

            // calculate the number of workgroups to schedule: (count + 1) >> 1
            REG_wg_count = REG_wg_count + C_1;
            REG_wg_count = REG_wg_count >> C_1;

            DISPATCHDIM_X = REG_wg_count.lo;

            control( cs_store_fence ); // commit the stores

            dispatch_indirect opencl_build_kernel_BinnedSAH_BuildQNodes_Amplify
                args( build_args.p_sah_globals,
                      build_args.p_qnode_child_buffer,
                      build_args.sah_build_flags);

            goto l_qnode_loop;
    }

l_done:
}









// Arguments of new_sah_build_batchable. The qword fields are used as addresses.
struct SAHBuildArgsBatchable
{
    qword p_globals_ptrs;
    qword p_scheduler;          // base address of a Scheduler struct
    qword p_buffers_info;
    qword p_sah_globals;

    dword num_max_qnode_global_root_buffer_entries;
    dword num_builds;           // compared against the trivial + single-WG build counts

};


// Builds a batch of BVHs.
//
// Phases:
//   1. categorization kernel, then the trivial and single-WG DFS dispatches
//      sized from Scheduler.num_trivial_builds / num_single_builds
//   2. if any builds remain: outer loop (begin_batched + initial BFS passes)
//      around an inner scheduler loop (DFS + indexed BFS passes)
//   3. qnode build loop
//
// The semaphore values in phase 2 are the mirror image of new_sah_build: the
// inner loop waits for 0 before running the scheduler and for 1 after it.
metakernel new_sah_build_batchable( SAHBuildArgsBatchable build_args )
{
    // addresses inside the Scheduler struct
    define p_scheduler build_args.p_scheduler;
    define p_scheduler_postsync (build_args.p_scheduler + offsetof(Scheduler.scheduler_postsync) );
    define p_num_dfs_wgs (build_args.p_scheduler + offsetof(Scheduler.num_dfs_wgs));

    // initialize scheduler semaphore
    REG0.lo = 0;
    store_dword( p_scheduler_postsync, REG0.lo );


    // dispatch categorization pass
    dispatch opencl_build_kernel_BinnedSAH_categorize_builds_and_init_scheduler(2,1,1)
        args(
            build_args.p_scheduler,
            build_args.p_globals_ptrs,
            build_args.p_buffers_info,
            build_args.p_sah_globals,
            build_args.num_builds
        )
        postsync store_dword( p_scheduler_postsync, 1 );

    // wait on the categorization pass
    semaphore_wait while( *p_scheduler_postsync != 1 );


    // dispatch the trivial and single-WG passes
    {
        // REG0.lo = num_trivial_builds, REG0.hi = num_single_builds
        REG0 = load_qword( build_args.p_scheduler + offsetof(Scheduler.num_trivial_builds) );
        DISPATCHDIM_X = REG0.lo;
        DISPATCHDIM_Y = 1;
        DISPATCHDIM_Z = 1;

        // dispatch trivial builds

        dispatch_indirect opencl_build_kernel_DFS_trivial_batch
            args( build_args.p_sah_globals );

        control( wait_idle );

        // dispatch single-wg builds

        DISPATCHDIM_X = REG0.hi;
        dispatch_indirect opencl_build_kernel_DFS_single_wg_batch
            args( build_args.p_sah_globals, build_args.p_scheduler );
    }

    // compute the number of builds not covered by the trivial passes
    // skip the builder loop if all builds are satisfied by trivial passes
    {
        // REG5 = num_builds - (num_single_builds + num_trivial_builds)
        REG1 = REG0.lo;
        REG2 = REG0.hi;
        REG3 = build_args.num_builds;
        REG5 = REG2 + REG1;
        REG5 = REG3 - REG5;
        REG4 = REG5 == 0 ;

        goto l_done if (REG4.lo);
    }

    // REG5 (number of non-trivial builds) will be used to launch build_qnodes kernel after the build loop
    // NOTE(review): REG_num_nontrivial is not referenced again in this file, and
    // REG5 is overwritten by a load_qword at the start of the qnode phase. Only
    // the commented-out "old implementation" near l_done reads REG5.lo. The
    // comment above looks stale -- confirm.
    define REG_num_nontrivial REG5;

l_build_outer_loop:
    {

        // configure the scheduler to initiate a new block of builds

        dispatch opencl_build_kernel_BinnedSAH_begin_batched (1,1,1)
            args( build_args.p_scheduler, build_args.p_sah_globals )
            postsync store_dword( p_scheduler_postsync, 0 );

        // wait on init kernel
        semaphore_wait while( *p_scheduler_postsync != 0 );


        // read results produced by scheduler init kernel
        // lo == BFS wg count. hi == all ones if we need to loop again
        //
        // REG4 must survive the inner loop (which uses REG0..REG3): REG4.hi is
        // tested at l_continue_outer_loop.
        REG0 = build_args.p_scheduler + offsetof(Scheduler.batched_build_wg_count);
        REG4 = load_qword( REG0 );

        // launch initial BFS pass1
        DISPATCHDIM_X = REG4.lo;
        dispatch_indirect opencl_build_kernel_BinnedSAH_BFS_pass1_initial_batch
            args( build_args.p_scheduler,
                  build_args.p_sah_globals)
            postsync store_dword( p_scheduler_postsync, 1 );

        // wait on BFS pass1
        semaphore_wait while( *p_scheduler_postsync != 1 );

        // launch BFS pass2
        dispatch_indirect opencl_build_kernel_BinnedSAH_BFS_pass2_initial_batch
            args( build_args.p_scheduler,
                  build_args.p_sah_globals )
            postsync store_dword( p_scheduler_postsync, 0 );

        l_build_loop:
        {
            // every path into this label leaves a BFS pass2 in flight whose
            // postsync writes 0
            semaphore_wait while( *p_scheduler_postsync != 0 );

            {
                dispatch opencl_build_kernel_BinnedSAH_scheduler(1,1,1)
                    args( build_args.p_scheduler, build_args.p_sah_globals )
                    postsync store_dword( p_scheduler_postsync, 1 );

                // wait on the scheduler
                semaphore_wait while( *p_scheduler_postsync != 1 );
            }

            // load and process the scheduler results
            define REG_wg_counts REG0;
            define REG_num_bfs_wgs REG0.lo;
            define REG_num_dfs_wgs REG0.hi;
            define REG_loop_break REG1;
            define REG_p_scheduler REG2;
            {
                // one qword from the start of Scheduler: lo = num_bfs_wgs, hi = num_dfs_wgs
                REG_p_scheduler = p_scheduler;
                REG_wg_counts = load_qword( REG_p_scheduler );

                define C_MASK_LO REG3 ;
                C_MASK_LO = 0xffffffff;

                // REG_loop_break = (num_bfs_wgs == 0)
                REG_loop_break = REG_wg_counts & C_MASK_LO;
                REG_loop_break = REG_loop_break == 0;
            }

            // dispatch new DFS WGs (no postsync; completion is covered by the
            // wait_idle that opens the qnode phase)
            DISPATCHDIM_X = REG_num_dfs_wgs;
            dispatch_indirect opencl_build_kernel_BinnedSAH_DFS
                args( p_scheduler,
                      build_args.p_sah_globals );

            // jump out if there are no bfs WGs
            goto l_continue_outer_loop if (REG_loop_break);

            // dispatch new BFS1 WGs
            DISPATCHDIM_X = REG_num_bfs_wgs;
            dispatch_indirect opencl_build_kernel_BinnedSAH_BFS_pass1_indexed_batch
                args( p_scheduler,
                      build_args.p_sah_globals )
                postsync store_dword( p_scheduler_postsync, 2 );

            semaphore_wait while( *p_scheduler_postsync != 2 );

            // dispatch new BFS2 WGs
            dispatch_indirect opencl_build_kernel_BinnedSAH_BFS_pass2_indexed_batch
                args( p_scheduler,
                      build_args.p_sah_globals )
                postsync store_dword( p_scheduler_postsync, 0 );

            //goto l_build_loop if not(REG_num_dfs_wgs); //TODO: maybe add some logic to do "bool have_dfs_work" which will add some cycles but may be faster than checking semaphore

            // wait until all upcoming DFS WGs have finished launching
            // so that the scheduler can refill the launch array
            // TODO_OPT: Look at replacing this with a ring buffer so that scheduler stalls instead (and only rarely)
            semaphore_wait while( *p_num_dfs_wgs != 0 );

            goto l_build_loop;
        }


        l_continue_outer_loop:


        // REG4.hi was loaded from Scheduler.batched_build_loop_mask above
        goto l_build_outer_loop if(REG4.hi);

    }

////////
//
// Qnode build phase
//
////////

    // Wait for all outstanding DFS dispatches to complete, then build the QNodes
    control( wait_idle );

    // register roles for the qnode phase (REG1..REG4 are re-aliased here)
    define REG_wg_counts REG1;
    define REG_p_scheduler REG2;
    define REG_have_work REG3;
    define REG_GRB_NUM_MAX_ENTRIES REG4;

    // init scheduler for qnode phase
    dispatch opencl_build_kernel_BinnedSAH_qnode_init_scheduler_batched(1,1,1)
        args( build_args.p_scheduler,
              build_args.num_builds,
              build_args.num_max_qnode_global_root_buffer_entries);

    REG_p_scheduler = p_scheduler;

    control( wait_idle );

    // first qword of Scheduler; the low dword sizes the next dispatch
    REG_wg_counts = load_qword( REG_p_scheduler );

    DISPATCHDIM_X = REG_wg_counts.lo;

    // configure the scheduler to initiate a new block of builds
    dispatch_indirect opencl_build_kernel_BinnedSAH_qnode_begin_batched
        args( build_args.p_scheduler,
              build_args.p_sah_globals);

    // read results produced by init scheduler kernel
    // lo == num of builds processed. hi == num of maximum global root buffer entries
    //
    REG0 = build_args.p_scheduler + offsetof(Scheduler.batched_build_wg_count);
    REG5 = load_qword( REG0 );

    // zero-extend the max-entries dword to a full register for the compare in the loop
    REG_GRB_NUM_MAX_ENTRIES.lo = REG5.hi;
    REG_GRB_NUM_MAX_ENTRIES.hi = 0;

l_qnode_loop:
    {
        control( wait_idle ); // wait for previous pass

        dispatch opencl_build_kernel_BinnedSAH_qnode_scheduler(1,1,1) args( build_args.p_scheduler );

        control( wait_idle );

        // finished once the first scheduler qword reads back as zero
        REG_wg_counts = load_qword( REG_p_scheduler );
        REG_have_work = REG_wg_counts > 0;

        goto l_done if not(REG_have_work.lo);

        DISPATCHDIM_X = REG_wg_counts.lo;

        dispatch_indirect opencl_build_kernel_BinnedSAH_BuildQNodes_Amplify_batch
            args( build_args.p_sah_globals,
                  build_args.p_scheduler );

        control( wait_idle );

        // keep only the high dword of the first scheduler qword, zero-extended
        REG_wg_counts = load_qword( REG_p_scheduler ); // reload values
        REG_wg_counts.lo = REG_wg_counts.hi;
        REG_wg_counts.hi = 0;

        // run the fill pass below only when that value is below the max entry count
        REG_have_work = REG_wg_counts < REG_GRB_NUM_MAX_ENTRIES;

        goto l_qnode_loop if not(REG_have_work.lo);

        DISPATCHDIM_X = REG5.lo; // dispatch single workgroup for each build scheduled

        dispatch_indirect opencl_build_kernel_BinnedSAH_BuildQnodes_TryToFillGRB_batched
            args( build_args.p_sah_globals,
                  build_args.p_scheduler );

        goto l_qnode_loop;
    }

////////
//
// Old implementation - TODO: maybe add switch between two implementations?
//
////////
    // Wait for all outstanding DFS dispatches to complete, then build the QNodes
    //DISPATCHDIM_X = REG5.lo;

    //dispatch_indirect opencl_build_kernel_BinnedSAH_BuildQNodes
    //    args( build_args.p_sah_globals, build_args.p_scheduler );


l_done:

    // also reached directly when every build was handled by the trivial / single-WG passes
    control( wait_idle );

}
