1 #pragma once 2 3 /* Standard C headers */ 4 #include <stddef.h> 5 #include <stdint.h> 6 7 /* Internal headers */ 8 #include "threadpool-common.h" 9 #include "threadpool-atomics.h" 10 11 /* POSIX headers */ 12 #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX 13 #include <pthread.h> 14 #endif 15 16 /* Mach headers */ 17 #if PTHREADPOOL_USE_GCD 18 #include <dispatch/dispatch.h> 19 #endif 20 21 /* Windows headers */ 22 #if PTHREADPOOL_USE_EVENT 23 #include <windows.h> 24 #endif 25 26 /* Dependencies */ 27 #include <fxdiv.h> 28 29 /* Library header */ 30 #include <pthreadpool.h> 31 32 33 #define THREADPOOL_COMMAND_MASK UINT32_C(0x7FFFFFFF) 34 35 enum threadpool_command { 36 threadpool_command_init, 37 threadpool_command_parallelize, 38 threadpool_command_shutdown, 39 }; 40 41 struct PTHREADPOOL_CACHELINE_ALIGNED thread_info { 42 /** 43 * Index of the first element in the work range. 44 * Before processing a new element the owning worker thread increments this value. 45 */ 46 pthreadpool_atomic_size_t range_start; 47 /** 48 * Index of the element after the last element of the work range. 49 * Before processing a new element the stealing worker thread decrements this value. 50 */ 51 pthreadpool_atomic_size_t range_end; 52 /** 53 * The number of elements in the work range. 54 * Due to race conditions range_length <= range_end - range_start. 55 * The owning worker thread must decrement this value before incrementing @a range_start. 56 * The stealing worker thread must decrement this value before decrementing @a range_end. 57 */ 58 pthreadpool_atomic_size_t range_length; 59 /** 60 * Thread number in the 0..threads_count-1 range. 61 */ 62 size_t thread_number; 63 /** 64 * Thread pool which owns the thread. 65 */ 66 struct pthreadpool* threadpool; 67 #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX 68 /** 69 * The pthread object corresponding to the thread. 70 */ 71 pthread_t thread_object; 72 #endif 73 #if PTHREADPOOL_USE_EVENT 74 /** 75 * The Windows thread handle corresponding to the thread. 76 */ 77 HANDLE thread_handle; 78 #endif 79 }; 80 81 PTHREADPOOL_STATIC_ASSERT(sizeof(struct thread_info) % PTHREADPOOL_CACHELINE_SIZE == 0, 82 "thread_info structure must occupy an integer number of cache lines (64 bytes)"); 83 84 struct pthreadpool_1d_with_uarch_params { 85 /** 86 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_1d_with_uarch function. 87 */ 88 uint32_t default_uarch_index; 89 /** 90 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_1d_with_uarch function. 91 */ 92 uint32_t max_uarch_index; 93 }; 94 95 struct pthreadpool_1d_tile_1d_params { 96 /** 97 * Copy of the range argument passed to the pthreadpool_parallelize_1d_tile_1d function. 98 */ 99 size_t range; 100 /** 101 * Copy of the tile argument passed to the pthreadpool_parallelize_1d_tile_1d function. 102 */ 103 size_t tile; 104 }; 105 106 struct pthreadpool_2d_params { 107 /** 108 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_2d function. 109 */ 110 struct fxdiv_divisor_size_t range_j; 111 }; 112 113 struct pthreadpool_2d_tile_1d_params { 114 /** 115 * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_1d function. 116 */ 117 size_t range_j; 118 /** 119 * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_1d function. 120 */ 121 size_t tile_j; 122 /** 123 * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 124 */ 125 struct fxdiv_divisor_size_t tile_range_j; 126 }; 127 128 struct pthreadpool_2d_tile_2d_params { 129 /** 130 * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d function. 131 */ 132 size_t range_i; 133 /** 134 * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d function. 135 */ 136 size_t tile_i; 137 /** 138 * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d function. 139 */ 140 size_t range_j; 141 /** 142 * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d function. 143 */ 144 size_t tile_j; 145 /** 146 * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 147 */ 148 struct fxdiv_divisor_size_t tile_range_j; 149 }; 150 151 struct pthreadpool_2d_tile_2d_with_uarch_params { 152 /** 153 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 154 */ 155 uint32_t default_uarch_index; 156 /** 157 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 158 */ 159 uint32_t max_uarch_index; 160 /** 161 * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 162 */ 163 size_t range_i; 164 /** 165 * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 166 */ 167 size_t tile_i; 168 /** 169 * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 170 */ 171 size_t range_j; 172 /** 173 * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 174 */ 175 size_t tile_j; 176 /** 177 * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 178 */ 179 struct fxdiv_divisor_size_t tile_range_j; 180 }; 181 182 struct pthreadpool_3d_params { 183 /** 184 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d function. 185 */ 186 struct fxdiv_divisor_size_t range_j; 187 /** 188 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_3d function. 189 */ 190 struct fxdiv_divisor_size_t range_k; 191 }; 192 193 struct pthreadpool_3d_tile_1d_params { 194 /** 195 * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_1d function. 196 */ 197 size_t range_k; 198 /** 199 * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_1d function. 200 */ 201 size_t tile_k; 202 /** 203 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d_tile_1d function. 204 */ 205 struct fxdiv_divisor_size_t range_j; 206 /** 207 * FXdiv divisor for the divide_round_up(range_k, tile_k) value. 208 */ 209 struct fxdiv_divisor_size_t tile_range_k; 210 }; 211 212 struct pthreadpool_3d_tile_2d_params { 213 /** 214 * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d function. 215 */ 216 size_t range_j; 217 /** 218 * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d function. 219 */ 220 size_t tile_j; 221 /** 222 * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d function. 223 */ 224 size_t range_k; 225 /** 226 * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d function. 227 */ 228 size_t tile_k; 229 /** 230 * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 231 */ 232 struct fxdiv_divisor_size_t tile_range_j; 233 /** 234 * FXdiv divisor for the divide_round_up(range_k, tile_k) value. 235 */ 236 struct fxdiv_divisor_size_t tile_range_k; 237 }; 238 239 struct pthreadpool_3d_tile_2d_with_uarch_params { 240 /** 241 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 242 */ 243 uint32_t default_uarch_index; 244 /** 245 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 246 */ 247 uint32_t max_uarch_index; 248 /** 249 * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 250 */ 251 size_t range_j; 252 /** 253 * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 254 */ 255 size_t tile_j; 256 /** 257 * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 258 */ 259 size_t range_k; 260 /** 261 * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 262 */ 263 size_t tile_k; 264 /** 265 * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 266 */ 267 struct fxdiv_divisor_size_t tile_range_j; 268 /** 269 * FXdiv divisor for the divide_round_up(range_k, tile_k) value. 270 */ 271 struct fxdiv_divisor_size_t tile_range_k; 272 }; 273 274 struct pthreadpool_4d_params { 275 /** 276 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d function. 277 */ 278 size_t range_k; 279 /** 280 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d function. 281 */ 282 struct fxdiv_divisor_size_t range_j; 283 /** 284 * FXdiv divisor for the range_k * range_l value. 285 */ 286 struct fxdiv_divisor_size_t range_kl; 287 /** 288 * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_4d function. 289 */ 290 struct fxdiv_divisor_size_t range_l; 291 }; 292 293 struct pthreadpool_4d_tile_1d_params { 294 /** 295 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_1d function. 296 */ 297 size_t range_k; 298 /** 299 * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_1d function. 300 */ 301 size_t range_l; 302 /** 303 * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_1d function. 304 */ 305 size_t tile_l; 306 /** 307 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_1d function. 308 */ 309 struct fxdiv_divisor_size_t range_j; 310 /** 311 * FXdiv divisor for the range_k * divide_round_up(range_l, tile_l) value. 312 */ 313 struct fxdiv_divisor_size_t tile_range_kl; 314 /** 315 * FXdiv divisor for the divide_round_up(range_l, tile_l) value. 316 */ 317 struct fxdiv_divisor_size_t tile_range_l; 318 }; 319 320 struct pthreadpool_4d_tile_2d_params { 321 /** 322 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d function. 323 */ 324 size_t range_k; 325 /** 326 * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d function. 327 */ 328 size_t tile_k; 329 /** 330 * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d function. 331 */ 332 size_t range_l; 333 /** 334 * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d function. 335 */ 336 size_t tile_l; 337 /** 338 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d function. 339 */ 340 struct fxdiv_divisor_size_t range_j; 341 /** 342 * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value. 343 */ 344 struct fxdiv_divisor_size_t tile_range_kl; 345 /** 346 * FXdiv divisor for the divide_round_up(range_l, tile_l) value. 347 */ 348 struct fxdiv_divisor_size_t tile_range_l; 349 }; 350 351 struct pthreadpool_4d_tile_2d_with_uarch_params { 352 /** 353 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 354 */ 355 uint32_t default_uarch_index; 356 /** 357 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 358 */ 359 uint32_t max_uarch_index; 360 /** 361 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 362 */ 363 size_t range_k; 364 /** 365 * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 366 */ 367 size_t tile_k; 368 /** 369 * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 370 */ 371 size_t range_l; 372 /** 373 * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 374 */ 375 size_t tile_l; 376 /** 377 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 378 */ 379 struct fxdiv_divisor_size_t range_j; 380 /** 381 * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value. 382 */ 383 struct fxdiv_divisor_size_t tile_range_kl; 384 /** 385 * FXdiv divisor for the divide_round_up(range_l, tile_l) value. 386 */ 387 struct fxdiv_divisor_size_t tile_range_l; 388 }; 389 390 struct pthreadpool_5d_params { 391 /** 392 * Copy of the range_l argument passed to the pthreadpool_parallelize_5d function. 393 */ 394 size_t range_l; 395 /** 396 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d function. 397 */ 398 struct fxdiv_divisor_size_t range_j; 399 /** 400 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d function. 401 */ 402 struct fxdiv_divisor_size_t range_k; 403 /** 404 * FXdiv divisor for the range_l * range_m value. 405 */ 406 struct fxdiv_divisor_size_t range_lm; 407 /** 408 * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_5d function. 409 */ 410 struct fxdiv_divisor_size_t range_m; 411 }; 412 413 struct pthreadpool_5d_tile_1d_params { 414 /** 415 * Copy of the range_k argument passed to the pthreadpool_parallelize_5d_tile_1d function. 416 */ 417 size_t range_k; 418 /** 419 * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_1d function. 420 */ 421 size_t range_m; 422 /** 423 * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_1d function. 424 */ 425 size_t tile_m; 426 /** 427 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_1d function. 428 */ 429 struct fxdiv_divisor_size_t range_j; 430 /** 431 * FXdiv divisor for the range_k * range_l value. 432 */ 433 struct fxdiv_divisor_size_t range_kl; 434 /** 435 * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_5d_tile_1d function. 436 */ 437 struct fxdiv_divisor_size_t range_l; 438 /** 439 * FXdiv divisor for the divide_round_up(range_m, tile_m) value. 440 */ 441 struct fxdiv_divisor_size_t tile_range_m; 442 }; 443 444 struct pthreadpool_5d_tile_2d_params { 445 /** 446 * Copy of the range_l argument passed to the pthreadpool_parallelize_5d_tile_2d function. 447 */ 448 size_t range_l; 449 /** 450 * Copy of the tile_l argument passed to the pthreadpool_parallelize_5d_tile_2d function. 451 */ 452 size_t tile_l; 453 /** 454 * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_2d function. 455 */ 456 size_t range_m; 457 /** 458 * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_2d function. 459 */ 460 size_t tile_m; 461 /** 462 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_2d function. 463 */ 464 struct fxdiv_divisor_size_t range_j; 465 /** 466 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d_tile_2d function. 467 */ 468 struct fxdiv_divisor_size_t range_k; 469 /** 470 * FXdiv divisor for the divide_round_up(range_l, tile_l) * divide_round_up(range_m, tile_m) value. 471 */ 472 struct fxdiv_divisor_size_t tile_range_lm; 473 /** 474 * FXdiv divisor for the divide_round_up(range_m, tile_m) value. 475 */ 476 struct fxdiv_divisor_size_t tile_range_m; 477 }; 478 479 struct pthreadpool_6d_params { 480 /** 481 * Copy of the range_l argument passed to the pthreadpool_parallelize_6d function. 482 */ 483 size_t range_l; 484 /** 485 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d function. 486 */ 487 struct fxdiv_divisor_size_t range_j; 488 /** 489 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d function. 490 */ 491 struct fxdiv_divisor_size_t range_k; 492 /** 493 * FXdiv divisor for the range_l * range_m * range_n value. 494 */ 495 struct fxdiv_divisor_size_t range_lmn; 496 /** 497 * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d function. 498 */ 499 struct fxdiv_divisor_size_t range_m; 500 /** 501 * FXdiv divisor for the range_n argument passed to the pthreadpool_parallelize_6d function. 502 */ 503 struct fxdiv_divisor_size_t range_n; 504 }; 505 506 struct pthreadpool_6d_tile_1d_params { 507 /** 508 * Copy of the range_l argument passed to the pthreadpool_parallelize_6d_tile_1d function. 509 */ 510 size_t range_l; 511 /** 512 * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_1d function. 513 */ 514 size_t range_n; 515 /** 516 * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_1d function. 517 */ 518 size_t tile_n; 519 /** 520 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_1d function. 521 */ 522 struct fxdiv_divisor_size_t range_j; 523 /** 524 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d_tile_1d function. 525 */ 526 struct fxdiv_divisor_size_t range_k; 527 /** 528 * FXdiv divisor for the range_l * range_m * divide_round_up(range_n, tile_n) value. 529 */ 530 struct fxdiv_divisor_size_t tile_range_lmn; 531 /** 532 * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d_tile_1d function. 533 */ 534 struct fxdiv_divisor_size_t range_m; 535 /** 536 * FXdiv divisor for the divide_round_up(range_n, tile_n) value. 537 */ 538 struct fxdiv_divisor_size_t tile_range_n; 539 }; 540 541 struct pthreadpool_6d_tile_2d_params { 542 /** 543 * Copy of the range_k argument passed to the pthreadpool_parallelize_6d_tile_2d function. 544 */ 545 size_t range_k; 546 /** 547 * Copy of the range_m argument passed to the pthreadpool_parallelize_6d_tile_2d function. 548 */ 549 size_t range_m; 550 /** 551 * Copy of the tile_m argument passed to the pthreadpool_parallelize_6d_tile_2d function. 552 */ 553 size_t tile_m; 554 /** 555 * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_2d function. 556 */ 557 size_t range_n; 558 /** 559 * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_2d function. 560 */ 561 size_t tile_n; 562 /** 563 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_2d function. 564 */ 565 struct fxdiv_divisor_size_t range_j; 566 /** 567 * FXdiv divisor for the range_k * range_l value. 568 */ 569 struct fxdiv_divisor_size_t range_kl; 570 /** 571 * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_6d_tile_2d function. 572 */ 573 struct fxdiv_divisor_size_t range_l; 574 /** 575 * FXdiv divisor for the divide_round_up(range_m, tile_m) * divide_round_up(range_n, tile_n) value. 576 */ 577 struct fxdiv_divisor_size_t tile_range_mn; 578 /** 579 * FXdiv divisor for the divide_round_up(range_n, tile_n) value. 580 */ 581 struct fxdiv_divisor_size_t tile_range_n; 582 }; 583 584 struct PTHREADPOOL_CACHELINE_ALIGNED pthreadpool { 585 #if !PTHREADPOOL_USE_GCD 586 /** 587 * The number of threads that are processing an operation. 588 */ 589 pthreadpool_atomic_size_t active_threads; 590 #endif 591 #if PTHREADPOOL_USE_FUTEX 592 /** 593 * Indicates if there are active threads. 594 * Only two values are possible: 595 * - has_active_threads == 0 if active_threads == 0 596 * - has_active_threads == 1 if active_threads != 0 597 */ 598 pthreadpool_atomic_uint32_t has_active_threads; 599 #endif 600 #if !PTHREADPOOL_USE_GCD 601 /** 602 * The last command submitted to the thread pool. 603 */ 604 pthreadpool_atomic_uint32_t command; 605 #endif 606 /** 607 * The entry point function to call for each thread in the thread pool for parallelization tasks. 608 */ 609 pthreadpool_atomic_void_p thread_function; 610 /** 611 * The function to call for each item. 612 */ 613 pthreadpool_atomic_void_p task; 614 /** 615 * The first argument to the item processing function. 616 */ 617 pthreadpool_atomic_void_p argument; 618 /** 619 * Additional parallelization parameters. 620 * These parameters are specific for each thread_function. 621 */ 622 union { 623 struct pthreadpool_1d_with_uarch_params parallelize_1d_with_uarch; 624 struct pthreadpool_1d_tile_1d_params parallelize_1d_tile_1d; 625 struct pthreadpool_2d_params parallelize_2d; 626 struct pthreadpool_2d_tile_1d_params parallelize_2d_tile_1d; 627 struct pthreadpool_2d_tile_2d_params parallelize_2d_tile_2d; 628 struct pthreadpool_2d_tile_2d_with_uarch_params parallelize_2d_tile_2d_with_uarch; 629 struct pthreadpool_3d_params parallelize_3d; 630 struct pthreadpool_3d_tile_1d_params parallelize_3d_tile_1d; 631 struct pthreadpool_3d_tile_2d_params parallelize_3d_tile_2d; 632 struct pthreadpool_3d_tile_2d_with_uarch_params parallelize_3d_tile_2d_with_uarch; 633 struct pthreadpool_4d_params parallelize_4d; 634 struct pthreadpool_4d_tile_1d_params parallelize_4d_tile_1d; 635 struct pthreadpool_4d_tile_2d_params parallelize_4d_tile_2d; 636 struct pthreadpool_4d_tile_2d_with_uarch_params parallelize_4d_tile_2d_with_uarch; 637 struct pthreadpool_5d_params parallelize_5d; 638 struct pthreadpool_5d_tile_1d_params parallelize_5d_tile_1d; 639 struct pthreadpool_5d_tile_2d_params parallelize_5d_tile_2d; 640 struct pthreadpool_6d_params parallelize_6d; 641 struct pthreadpool_6d_tile_1d_params parallelize_6d_tile_1d; 642 struct pthreadpool_6d_tile_2d_params parallelize_6d_tile_2d; 643 } params; 644 /** 645 * Copy of the flags passed to a parallelization function. 646 */ 647 pthreadpool_atomic_uint32_t flags; 648 #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX 649 /** 650 * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads. 651 */ 652 pthread_mutex_t execution_mutex; 653 #endif 654 #if PTHREADPOOL_USE_GCD 655 /** 656 * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads. 657 */ 658 dispatch_semaphore_t execution_semaphore; 659 #endif 660 #if PTHREADPOOL_USE_EVENT 661 /** 662 * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads. 663 */ 664 HANDLE execution_mutex; 665 #endif 666 #if PTHREADPOOL_USE_CONDVAR 667 /** 668 * Guards access to the @a active_threads variable. 669 */ 670 pthread_mutex_t completion_mutex; 671 /** 672 * Condition variable to wait until all threads complete an operation (until @a active_threads is zero). 673 */ 674 pthread_cond_t completion_condvar; 675 /** 676 * Guards access to the @a command variable. 677 */ 678 pthread_mutex_t command_mutex; 679 /** 680 * Condition variable to wait for change of the @a command variable. 681 */ 682 pthread_cond_t command_condvar; 683 #endif 684 #if PTHREADPOOL_USE_EVENT 685 /** 686 * Events to wait on until all threads complete an operation (until @a active_threads is zero). 687 * To avoid race conditions due to spin-lock synchronization, we use two events and switch event in use after every 688 * submitted command according to the high bit of the command word. 689 */ 690 HANDLE completion_event[2]; 691 /** 692 * Events to wait on for change of the @a command variable. 693 * To avoid race conditions due to spin-lock synchronization, we use two events and switch event in use after every 694 * submitted command according to the high bit of the command word. 695 */ 696 HANDLE command_event[2]; 697 #endif 698 /** 699 * FXdiv divisor for the number of threads in the thread pool. 700 * This struct never change after pthreadpool_create. 701 */ 702 struct fxdiv_divisor_size_t threads_count; 703 /** 704 * Thread information structures that immediately follow this structure. 705 */ 706 struct thread_info threads[]; 707 }; 708 709 PTHREADPOOL_STATIC_ASSERT(sizeof(struct pthreadpool) % PTHREADPOOL_CACHELINE_SIZE == 0, 710 "pthreadpool structure must occupy an integer number of cache lines (64 bytes)"); 711 712 PTHREADPOOL_INTERNAL struct pthreadpool* pthreadpool_allocate( 713 size_t threads_count); 714 715 PTHREADPOOL_INTERNAL void pthreadpool_deallocate( 716 struct pthreadpool* threadpool); 717 718 typedef void (*thread_function_t)(struct pthreadpool* threadpool, struct thread_info* thread); 719 720 PTHREADPOOL_INTERNAL void pthreadpool_parallelize( 721 struct pthreadpool* threadpool, 722 thread_function_t thread_function, 723 const void* params, 724 size_t params_size, 725 void* task, 726 void* context, 727 size_t linear_range, 728 uint32_t flags); 729 730 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_fastpath( 731 struct pthreadpool* threadpool, 732 struct thread_info* thread); 733 734 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_with_uarch_fastpath( 735 struct pthreadpool* threadpool, 736 struct thread_info* thread); 737 738 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_tile_1d_fastpath( 739 struct pthreadpool* threadpool, 740 struct thread_info* thread); 741 742 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_fastpath( 743 struct pthreadpool* threadpool, 744 struct thread_info* thread); 745 746 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_1d_fastpath( 747 struct pthreadpool* threadpool, 748 struct thread_info* thread); 749 750 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_fastpath( 751 struct pthreadpool* threadpool, 752 struct thread_info* thread); 753 754 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_with_uarch_fastpath( 755 struct pthreadpool* threadpool, 756 struct thread_info* thread); 757 758 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_fastpath( 759 struct pthreadpool* threadpool, 760 struct thread_info* thread); 761 762 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_1d_fastpath( 763 struct pthreadpool* threadpool, 764 struct thread_info* thread); 765 766 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_fastpath( 767 struct pthreadpool* threadpool, 768 struct thread_info* thread); 769 770 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_with_uarch_fastpath( 771 struct pthreadpool* threadpool, 772 struct thread_info* thread); 773 774 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_fastpath( 775 struct pthreadpool* threadpool, 776 struct thread_info* thread); 777 778 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_1d_fastpath( 779 struct pthreadpool* threadpool, 780 struct thread_info* thread); 781 782 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_fastpath( 783 struct pthreadpool* threadpool, 784 struct thread_info* thread); 785 786 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_with_uarch_fastpath( 787 struct pthreadpool* threadpool, 788 struct thread_info* thread); 789 790 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_fastpath( 791 struct pthreadpool* threadpool, 792 struct thread_info* thread); 793 794 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_1d_fastpath( 795 struct pthreadpool* threadpool, 796 struct thread_info* thread); 797 798 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_2d_fastpath( 799 struct pthreadpool* threadpool, 800 struct thread_info* thread); 801 802 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_fastpath( 803 struct pthreadpool* threadpool, 804 struct thread_info* thread); 805 806 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_1d_fastpath( 807 struct pthreadpool* threadpool, 808 struct thread_info* thread); 809 810 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_2d_fastpath( 811 struct pthreadpool* threadpool, 812 struct thread_info* thread); 813