• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #pragma once
2 
3 /* Standard C headers */
4 #include <stddef.h>
5 #include <stdint.h>
6 
7 /* Internal headers */
8 #include "threadpool-common.h"
9 #include "threadpool-atomics.h"
10 
11 /* POSIX headers */
12 #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX
13 #include <pthread.h>
14 #endif
15 
16 /* Mach headers */
17 #if PTHREADPOOL_USE_GCD
18 #include <dispatch/dispatch.h>
19 #endif
20 
21 /* Windows headers */
22 #if PTHREADPOOL_USE_EVENT
23 #include <windows.h>
24 #endif
25 
26 /* Dependencies */
27 #include <fxdiv.h>
28 
29 /* Library header */
30 #include <pthreadpool.h>
31 
32 
/**
 * Mask selecting the low 31 bits of a 32-bit command word, i.e. every bit except the high one.
 * The event-based backend documents the high bit of the command word as the selector between its paired events.
 * NOTE(review): presumably the masked bits hold an enum threadpool_command value - confirm against the users of this mask.
 */
#define THREADPOOL_COMMAND_MASK UINT32_C(0x7FFFFFFF)
34 
/**
 * Commands that can be submitted to the thread pool.
 * NOTE(review): presumably stored in the command member of struct pthreadpool - confirm.
 */
enum threadpool_command {
	/** Value 0. NOTE(review): presumably the state before any command was submitted - confirm. */
	threadpool_command_init,
	/** Value 1. NOTE(review): presumably asks the workers to run a parallelization task - confirm. */
	threadpool_command_parallelize,
	/** Value 2. NOTE(review): presumably asks the workers to exit - confirm. */
	threadpool_command_shutdown,
};
40 
41 struct PTHREADPOOL_CACHELINE_ALIGNED thread_info {
42 	/**
43 	 * Index of the first element in the work range.
44 	 * Before processing a new element the owning worker thread increments this value.
45 	 */
46 	pthreadpool_atomic_size_t range_start;
47 	/**
48 	 * Index of the element after the last element of the work range.
49 	 * Before processing a new element the stealing worker thread decrements this value.
50 	 */
51 	pthreadpool_atomic_size_t range_end;
52 	/**
53 	 * The number of elements in the work range.
54 	 * Due to race conditions range_length <= range_end - range_start.
55 	 * The owning worker thread must decrement this value before incrementing @a range_start.
56 	 * The stealing worker thread must decrement this value before decrementing @a range_end.
57 	 */
58 	pthreadpool_atomic_size_t range_length;
59 	/**
60 	 * Thread number in the 0..threads_count-1 range.
61 	 */
62 	size_t thread_number;
63 	/**
64 	 * Thread pool which owns the thread.
65 	 */
66 	struct pthreadpool* threadpool;
67 #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX
68 	/**
69 	 * The pthread object corresponding to the thread.
70 	 */
71 	pthread_t thread_object;
72 #endif
73 #if PTHREADPOOL_USE_EVENT
74 	/**
75 	 * The Windows thread handle corresponding to the thread.
76 	 */
77 	HANDLE thread_handle;
78 #endif
79 };
80 
81 PTHREADPOOL_STATIC_ASSERT(sizeof(struct thread_info) % PTHREADPOOL_CACHELINE_SIZE == 0,
82 	"thread_info structure must occupy an integer number of cache lines (64 bytes)");
83 
/**
 * Parameters specific to pthreadpool_parallelize_1d_with_uarch.
 * Stored in the parallelize_1d_with_uarch member of the params union in struct pthreadpool.
 */
struct pthreadpool_1d_with_uarch_params {
	/** Copy of the default_uarch_index argument passed to pthreadpool_parallelize_1d_with_uarch. */
	uint32_t default_uarch_index;
	/** Copy of the max_uarch_index argument passed to pthreadpool_parallelize_1d_with_uarch. */
	uint32_t max_uarch_index;
};
94 
/**
 * Parameters specific to pthreadpool_parallelize_1d_tile_1d.
 * Stored in the parallelize_1d_tile_1d member of the params union in struct pthreadpool.
 */
struct pthreadpool_1d_tile_1d_params {
	/** Copy of the range argument passed to pthreadpool_parallelize_1d_tile_1d. */
	size_t range;
	/** Copy of the tile argument passed to pthreadpool_parallelize_1d_tile_1d. */
	size_t tile;
};
105 
106 struct pthreadpool_2d_params {
107 	/**
108 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_2d function.
109 	 */
110 	struct fxdiv_divisor_size_t range_j;
111 };
112 
113 struct pthreadpool_2d_tile_1d_params {
114 	/**
115 	 * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_1d function.
116 	 */
117 	size_t range_j;
118 	/**
119 	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_1d function.
120 	 */
121 	size_t tile_j;
122 	/**
123 	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
124 	 */
125 	struct fxdiv_divisor_size_t tile_range_j;
126 };
127 
128 struct pthreadpool_2d_tile_2d_params {
129 	/**
130 	 * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d function.
131 	 */
132 	size_t range_i;
133 	/**
134 	 * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d function.
135 	 */
136 	size_t tile_i;
137 	/**
138 	 * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d function.
139 	 */
140 	size_t range_j;
141 	/**
142 	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d function.
143 	 */
144 	size_t tile_j;
145 	/**
146 	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
147 	 */
148 	struct fxdiv_divisor_size_t tile_range_j;
149 };
150 
151 struct pthreadpool_2d_tile_2d_with_uarch_params {
152 	/**
153 	 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
154 	 */
155 	uint32_t default_uarch_index;
156 	/**
157 	 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
158 	 */
159 	uint32_t max_uarch_index;
160 	/**
161 	 * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
162 	 */
163 	size_t range_i;
164 	/**
165 	 * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
166 	 */
167 	size_t tile_i;
168 	/**
169 	 * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
170 	 */
171 	size_t range_j;
172 	/**
173 	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
174 	 */
175 	size_t tile_j;
176 	/**
177 	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
178 	 */
179 	struct fxdiv_divisor_size_t tile_range_j;
180 };
181 
182 struct pthreadpool_3d_params {
183 	/**
184 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d function.
185 	 */
186 	struct fxdiv_divisor_size_t range_j;
187 	/**
188 	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_3d function.
189 	 */
190 	struct fxdiv_divisor_size_t range_k;
191 };
192 
193 struct pthreadpool_3d_tile_1d_params {
194 	/**
195 	 * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_1d function.
196 	 */
197 	size_t range_k;
198 	/**
199 	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_1d function.
200 	 */
201 	size_t tile_k;
202 	/**
203 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d_tile_1d function.
204 	 */
205 	struct fxdiv_divisor_size_t range_j;
206 	/**
207 	 * FXdiv divisor for the divide_round_up(range_k, tile_k) value.
208 	 */
209 	struct fxdiv_divisor_size_t tile_range_k;
210 };
211 
212 struct pthreadpool_3d_tile_2d_params {
213 	/**
214 	 * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d function.
215 	 */
216 	size_t range_j;
217 	/**
218 	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d function.
219 	 */
220 	size_t tile_j;
221 	/**
222 	 * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d function.
223 	 */
224 	size_t range_k;
225 	/**
226 	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d function.
227 	 */
228 	size_t tile_k;
229 	/**
230 	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
231 	 */
232 	struct fxdiv_divisor_size_t tile_range_j;
233 	/**
234 	 * FXdiv divisor for the divide_round_up(range_k, tile_k) value.
235 	 */
236 	struct fxdiv_divisor_size_t tile_range_k;
237 };
238 
239 struct pthreadpool_3d_tile_2d_with_uarch_params {
240 	/**
241 	 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
242 	 */
243 	uint32_t default_uarch_index;
244 	/**
245 	 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
246 	 */
247 	uint32_t max_uarch_index;
248 	/**
249 	 * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
250 	 */
251 	size_t range_j;
252 	/**
253 	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
254 	 */
255 	size_t tile_j;
256 	/**
257 	 * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
258 	 */
259 	size_t range_k;
260 	/**
261 	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
262 	 */
263 	size_t tile_k;
264 	/**
265 	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
266 	 */
267 	struct fxdiv_divisor_size_t tile_range_j;
268 	/**
269 	 * FXdiv divisor for the divide_round_up(range_k, tile_k) value.
270 	 */
271 	struct fxdiv_divisor_size_t tile_range_k;
272 };
273 
274 struct pthreadpool_4d_params {
275 	/**
276 	 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d function.
277 	 */
278 	size_t range_k;
279 	/**
280 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d function.
281 	 */
282 	struct fxdiv_divisor_size_t range_j;
283 	/**
284 	 * FXdiv divisor for the range_k * range_l value.
285 	 */
286 	struct fxdiv_divisor_size_t range_kl;
287 	/**
288 	 * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_4d function.
289 	 */
290 	struct fxdiv_divisor_size_t range_l;
291 };
292 
293 struct pthreadpool_4d_tile_1d_params {
294 	/**
295 	 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_1d function.
296 	 */
297 	size_t range_k;
298 	/**
299 	 * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_1d function.
300 	 */
301 	size_t range_l;
302 	/**
303 	 * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_1d function.
304 	 */
305 	size_t tile_l;
306 	/**
307 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_1d function.
308 	 */
309 	struct fxdiv_divisor_size_t range_j;
310 	/**
311 	 * FXdiv divisor for the range_k * divide_round_up(range_l, tile_l) value.
312 	 */
313 	struct fxdiv_divisor_size_t tile_range_kl;
314 	/**
315 	 * FXdiv divisor for the divide_round_up(range_l, tile_l) value.
316 	 */
317 	struct fxdiv_divisor_size_t tile_range_l;
318 };
319 
320 struct pthreadpool_4d_tile_2d_params {
321 	/**
322 	 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d function.
323 	 */
324 	size_t range_k;
325 	/**
326 	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d function.
327 	 */
328 	size_t tile_k;
329 	/**
330 	 * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d function.
331 	 */
332 	size_t range_l;
333 	/**
334 	 * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d function.
335 	 */
336 	size_t tile_l;
337 	/**
338 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d function.
339 	 */
340 	struct fxdiv_divisor_size_t range_j;
341 	/**
342 	 * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value.
343 	 */
344 	struct fxdiv_divisor_size_t tile_range_kl;
345 	/**
346 	 * FXdiv divisor for the divide_round_up(range_l, tile_l) value.
347 	 */
348 	struct fxdiv_divisor_size_t tile_range_l;
349 };
350 
351 struct pthreadpool_4d_tile_2d_with_uarch_params {
352 	/**
353 	 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
354 	 */
355 	uint32_t default_uarch_index;
356 	/**
357 	 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
358 	 */
359 	uint32_t max_uarch_index;
360 	/**
361 	 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
362 	 */
363 	size_t range_k;
364 	/**
365 	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
366 	 */
367 	size_t tile_k;
368 	/**
369 	 * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
370 	 */
371 	size_t range_l;
372 	/**
373 	 * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
374 	 */
375 	size_t tile_l;
376 	/**
377 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
378 	 */
379 	struct fxdiv_divisor_size_t range_j;
380 	/**
381 	 * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value.
382 	 */
383 	struct fxdiv_divisor_size_t tile_range_kl;
384 	/**
385 	 * FXdiv divisor for the divide_round_up(range_l, tile_l) value.
386 	 */
387 	struct fxdiv_divisor_size_t tile_range_l;
388 };
389 
390 struct pthreadpool_5d_params {
391 	/**
392 	 * Copy of the range_l argument passed to the pthreadpool_parallelize_5d function.
393 	 */
394 	size_t range_l;
395 	/**
396 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d function.
397 	 */
398 	struct fxdiv_divisor_size_t range_j;
399 	/**
400 	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d function.
401 	 */
402 	struct fxdiv_divisor_size_t range_k;
403 	/**
404 	 * FXdiv divisor for the range_l * range_m value.
405 	 */
406 	struct fxdiv_divisor_size_t range_lm;
407 	/**
408 	 * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_5d function.
409 	 */
410 	struct fxdiv_divisor_size_t range_m;
411 };
412 
413 struct pthreadpool_5d_tile_1d_params {
414 	/**
415 	 * Copy of the range_k argument passed to the pthreadpool_parallelize_5d_tile_1d function.
416 	 */
417 	size_t range_k;
418 	/**
419 	 * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_1d function.
420 	 */
421 	size_t range_m;
422 	/**
423 	 * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_1d function.
424 	 */
425 	size_t tile_m;
426 	/**
427 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_1d function.
428 	 */
429 	struct fxdiv_divisor_size_t range_j;
430 	/**
431 	 * FXdiv divisor for the range_k * range_l value.
432 	 */
433 	struct fxdiv_divisor_size_t range_kl;
434 	/**
435 	 * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_5d_tile_1d function.
436 	 */
437 	struct fxdiv_divisor_size_t range_l;
438 	/**
439 	 * FXdiv divisor for the divide_round_up(range_m, tile_m) value.
440 	 */
441 	struct fxdiv_divisor_size_t tile_range_m;
442 };
443 
444 struct pthreadpool_5d_tile_2d_params {
445 	/**
446 	 * Copy of the range_l argument passed to the pthreadpool_parallelize_5d_tile_2d function.
447 	 */
448 	size_t range_l;
449 	/**
450 	 * Copy of the tile_l argument passed to the pthreadpool_parallelize_5d_tile_2d function.
451 	 */
452 	size_t tile_l;
453 	/**
454 	 * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_2d function.
455 	 */
456 	size_t range_m;
457 	/**
458 	 * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_2d function.
459 	 */
460 	size_t tile_m;
461 	/**
462 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_2d function.
463 	 */
464 	struct fxdiv_divisor_size_t range_j;
465 	/**
466 	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d_tile_2d function.
467 	 */
468 	struct fxdiv_divisor_size_t range_k;
469 	/**
470 	 * FXdiv divisor for the divide_round_up(range_l, tile_l) * divide_round_up(range_m, tile_m) value.
471 	 */
472 	struct fxdiv_divisor_size_t tile_range_lm;
473 	/**
474 	 * FXdiv divisor for the divide_round_up(range_m, tile_m) value.
475 	 */
476 	struct fxdiv_divisor_size_t tile_range_m;
477 };
478 
479 struct pthreadpool_6d_params {
480 	/**
481 	 * Copy of the range_l argument passed to the pthreadpool_parallelize_6d function.
482 	 */
483 	size_t range_l;
484 	/**
485 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d function.
486 	 */
487 	struct fxdiv_divisor_size_t range_j;
488 	/**
489 	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d function.
490 	 */
491 	struct fxdiv_divisor_size_t range_k;
492 	/**
493 	 * FXdiv divisor for the range_l * range_m * range_n value.
494 	 */
495 	struct fxdiv_divisor_size_t range_lmn;
496 	/**
497 	 * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d function.
498 	 */
499 	struct fxdiv_divisor_size_t range_m;
500 	/**
501 	 * FXdiv divisor for the range_n argument passed to the pthreadpool_parallelize_6d function.
502 	 */
503 	struct fxdiv_divisor_size_t range_n;
504 };
505 
506 struct pthreadpool_6d_tile_1d_params {
507 	/**
508 	 * Copy of the range_l argument passed to the pthreadpool_parallelize_6d_tile_1d function.
509 	 */
510 	size_t range_l;
511 	/**
512 	 * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_1d function.
513 	 */
514 	size_t range_n;
515 	/**
516 	 * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_1d function.
517 	 */
518 	size_t tile_n;
519 	/**
520 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_1d function.
521 	 */
522 	struct fxdiv_divisor_size_t range_j;
523 	/**
524 	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d_tile_1d function.
525 	 */
526 	struct fxdiv_divisor_size_t range_k;
527 	/**
528 	 * FXdiv divisor for the range_l * range_m * divide_round_up(range_n, tile_n) value.
529 	 */
530 	struct fxdiv_divisor_size_t tile_range_lmn;
531 	/**
532 	 * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d_tile_1d function.
533 	 */
534 	struct fxdiv_divisor_size_t range_m;
535 	/**
536 	 * FXdiv divisor for the divide_round_up(range_n, tile_n) value.
537 	 */
538 	struct fxdiv_divisor_size_t tile_range_n;
539 };
540 
541 struct pthreadpool_6d_tile_2d_params {
542 	/**
543 	 * Copy of the range_k argument passed to the pthreadpool_parallelize_6d_tile_2d function.
544 	 */
545 	size_t range_k;
546 	/**
547 	 * Copy of the range_m argument passed to the pthreadpool_parallelize_6d_tile_2d function.
548 	 */
549 	size_t range_m;
550 	/**
551 	 * Copy of the tile_m argument passed to the pthreadpool_parallelize_6d_tile_2d function.
552 	 */
553 	size_t tile_m;
554 	/**
555 	 * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_2d function.
556 	 */
557 	size_t range_n;
558 	/**
559 	 * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_2d function.
560 	 */
561 	size_t tile_n;
562 	/**
563 	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_2d function.
564 	 */
565 	struct fxdiv_divisor_size_t range_j;
566 	/**
567 	 * FXdiv divisor for the range_k * range_l value.
568 	 */
569 	struct fxdiv_divisor_size_t range_kl;
570 	/**
571 	 * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_6d_tile_2d function.
572 	 */
573 	struct fxdiv_divisor_size_t range_l;
574 	/**
575 	 * FXdiv divisor for the divide_round_up(range_m, tile_m) * divide_round_up(range_n, tile_n) value.
576 	 */
577 	struct fxdiv_divisor_size_t tile_range_mn;
578 	/**
579 	 * FXdiv divisor for the divide_round_up(range_n, tile_n) value.
580 	 */
581 	struct fxdiv_divisor_size_t tile_range_n;
582 };
583 
584 struct PTHREADPOOL_CACHELINE_ALIGNED pthreadpool {
585 #if !PTHREADPOOL_USE_GCD
586 	/**
587 	 * The number of threads that are processing an operation.
588 	 */
589 	pthreadpool_atomic_size_t active_threads;
590 #endif
591 #if PTHREADPOOL_USE_FUTEX
592 	/**
593 	 * Indicates if there are active threads.
594 	 * Only two values are possible:
595 	 * - has_active_threads == 0 if active_threads == 0
596 	 * - has_active_threads == 1 if active_threads != 0
597 	 */
598 	pthreadpool_atomic_uint32_t has_active_threads;
599 #endif
600 #if !PTHREADPOOL_USE_GCD
601 	/**
602 	 * The last command submitted to the thread pool.
603 	 */
604 	pthreadpool_atomic_uint32_t command;
605 #endif
606 	/**
607 	 * The entry point function to call for each thread in the thread pool for parallelization tasks.
608 	 */
609 	pthreadpool_atomic_void_p thread_function;
610 	/**
611 	 * The function to call for each item.
612 	 */
613 	pthreadpool_atomic_void_p task;
614 	/**
615 	 * The first argument to the item processing function.
616 	 */
617 	pthreadpool_atomic_void_p argument;
618 	/**
619 	 * Additional parallelization parameters.
620 	 * These parameters are specific for each thread_function.
621 	 */
622 	union {
623 		struct pthreadpool_1d_with_uarch_params parallelize_1d_with_uarch;
624 		struct pthreadpool_1d_tile_1d_params parallelize_1d_tile_1d;
625 		struct pthreadpool_2d_params parallelize_2d;
626 		struct pthreadpool_2d_tile_1d_params parallelize_2d_tile_1d;
627 		struct pthreadpool_2d_tile_2d_params parallelize_2d_tile_2d;
628 		struct pthreadpool_2d_tile_2d_with_uarch_params parallelize_2d_tile_2d_with_uarch;
629 		struct pthreadpool_3d_params parallelize_3d;
630 		struct pthreadpool_3d_tile_1d_params parallelize_3d_tile_1d;
631 		struct pthreadpool_3d_tile_2d_params parallelize_3d_tile_2d;
632 		struct pthreadpool_3d_tile_2d_with_uarch_params parallelize_3d_tile_2d_with_uarch;
633 		struct pthreadpool_4d_params parallelize_4d;
634 		struct pthreadpool_4d_tile_1d_params parallelize_4d_tile_1d;
635 		struct pthreadpool_4d_tile_2d_params parallelize_4d_tile_2d;
636 		struct pthreadpool_4d_tile_2d_with_uarch_params parallelize_4d_tile_2d_with_uarch;
637 		struct pthreadpool_5d_params parallelize_5d;
638 		struct pthreadpool_5d_tile_1d_params parallelize_5d_tile_1d;
639 		struct pthreadpool_5d_tile_2d_params parallelize_5d_tile_2d;
640 		struct pthreadpool_6d_params parallelize_6d;
641 		struct pthreadpool_6d_tile_1d_params parallelize_6d_tile_1d;
642 		struct pthreadpool_6d_tile_2d_params parallelize_6d_tile_2d;
643 	} params;
644 	/**
645 	 * Copy of the flags passed to a parallelization function.
646 	 */
647 	pthreadpool_atomic_uint32_t flags;
648 #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX
649 	/**
650 	 * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads.
651 	 */
652 	pthread_mutex_t execution_mutex;
653 #endif
654 #if PTHREADPOOL_USE_GCD
655 	/**
656 	 * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads.
657 	 */
658 	dispatch_semaphore_t execution_semaphore;
659 #endif
660 #if PTHREADPOOL_USE_EVENT
661 	/**
662 	 * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads.
663 	 */
664 	HANDLE execution_mutex;
665 #endif
666 #if PTHREADPOOL_USE_CONDVAR
667 	/**
668 	 * Guards access to the @a active_threads variable.
669 	 */
670 	pthread_mutex_t completion_mutex;
671 	/**
672 	 * Condition variable to wait until all threads complete an operation (until @a active_threads is zero).
673 	 */
674 	pthread_cond_t completion_condvar;
675 	/**
676 	 * Guards access to the @a command variable.
677 	 */
678 	pthread_mutex_t command_mutex;
679 	/**
680 	 * Condition variable to wait for change of the @a command variable.
681 	 */
682 	pthread_cond_t command_condvar;
683 #endif
684 #if PTHREADPOOL_USE_EVENT
685 	/**
686 	 * Events to wait on until all threads complete an operation (until @a active_threads is zero).
687 	 * To avoid race conditions due to spin-lock synchronization, we use two events and switch event in use after every
688 	 * submitted command according to the high bit of the command word.
689 	 */
690 	HANDLE completion_event[2];
691 	/**
692 	 * Events to wait on for change of the @a command variable.
693 	 * To avoid race conditions due to spin-lock synchronization, we use two events and switch event in use after every
694 	 * submitted command according to the high bit of the command word.
695 	 */
696 	HANDLE command_event[2];
697 #endif
698 	/**
699 	 * FXdiv divisor for the number of threads in the thread pool.
700 	 * This struct never change after pthreadpool_create.
701 	 */
702 	struct fxdiv_divisor_size_t threads_count;
703 	/**
704 	 * Thread information structures that immediately follow this structure.
705 	 */
706 	struct thread_info threads[];
707 };
708 
709 PTHREADPOOL_STATIC_ASSERT(sizeof(struct pthreadpool) % PTHREADPOOL_CACHELINE_SIZE == 0,
710 	"pthreadpool structure must occupy an integer number of cache lines (64 bytes)");
711 
712 PTHREADPOOL_INTERNAL struct pthreadpool* pthreadpool_allocate(
713 	size_t threads_count);
714 
715 PTHREADPOOL_INTERNAL void pthreadpool_deallocate(
716 	struct pthreadpool* threadpool);
717 
/**
 * Signature of a per-thread entry point for parallelization tasks.
 * It receives the thread pool and a thread_info structure.
 * NOTE(review): presumably the thread_info belongs to the calling worker - confirm.
 */
typedef void (*thread_function_t)(struct pthreadpool* threadpool, struct thread_info* thread);
719 
720 PTHREADPOOL_INTERNAL void pthreadpool_parallelize(
721 	struct pthreadpool* threadpool,
722 	thread_function_t thread_function,
723 	const void* params,
724 	size_t params_size,
725 	void* task,
726 	void* context,
727 	size_t linear_range,
728 	uint32_t flags);
729 
730 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_fastpath(
731 	struct pthreadpool* threadpool,
732 	struct thread_info* thread);
733 
734 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_with_uarch_fastpath(
735 	struct pthreadpool* threadpool,
736 	struct thread_info* thread);
737 
738 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_tile_1d_fastpath(
739 	struct pthreadpool* threadpool,
740 	struct thread_info* thread);
741 
742 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_fastpath(
743 	struct pthreadpool* threadpool,
744 	struct thread_info* thread);
745 
746 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_1d_fastpath(
747 	struct pthreadpool* threadpool,
748 	struct thread_info* thread);
749 
750 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_fastpath(
751 	struct pthreadpool* threadpool,
752 	struct thread_info* thread);
753 
754 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_with_uarch_fastpath(
755 	struct pthreadpool* threadpool,
756 	struct thread_info* thread);
757 
758 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_fastpath(
759 	struct pthreadpool* threadpool,
760 	struct thread_info* thread);
761 
762 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_1d_fastpath(
763 	struct pthreadpool* threadpool,
764 	struct thread_info* thread);
765 
766 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_fastpath(
767 	struct pthreadpool* threadpool,
768 	struct thread_info* thread);
769 
770 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_with_uarch_fastpath(
771 	struct pthreadpool* threadpool,
772 	struct thread_info* thread);
773 
774 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_fastpath(
775 	struct pthreadpool* threadpool,
776 	struct thread_info* thread);
777 
778 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_1d_fastpath(
779 	struct pthreadpool* threadpool,
780 	struct thread_info* thread);
781 
782 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_fastpath(
783 	struct pthreadpool* threadpool,
784 	struct thread_info* thread);
785 
786 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_with_uarch_fastpath(
787 	struct pthreadpool* threadpool,
788 	struct thread_info* thread);
789 
790 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_fastpath(
791 	struct pthreadpool* threadpool,
792 	struct thread_info* thread);
793 
794 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_1d_fastpath(
795 	struct pthreadpool* threadpool,
796 	struct thread_info* thread);
797 
798 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_2d_fastpath(
799 	struct pthreadpool* threadpool,
800 	struct thread_info* thread);
801 
802 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_fastpath(
803 	struct pthreadpool* threadpool,
804 	struct thread_info* thread);
805 
806 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_1d_fastpath(
807 	struct pthreadpool* threadpool,
808 	struct thread_info* thread);
809 
810 PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_2d_fastpath(
811 	struct pthreadpool* threadpool,
812 	struct thread_info* thread);
813