#include <gtest/gtest.h>

#include <pthreadpool.h>

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>
9
10
11 typedef std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> auto_pthreadpool_t;
12
13
// Iteration ranges and tile sizes used by the parallelize tests, grouped by API flavour.

// 1D and 1D-tiled.
constexpr size_t kParallelize1DRange = 1223;
constexpr size_t kParallelize1DTile1DRange = 1303;
constexpr size_t kParallelize1DTile1DTile = 11;

// 2D, 2D with 1D tiling, 2D with 2D tiling.
constexpr size_t kParallelize2DRangeI = 41;
constexpr size_t kParallelize2DRangeJ = 43;
constexpr size_t kParallelize2DTile1DRangeI = 43;
constexpr size_t kParallelize2DTile1DRangeJ = 53;
constexpr size_t kParallelize2DTile1DTileJ = 5;
constexpr size_t kParallelize2DTile2DRangeI = 53;
constexpr size_t kParallelize2DTile2DRangeJ = 59;
constexpr size_t kParallelize2DTile2DTileI = 5;
constexpr size_t kParallelize2DTile2DTileJ = 7;

// 3D, 3D with 1D tiling, 3D with 2D tiling.
constexpr size_t kParallelize3DRangeI = 13;
constexpr size_t kParallelize3DRangeJ = 17;
constexpr size_t kParallelize3DRangeK = 19;
constexpr size_t kParallelize3DTile1DRangeI = 17;
constexpr size_t kParallelize3DTile1DRangeJ = 19;
constexpr size_t kParallelize3DTile1DRangeK = 23;
constexpr size_t kParallelize3DTile1DTileK = 5;
constexpr size_t kParallelize3DTile2DRangeI = 19;
constexpr size_t kParallelize3DTile2DRangeJ = 23;
constexpr size_t kParallelize3DTile2DRangeK = 29;
constexpr size_t kParallelize3DTile2DTileJ = 2;
constexpr size_t kParallelize3DTile2DTileK = 3;

// 4D, 4D with 1D tiling, 4D with 2D tiling.
constexpr size_t kParallelize4DRangeI = 11;
constexpr size_t kParallelize4DRangeJ = 13;
constexpr size_t kParallelize4DRangeK = 17;
constexpr size_t kParallelize4DRangeL = 19;
constexpr size_t kParallelize4DTile1DRangeI = 13;
constexpr size_t kParallelize4DTile1DRangeJ = 17;
constexpr size_t kParallelize4DTile1DRangeK = 19;
constexpr size_t kParallelize4DTile1DRangeL = 23;
constexpr size_t kParallelize4DTile1DTileL = 5;
constexpr size_t kParallelize4DTile2DRangeI = 17;
constexpr size_t kParallelize4DTile2DRangeJ = 19;
constexpr size_t kParallelize4DTile2DRangeK = 23;
constexpr size_t kParallelize4DTile2DRangeL = 29;
constexpr size_t kParallelize4DTile2DTileK = 2;
constexpr size_t kParallelize4DTile2DTileL = 3;

// 5D, 5D with 1D tiling, 5D with 2D tiling.
constexpr size_t kParallelize5DRangeI = 7;
constexpr size_t kParallelize5DRangeJ = 11;
constexpr size_t kParallelize5DRangeK = 13;
constexpr size_t kParallelize5DRangeL = 17;
constexpr size_t kParallelize5DRangeM = 19;
constexpr size_t kParallelize5DTile1DRangeI = 11;
constexpr size_t kParallelize5DTile1DRangeJ = 13;
constexpr size_t kParallelize5DTile1DRangeK = 17;
constexpr size_t kParallelize5DTile1DRangeL = 19;
constexpr size_t kParallelize5DTile1DRangeM = 23;
constexpr size_t kParallelize5DTile1DTileM = 5;
constexpr size_t kParallelize5DTile2DRangeI = 13;
constexpr size_t kParallelize5DTile2DRangeJ = 17;
constexpr size_t kParallelize5DTile2DRangeK = 19;
constexpr size_t kParallelize5DTile2DRangeL = 23;
constexpr size_t kParallelize5DTile2DRangeM = 29;
constexpr size_t kParallelize5DTile2DTileL = 3;
constexpr size_t kParallelize5DTile2DTileM = 2;

// 6D, 6D with 1D tiling, 6D with 2D tiling.
constexpr size_t kParallelize6DRangeI = 3;
constexpr size_t kParallelize6DRangeJ = 5;
constexpr size_t kParallelize6DRangeK = 7;
constexpr size_t kParallelize6DRangeL = 11;
constexpr size_t kParallelize6DRangeM = 13;
constexpr size_t kParallelize6DRangeN = 17;
constexpr size_t kParallelize6DTile1DRangeI = 5;
constexpr size_t kParallelize6DTile1DRangeJ = 7;
constexpr size_t kParallelize6DTile1DRangeK = 11;
constexpr size_t kParallelize6DTile1DRangeL = 13;
constexpr size_t kParallelize6DTile1DRangeM = 17;
constexpr size_t kParallelize6DTile1DRangeN = 19;
constexpr size_t kParallelize6DTile1DTileN = 5;
constexpr size_t kParallelize6DTile2DRangeI = 7;
constexpr size_t kParallelize6DTile2DRangeJ = 11;
constexpr size_t kParallelize6DTile2DRangeK = 13;
constexpr size_t kParallelize6DTile2DRangeL = 17;
constexpr size_t kParallelize6DTile2DRangeM = 19;
constexpr size_t kParallelize6DTile2DRangeN = 23;
constexpr size_t kParallelize6DTile2DTileM = 3;
constexpr size_t kParallelize6DTile2DTileN = 2;
92
// Number of repeated parallelize passes used by the "processed multiple times" tests;
// separate, smaller counts are defined for the 5D and 6D variants.
constexpr size_t kIncrementIterations = 101;
constexpr size_t kIncrementIterations5D = 7;
constexpr size_t kIncrementIterations6D = 3;
96
// Microarchitecture indices passed to the *_with_uarch entry points: the maximum
// accepted index and the default index handed to the tests' tasks.
constexpr uint32_t kMaxUArchIndex = 0;
constexpr uint32_t kDefaultUArchIndex = 42;
99
100
// Destroying a null thread pool handle is exercised here; the test has no assertions
// and passes as long as the call returns.
TEST(CreateAndDestroy, NullThreadPool) {
  pthreadpool* const threadpool = nullptr;

  pthreadpool_destroy(threadpool);
}
105
// Creates a pool via pthreadpool_create(1), checks the handle is non-null, then destroys it.
TEST(CreateAndDestroy, SingleThreadPool) {
  pthreadpool* const threadpool = pthreadpool_create(1);
  ASSERT_TRUE(threadpool);

  pthreadpool_destroy(threadpool);
}
111
// Creates a pool via pthreadpool_create(0), checks the handle is non-null, then destroys it.
TEST(CreateAndDestroy, MultiThreadPool) {
  pthreadpool* const threadpool = pthreadpool_create(0);
  ASSERT_TRUE(threadpool);

  pthreadpool_destroy(threadpool);
}
117
// 1D task callback that ignores both its context pointer and its item index.
static void ComputeNothing1D(void* /* context */, size_t /* i */) {}
120
// Runs the no-op task over kParallelize1DRange items on a pool from pthreadpool_create(1);
// the test passes once the parallelize call returns.
TEST(Parallelize1D, SingleThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  pthreadpool_parallelize_1d(pool, ComputeNothing1D, nullptr, kParallelize1DRange, 0 /* flags */);
}
131
// Runs the no-op task over kParallelize1DRange items on a pool from pthreadpool_create(0);
// skipped when that pool reports at most one thread.
TEST(Parallelize1D, MultiThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d(pool, ComputeNothing1D, nullptr, kParallelize1DRange, 0 /* flags */);
}
147
// 1D task callback: records a non-fatal test failure if the item index `i` is not
// strictly below kParallelize1DRange. The context pointer is ignored.
static void CheckBounds1D(void*, size_t i) {
  EXPECT_LT(i, kParallelize1DRange);
}
151
// Every index delivered to the task on a pthreadpool_create(1) pool must be within the range.
TEST(Parallelize1D, SingleThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  pthreadpool_parallelize_1d(pool, CheckBounds1D, nullptr, kParallelize1DRange, 0 /* flags */);
}
163
// Every index delivered to the task on a pthreadpool_create(0) pool must be within the range;
// skipped when that pool reports at most one thread.
TEST(Parallelize1D, MultiThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d(pool, CheckBounds1D, nullptr, kParallelize1DRange, 0 /* flags */);
}
179
// 1D task callback: marks item `i` as processed by storing `true` (relaxed ordering)
// into the i-th flag of the array passed as context.
static void SetTrue1D(std::atomic_bool* processed_indicators, size_t i) {
  std::atomic_bool& indicator = processed_indicators[i];
  indicator.store(true, std::memory_order_relaxed);
}
183
// After one parallelize pass on a pthreadpool_create(1) pool, every per-item flag must be set.
TEST(Parallelize1D, SingleThreadPoolAllItemsProcessed) {
  std::vector<std::atomic_bool> indicators(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  // The task takes a typed context pointer, so it is cast to the generic task signature.
  const pthreadpool_task_1d_t task = reinterpret_cast<pthreadpool_task_1d_t>(SetTrue1D);
  void* const context = indicators.data();
  pthreadpool_parallelize_1d(pool, task, context, kParallelize1DRange, 0 /* flags */);

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_TRUE(indicators[i].load(std::memory_order_relaxed))
      << "Element " << i << " not processed";
  }
}
202
// After one parallelize pass on a pthreadpool_create(0) pool, every per-item flag must be set;
// skipped when that pool reports at most one thread.
TEST(Parallelize1D, MultiThreadPoolAllItemsProcessed) {
  std::vector<std::atomic_bool> indicators(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  // The task takes a typed context pointer, so it is cast to the generic task signature.
  const pthreadpool_task_1d_t task = reinterpret_cast<pthreadpool_task_1d_t>(SetTrue1D);
  void* const context = indicators.data();
  pthreadpool_parallelize_1d(pool, task, context, kParallelize1DRange, 0 /* flags */);

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_TRUE(indicators[i].load(std::memory_order_relaxed))
      << "Element " << i << " not processed";
  }
}
225
// 1D task callback: adds one (relaxed ordering) to the i-th counter of the array
// passed as context.
static void Increment1D(std::atomic_int* processed_counters, size_t i) {
  std::atomic_int& counter = processed_counters[i];
  counter.fetch_add(1, std::memory_order_relaxed);
}
229
// After one parallelize pass on a pthreadpool_create(1) pool, every per-item counter must be 1.
TEST(Parallelize1D, SingleThreadPoolEachItemProcessedOnce) {
  std::vector<std::atomic_int> counters(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const pthreadpool_task_1d_t task = reinterpret_cast<pthreadpool_task_1d_t>(Increment1D);
  void* const context = counters.data();
  pthreadpool_parallelize_1d(pool, task, context, kParallelize1DRange, 0 /* flags */);

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_EQ(counters[i].load(std::memory_order_relaxed), 1)
      << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times (expected: 1)";
  }
}
248
// After one parallelize pass on a pthreadpool_create(0) pool, every per-item counter must be 1;
// skipped when that pool reports at most one thread.
TEST(Parallelize1D, MultiThreadPoolEachItemProcessedOnce) {
  std::vector<std::atomic_int> counters(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const pthreadpool_task_1d_t task = reinterpret_cast<pthreadpool_task_1d_t>(Increment1D);
  void* const context = counters.data();
  pthreadpool_parallelize_1d(pool, task, context, kParallelize1DRange, 0 /* flags */);

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_EQ(counters[i].load(std::memory_order_relaxed), 1)
      << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times (expected: 1)";
  }
}
271
// Repeats the parallelize pass kIncrementIterations times on a pthreadpool_create(1) pool;
// every per-item counter must end up equal to the number of passes.
TEST(Parallelize1D, SingleThreadPoolEachItemProcessedMultipleTimes) {
  std::vector<std::atomic_int> counters(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const pthreadpool_task_1d_t task = reinterpret_cast<pthreadpool_task_1d_t>(Increment1D);
  void* const context = counters.data();
  for (size_t pass = 0; pass != kIncrementIterations; ++pass) {
    pthreadpool_parallelize_1d(pool, task, context, kParallelize1DRange, 0 /* flags */);
  }

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_EQ(counters[i].load(std::memory_order_relaxed), kIncrementIterations)
      << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times "
      << "(expected: " << kIncrementIterations << ")";
  }
}
293
// Repeats the parallelize pass kIncrementIterations times on a pthreadpool_create(0) pool;
// every per-item counter must end up equal to the number of passes.
// Skipped when that pool reports at most one thread.
TEST(Parallelize1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
  std::vector<std::atomic_int> counters(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const pthreadpool_task_1d_t task = reinterpret_cast<pthreadpool_task_1d_t>(Increment1D);
  void* const context = counters.data();
  for (size_t pass = 0; pass != kIncrementIterations; ++pass) {
    pthreadpool_parallelize_1d(pool, task, context, kParallelize1DRange, 0 /* flags */);
  }

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_EQ(counters[i].load(std::memory_order_relaxed), kIncrementIterations)
      << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times "
      << "(expected: " << kIncrementIterations << ")";
  }
}
319
// 1D task callback: adds one (relaxed ordering) to the single shared counter passed
// as context, regardless of which item index is being processed.
static void IncrementSame1D(std::atomic_int* num_processed_items, size_t i) {
  std::atomic_int& shared_counter = *num_processed_items;
  shared_counter.fetch_add(1, std::memory_order_relaxed);
}
323
// Every item increments one shared counter; after the parallelize call the counter must
// equal kParallelize1DRange. Skipped when the pool reports at most one thread.
TEST(Parallelize1D, MultiThreadPoolHighContention) {
  // Direct-initialize the atomic: ATOMIC_VAR_INIT is deprecated in C++20 and removed in C++23.
  std::atomic_int num_processed_items{0};

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d(
    threadpool.get(),
    reinterpret_cast<pthreadpool_task_1d_t>(IncrementSame1D),
    static_cast<void*>(&num_processed_items),
    kParallelize1DRange,
    0 /* flags */);
  // Compare as size_t so both operands of EXPECT_EQ have the same unsigned type.
  EXPECT_EQ(static_cast<size_t>(num_processed_items.load(std::memory_order_relaxed)), kParallelize1DRange);
}
342
WorkImbalance1D(std::atomic_int * num_processed_items,size_t i)343 static void WorkImbalance1D(std::atomic_int* num_processed_items, size_t i) {
344 num_processed_items->fetch_add(1, std::memory_order_relaxed);
345 if (i == 0) {
346 /* Spin-wait until all items are computed */
347 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize1DRange) {
348 std::atomic_thread_fence(std::memory_order_acquire);
349 }
350 }
351 }
352
// Runs the imbalanced task (item 0 waits for all other items to be counted); the call can
// only return if the remaining items get processed while item 0 is spinning. Afterwards
// the counter must equal kParallelize1DRange. Skipped when the pool reports at most one thread.
TEST(Parallelize1D, MultiThreadPoolWorkStealing) {
  // Direct-initialize the atomic: ATOMIC_VAR_INIT is deprecated in C++20 and removed in C++23.
  std::atomic_int num_processed_items{0};

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d(
    threadpool.get(),
    reinterpret_cast<pthreadpool_task_1d_t>(WorkImbalance1D),
    static_cast<void*>(&num_processed_items),
    kParallelize1DRange,
    0 /* flags */);
  // Compare as size_t so both operands of EXPECT_EQ have the same unsigned type.
  EXPECT_EQ(static_cast<size_t>(num_processed_items.load(std::memory_order_relaxed)), kParallelize1DRange);
}
371
// 1D-with-uarch task callback that ignores its context pointer, uarch index and item index.
static void ComputeNothing1DWithUArch(void* /* context */, uint32_t /* uarch_index */, size_t /* i */) {}
374
// Runs the no-op uarch-aware task over kParallelize1DRange items on a pthreadpool_create(1)
// pool; the test passes once the parallelize call returns.
TEST(Parallelize1DWithUArch, SingleThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  pthreadpool_parallelize_1d_with_uarch(
    pool, ComputeNothing1DWithUArch, nullptr,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);
}
387
// Runs the no-op uarch-aware task over kParallelize1DRange items on a pthreadpool_create(0)
// pool; skipped when that pool reports at most one thread.
TEST(Parallelize1DWithUArch, MultiThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d_with_uarch(
    pool, ComputeNothing1DWithUArch, nullptr,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);
}
405
CheckUArch1DWithUArch(void *,uint32_t uarch_index,size_t)406 static void CheckUArch1DWithUArch(void*, uint32_t uarch_index, size_t) {
407 if (uarch_index != kDefaultUArchIndex) {
408 EXPECT_LE(uarch_index, kMaxUArchIndex);
409 }
410 }
411
// Every uarch index delivered to the task on a pthreadpool_create(1) pool must be the
// default index or within the maximum.
TEST(Parallelize1DWithUArch, SingleThreadPoolUArchInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  pthreadpool_parallelize_1d_with_uarch(
    pool, CheckUArch1DWithUArch, nullptr,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);
}
424
// Every uarch index delivered to the task on a pthreadpool_create(0) pool must be the
// default index or within the maximum; skipped when the pool reports at most one thread.
TEST(Parallelize1DWithUArch, MultiThreadPoolUArchInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d_with_uarch(
    pool, CheckUArch1DWithUArch, nullptr,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);
}
442
// 1D-with-uarch task callback: records a non-fatal test failure if the item index `i`
// is not strictly below kParallelize1DRange. Context and uarch index are ignored.
static void CheckBounds1DWithUArch(void*, uint32_t, size_t i) {
  EXPECT_LT(i, kParallelize1DRange);
}
446
// Every index delivered to the uarch-aware task on a pthreadpool_create(1) pool must be
// within the range.
TEST(Parallelize1DWithUArch, SingleThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  pthreadpool_parallelize_1d_with_uarch(
    pool, CheckBounds1DWithUArch, nullptr,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);
}
460
// Every index delivered to the uarch-aware task on a pthreadpool_create(0) pool must be
// within the range; skipped when that pool reports at most one thread.
TEST(Parallelize1DWithUArch, MultiThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d_with_uarch(
    pool, CheckBounds1DWithUArch, nullptr,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);
}
478
// 1D-with-uarch task callback: stores `true` (relaxed ordering) into the i-th flag of
// the array passed as context. The uarch index is ignored.
static void SetTrue1DWithUArch(std::atomic_bool* processed_indicators, uint32_t, size_t i) {
  std::atomic_bool& indicator = processed_indicators[i];
  indicator.store(true, std::memory_order_relaxed);
}
482
// After one uarch-aware parallelize pass on a pthreadpool_create(1) pool, every per-item
// flag must be set.
TEST(Parallelize1DWithUArch, SingleThreadPoolAllItemsProcessed) {
  std::vector<std::atomic_bool> indicators(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  // The task takes a typed context pointer, so it is cast to the generic task signature.
  const pthreadpool_task_1d_with_id_t task =
    reinterpret_cast<pthreadpool_task_1d_with_id_t>(SetTrue1DWithUArch);
  void* const context = indicators.data();
  pthreadpool_parallelize_1d_with_uarch(
    pool, task, context,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_TRUE(indicators[i].load(std::memory_order_relaxed))
      << "Element " << i << " not processed";
  }
}
503
// After one uarch-aware parallelize pass on a pthreadpool_create(0) pool, every per-item
// flag must be set; skipped when that pool reports at most one thread.
TEST(Parallelize1DWithUArch, MultiThreadPoolAllItemsProcessed) {
  std::vector<std::atomic_bool> indicators(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  // The task takes a typed context pointer, so it is cast to the generic task signature.
  const pthreadpool_task_1d_with_id_t task =
    reinterpret_cast<pthreadpool_task_1d_with_id_t>(SetTrue1DWithUArch);
  void* const context = indicators.data();
  pthreadpool_parallelize_1d_with_uarch(
    pool, task, context,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_TRUE(indicators[i].load(std::memory_order_relaxed))
      << "Element " << i << " not processed";
  }
}
528
// 1D-with-uarch task callback: adds one (relaxed ordering) to the i-th counter of the
// array passed as context. The uarch index is ignored.
static void Increment1DWithUArch(std::atomic_int* processed_counters, uint32_t, size_t i) {
  std::atomic_int& counter = processed_counters[i];
  counter.fetch_add(1, std::memory_order_relaxed);
}
532
// After one uarch-aware parallelize pass on a pthreadpool_create(1) pool, every per-item
// counter must be 1.
TEST(Parallelize1DWithUArch, SingleThreadPoolEachItemProcessedOnce) {
  std::vector<std::atomic_int> counters(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const pthreadpool_task_1d_with_id_t task =
    reinterpret_cast<pthreadpool_task_1d_with_id_t>(Increment1DWithUArch);
  void* const context = counters.data();
  pthreadpool_parallelize_1d_with_uarch(
    pool, task, context,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_EQ(counters[i].load(std::memory_order_relaxed), 1)
      << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times (expected: 1)";
  }
}
553
// After one uarch-aware parallelize pass on a pthreadpool_create(0) pool, every per-item
// counter must be 1; skipped when that pool reports at most one thread.
TEST(Parallelize1DWithUArch, MultiThreadPoolEachItemProcessedOnce) {
  std::vector<std::atomic_int> counters(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const pthreadpool_task_1d_with_id_t task =
    reinterpret_cast<pthreadpool_task_1d_with_id_t>(Increment1DWithUArch);
  void* const context = counters.data();
  pthreadpool_parallelize_1d_with_uarch(
    pool, task, context,
    kDefaultUArchIndex, kMaxUArchIndex,
    kParallelize1DRange, 0 /* flags */);

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_EQ(counters[i].load(std::memory_order_relaxed), 1)
      << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times (expected: 1)";
  }
}
578
// Repeats the uarch-aware parallelize pass kIncrementIterations times on a
// pthreadpool_create(1) pool; every per-item counter must equal the number of passes.
TEST(Parallelize1DWithUArch, SingleThreadPoolEachItemProcessedMultipleTimes) {
  std::vector<std::atomic_int> counters(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const pthreadpool_task_1d_with_id_t task =
    reinterpret_cast<pthreadpool_task_1d_with_id_t>(Increment1DWithUArch);
  void* const context = counters.data();
  for (size_t pass = 0; pass != kIncrementIterations; ++pass) {
    pthreadpool_parallelize_1d_with_uarch(
      pool, task, context,
      kDefaultUArchIndex, kMaxUArchIndex,
      kParallelize1DRange, 0 /* flags */);
  }

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_EQ(counters[i].load(std::memory_order_relaxed), kIncrementIterations)
      << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times "
      << "(expected: " << kIncrementIterations << ")";
  }
}
602
// Repeats the uarch-aware parallelize pass kIncrementIterations times on a
// pthreadpool_create(0) pool; every per-item counter must equal the number of passes.
// Skipped when that pool reports at most one thread.
TEST(Parallelize1DWithUArch, MultiThreadPoolEachItemProcessedMultipleTimes) {
  std::vector<std::atomic_int> counters(kParallelize1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const pthreadpool_task_1d_with_id_t task =
    reinterpret_cast<pthreadpool_task_1d_with_id_t>(Increment1DWithUArch);
  void* const context = counters.data();
  for (size_t pass = 0; pass != kIncrementIterations; ++pass) {
    pthreadpool_parallelize_1d_with_uarch(
      pool, task, context,
      kDefaultUArchIndex, kMaxUArchIndex,
      kParallelize1DRange, 0 /* flags */);
  }

  for (size_t i = 0; i != kParallelize1DRange; ++i) {
    EXPECT_EQ(counters[i].load(std::memory_order_relaxed), kIncrementIterations)
      << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times "
      << "(expected: " << kIncrementIterations << ")";
  }
}
630
// 1D-with-uarch task callback: adds one (relaxed ordering) to the single shared counter
// passed as context, regardless of uarch index or item index.
static void IncrementSame1DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i) {
  std::atomic_int& shared_counter = *num_processed_items;
  shared_counter.fetch_add(1, std::memory_order_relaxed);
}
634
// Every item increments one shared counter through the uarch-aware entry point; after the
// call the counter must equal kParallelize1DRange. Skipped when the pool reports at most
// one thread.
TEST(Parallelize1DWithUArch, MultiThreadPoolHighContention) {
  // Direct-initialize the atomic: ATOMIC_VAR_INIT is deprecated in C++20 and removed in C++23.
  std::atomic_int num_processed_items{0};

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d_with_uarch(
    threadpool.get(),
    reinterpret_cast<pthreadpool_task_1d_with_id_t>(IncrementSame1DWithUArch),
    static_cast<void*>(&num_processed_items),
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize1DRange,
    0 /* flags */);
  // Compare as size_t so both operands of EXPECT_EQ have the same unsigned type.
  EXPECT_EQ(static_cast<size_t>(num_processed_items.load(std::memory_order_relaxed)), kParallelize1DRange);
}
655
WorkImbalance1DWithUArch(std::atomic_int * num_processed_items,uint32_t,size_t i)656 static void WorkImbalance1DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i) {
657 num_processed_items->fetch_add(1, std::memory_order_relaxed);
658 if (i == 0) {
659 /* Spin-wait until all items are computed */
660 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize1DRange) {
661 std::atomic_thread_fence(std::memory_order_acquire);
662 }
663 }
664 }
665
// Runs the imbalanced uarch-aware task (item 0 waits for all other items to be counted);
// the call can only return if the remaining items get processed while item 0 is spinning.
// Afterwards the counter must equal kParallelize1DRange. Skipped when the pool reports at
// most one thread.
TEST(Parallelize1DWithUArch, MultiThreadPoolWorkStealing) {
  // Direct-initialize the atomic: ATOMIC_VAR_INIT is deprecated in C++20 and removed in C++23.
  std::atomic_int num_processed_items{0};

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d_with_uarch(
    threadpool.get(),
    reinterpret_cast<pthreadpool_task_1d_with_id_t>(WorkImbalance1DWithUArch),
    static_cast<void*>(&num_processed_items),
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize1DRange,
    0 /* flags */);
  // Compare as size_t so both operands of EXPECT_EQ have the same unsigned type.
  EXPECT_EQ(static_cast<size_t>(num_processed_items.load(std::memory_order_relaxed)), kParallelize1DRange);
}
686
// Tiled 1D task callback that ignores its context pointer, tile start and tile size.
static void ComputeNothing1DTile1D(void* /* context */, size_t /* start_i */, size_t /* tile_i */) {}
689
// Runs the no-op tiled task over kParallelize1DTile1DRange items on a pthreadpool_create(1)
// pool; the test passes once the parallelize call returns.
TEST(Parallelize1DTile1D, SingleThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  pthreadpool_parallelize_1d_tile_1d(
    pool, ComputeNothing1DTile1D, nullptr,
    kParallelize1DTile1DRange, kParallelize1DTile1DTile,
    0 /* flags */);
}
700
// Runs the no-op tiled task over kParallelize1DTile1DRange items on a pthreadpool_create(0)
// pool; skipped when that pool reports at most one thread.
TEST(Parallelize1DTile1D, MultiThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d_tile_1d(
    pool, ComputeNothing1DTile1D, nullptr,
    kParallelize1DTile1DRange, kParallelize1DTile1DTile,
    0 /* flags */);
}
716
// Tiled 1D task callback: records non-fatal test failures unless the tile starts inside
// the range (start_i < kParallelize1DTile1DRange) and does not run past its end
// (start_i + tile_i <= kParallelize1DTile1DRange). The context pointer is ignored.
static void CheckBounds1DTile1D(void*, size_t start_i, size_t tile_i) {
  EXPECT_LT(start_i, kParallelize1DTile1DRange);
  EXPECT_LE(start_i + tile_i, kParallelize1DTile1DRange);
}
721
// Every tile delivered to the task on a pthreadpool_create(1) pool must lie within the range.
TEST(Parallelize1DTile1D, SingleThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  pthreadpool_parallelize_1d_tile_1d(
    pool, CheckBounds1DTile1D, nullptr,
    kParallelize1DTile1DRange, kParallelize1DTile1DTile,
    0 /* flags */);
}
733
// Every tile delivered to the task on a pthreadpool_create(0) pool must lie within the range;
// skipped when that pool reports at most one thread.
TEST(Parallelize1DTile1D, MultiThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d_tile_1d(
    pool, CheckBounds1DTile1D, nullptr,
    kParallelize1DTile1DRange, kParallelize1DTile1DTile,
    0 /* flags */);
}
749
// Tiled 1D task callback verifying the shape of the tile handed to it: the tile is
// non-empty, no larger than kParallelize1DTile1DTile, starts on a multiple of the tile
// size, and its size is exactly min(tile size, items remaining from start_i to the end
// of the range). The context pointer is ignored.
static void CheckTiling1DTile1D(void*, size_t start_i, size_t tile_i) {
  EXPECT_GT(tile_i, 0);
  EXPECT_LE(tile_i, kParallelize1DTile1DTile);
  EXPECT_EQ(start_i % kParallelize1DTile1DTile, 0);
  // Only the last tile may be short: it covers whatever remains of the range.
  EXPECT_EQ(tile_i, std::min<size_t>(kParallelize1DTile1DTile, kParallelize1DTile1DRange - start_i));
}
756
// Every tile delivered to the task on a pthreadpool_create(1) pool must satisfy the
// tiling checks performed by CheckTiling1DTile1D.
TEST(Parallelize1DTile1D, SingleThreadPoolUniformTiling) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  pthreadpool_parallelize_1d_tile_1d(
    pool, CheckTiling1DTile1D, nullptr,
    kParallelize1DTile1DRange, kParallelize1DTile1DTile,
    0 /* flags */);
}
768
// Every tile delivered to the task on a pthreadpool_create(0) pool must satisfy the
// tiling checks performed by CheckTiling1DTile1D; skipped when the pool reports at most
// one thread.
TEST(Parallelize1DTile1D, MultiThreadPoolUniformTiling) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_1d_tile_1d(
    pool, CheckTiling1DTile1D, nullptr,
    kParallelize1DTile1DRange, kParallelize1DTile1DTile,
    0 /* flags */);
}
784
// Tiled 1D task callback: stores `true` (relaxed ordering) into every flag covered by
// the tile, i.e. indices start_i up to but excluding start_i + tile_i.
static void SetTrue1DTile1D(std::atomic_bool* processed_indicators, size_t start_i, size_t tile_i) {
  const size_t end_i = start_i + tile_i;
  size_t i = start_i;
  while (i < end_i) {
    processed_indicators[i].store(true, std::memory_order_relaxed);
    ++i;
  }
}
790
// After one tiled parallelize pass on a pthreadpool_create(1) pool, every per-item flag
// must be set.
TEST(Parallelize1DTile1D, SingleThreadPoolAllItemsProcessed) {
  std::vector<std::atomic_bool> indicators(kParallelize1DTile1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  // The task takes a typed context pointer, so it is cast to the generic task signature.
  const pthreadpool_task_1d_tile_1d_t task =
    reinterpret_cast<pthreadpool_task_1d_tile_1d_t>(SetTrue1DTile1D);
  void* const context = indicators.data();
  pthreadpool_parallelize_1d_tile_1d(
    pool, task, context,
    kParallelize1DTile1DRange, kParallelize1DTile1DTile,
    0 /* flags */);

  for (size_t i = 0; i != kParallelize1DTile1DRange; ++i) {
    EXPECT_TRUE(indicators[i].load(std::memory_order_relaxed))
      << "Element " << i << " not processed";
  }
}
809
// After one tiled parallelize pass on a pthreadpool_create(0) pool, every per-item flag
// must be set; skipped when that pool reports at most one thread.
TEST(Parallelize1DTile1D, MultiThreadPoolAllItemsProcessed) {
  std::vector<std::atomic_bool> indicators(kParallelize1DTile1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const auto threads_count = pthreadpool_get_threads_count(pool);
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  // The task takes a typed context pointer, so it is cast to the generic task signature.
  const pthreadpool_task_1d_tile_1d_t task =
    reinterpret_cast<pthreadpool_task_1d_tile_1d_t>(SetTrue1DTile1D);
  void* const context = indicators.data();
  pthreadpool_parallelize_1d_tile_1d(
    pool, task, context,
    kParallelize1DTile1DRange, kParallelize1DTile1DTile,
    0 /* flags */);

  for (size_t i = 0; i != kParallelize1DTile1DRange; ++i) {
    EXPECT_TRUE(indicators[i].load(std::memory_order_relaxed))
      << "Element " << i << " not processed";
  }
}
832
// Tiled 1D task callback: adds one (relaxed ordering) to every counter covered by the
// tile, i.e. indices start_i up to but excluding start_i + tile_i.
static void Increment1DTile1D(std::atomic_int* processed_counters, size_t start_i, size_t tile_i) {
  const size_t end_i = start_i + tile_i;
  size_t i = start_i;
  while (i < end_i) {
    processed_counters[i].fetch_add(1, std::memory_order_relaxed);
    ++i;
  }
}
838
// After one tiled parallelize pass on a pthreadpool_create(1) pool, every per-item counter
// must be 1.
TEST(Parallelize1DTile1D, SingleThreadPoolEachItemProcessedOnce) {
  std::vector<std::atomic_int> counters(kParallelize1DTile1DRange);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());
  pthreadpool* const pool = threadpool.get();

  const pthreadpool_task_1d_tile_1d_t task =
    reinterpret_cast<pthreadpool_task_1d_tile_1d_t>(Increment1DTile1D);
  void* const context = counters.data();
  pthreadpool_parallelize_1d_tile_1d(
    pool, task, context,
    kParallelize1DTile1DRange, kParallelize1DTile1DTile,
    0 /* flags */);

  for (size_t i = 0; i != kParallelize1DTile1DRange; ++i) {
    EXPECT_EQ(counters[i].load(std::memory_order_relaxed), 1)
      << "Element " << i << " was processed " << counters[i].load(std::memory_order_relaxed) << " times (expected: 1)";
  }
}
857
/*
 * Runs Increment1DTile1D once over the tiled 1D range on a pool created via
 * pthreadpool_create(0) and expects every counter to be exactly 1.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize1DTile1D, MultiThreadPoolEachItemProcessedOnce) {
	std::vector<std::atomic_int> counters(kParallelize1DTile1DRange);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_1d_tile_1d_t>(Increment1DTile1D);
	void* const context = static_cast<void*>(counters.data());
	pthreadpool_parallelize_1d_tile_1d(
		threadpool.get(), task, context,
		kParallelize1DTile1DRange, kParallelize1DTile1DTile,
		0 /* flags */);

	for (size_t i = 0; i != kParallelize1DTile1DRange; ++i) {
		EXPECT_EQ(counters[i].load(std::memory_order_relaxed), 1)
			<< "Element " << i << " was processed "
			<< counters[i].load(std::memory_order_relaxed)
			<< " times (expected: 1)";
	}
}
880
/*
 * Runs Increment1DTile1D kIncrementIterations times over the tiled 1D range
 * on a pool created via pthreadpool_create(1), reusing the same pool, and
 * expects every counter to equal kIncrementIterations.
 */
TEST(Parallelize1DTile1D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	std::vector<std::atomic_int> counters(kParallelize1DTile1DRange);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_1d_tile_1d_t>(Increment1DTile1D);
	void* const context = static_cast<void*>(counters.data());
	size_t iteration = 0;
	while (iteration < kIncrementIterations) {
		pthreadpool_parallelize_1d_tile_1d(
			threadpool.get(), task, context,
			kParallelize1DTile1DRange, kParallelize1DTile1DTile,
			0 /* flags */);
		iteration++;
	}

	for (size_t i = 0; i != kParallelize1DTile1DRange; ++i) {
		EXPECT_EQ(counters[i].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element " << i << " was processed "
			<< counters[i].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
902
/*
 * Runs Increment1DTile1D kIncrementIterations times over the tiled 1D range
 * on a pool created via pthreadpool_create(0), reusing the same pool, and
 * expects every counter to equal kIncrementIterations.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize1DTile1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	std::vector<std::atomic_int> counters(kParallelize1DTile1DRange);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_1d_tile_1d_t>(Increment1DTile1D);
	void* const context = static_cast<void*>(counters.data());
	size_t iteration = 0;
	while (iteration < kIncrementIterations) {
		pthreadpool_parallelize_1d_tile_1d(
			threadpool.get(), task, context,
			kParallelize1DTile1DRange, kParallelize1DTile1DTile,
			0 /* flags */);
		iteration++;
	}

	for (size_t i = 0; i != kParallelize1DTile1DRange; ++i) {
		EXPECT_EQ(counters[i].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element " << i << " was processed "
			<< counters[i].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
928
/*
 * Task for the tiled 1D tests: bumps the single shared counter once for
 * every index in [start_i, start_i + tile_i), one atomic add per item.
 */
static void IncrementSame1DTile1D(std::atomic_int* num_processed_items, size_t start_i, size_t tile_i) {
	const size_t end_i = start_i + tile_i;
	size_t i = start_i;
	while (i < end_i) {
		num_processed_items->fetch_add(1, std::memory_order_relaxed);
		i++;
	}
}
934
/*
 * Runs IncrementSame1DTile1D, which adds to one shared counter once per item,
 * over the tiled 1D range on a pool created via pthreadpool_create(0).
 * Expects the counter to equal the size of the range once the call returns.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize1DTile1D, MultiThreadPoolHighContention) {
	/* Direct initialization instead of ATOMIC_VAR_INIT, which is deprecated since C++20 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_1d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_1d_tile_1d_t>(IncrementSame1DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize1DTile1DRange, kParallelize1DTile1DTile,
		0 /* flags */);
	/* The counter is an int: compare int with int rather than int with size_t */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), static_cast<int>(kParallelize1DTile1DRange));
}
953
WorkImbalance1DTile1D(std::atomic_int * num_processed_items,size_t start_i,size_t tile_i)954 static void WorkImbalance1DTile1D(std::atomic_int* num_processed_items, size_t start_i, size_t tile_i) {
955 num_processed_items->fetch_add(tile_i, std::memory_order_relaxed);
956 if (start_i == 0) {
957 /* Spin-wait until all items are computed */
958 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize1DTile1DRange) {
959 std::atomic_thread_fence(std::memory_order_acquire);
960 }
961 }
962 }
963
/*
 * Runs WorkImbalance1DTile1D over the tiled 1D range on a pool created via
 * pthreadpool_create(0). The task for the tile starting at 0 spins until the
 * shared counter reaches the full range, so the call can only finish if the
 * remaining tiles are processed by other threads.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize1DTile1D, MultiThreadPoolWorkStealing) {
	/* Direct initialization instead of ATOMIC_VAR_INIT, which is deprecated since C++20 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_1d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_1d_tile_1d_t>(WorkImbalance1DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize1DTile1DRange, kParallelize1DTile1DTile,
		0 /* flags */);
	/* The counter is an int: compare int with int rather than int with size_t */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), static_cast<int>(kParallelize1DTile1DRange));
}
982
/*
 * No-op task for the Parallelize2D "Completes" tests: ignores the context
 * pointer and both indices.
 */
static void ComputeNothing2D(void*, size_t, size_t) {
}
985
/*
 * Dispatches the no-op ComputeNothing2D task over the 2D range on a pool
 * created via pthreadpool_create(1). The only assertion is that the pool
 * was created; the test otherwise just needs the call to return.
 */
TEST(Parallelize2D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d(
		threadpool.get(),
		&ComputeNothing2D,
		context,
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);
}
996
/*
 * Dispatches the no-op ComputeNothing2D task over the 2D range on a pool
 * created via pthreadpool_create(0). The only assertion is that the pool
 * was created. Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d(
		threadpool.get(),
		&ComputeNothing2D,
		context,
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);
}
1012
/*
 * Task for the Parallelize2D bounds tests: expects each index to be
 * strictly below its range constant. The context pointer is unused.
 */
static void CheckBounds2D(void*, size_t i, size_t j) {
	EXPECT_LT(i, kParallelize2DRangeI);
	EXPECT_LT(j, kParallelize2DRangeJ);
}
1017
/*
 * Runs CheckBounds2D over the 2D range on a pool created via
 * pthreadpool_create(1); the bounds expectations live in the task itself.
 */
TEST(Parallelize2D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d(
		threadpool.get(), &CheckBounds2D, context,
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);
}
1029
/*
 * Runs CheckBounds2D over the 2D range on a pool created via
 * pthreadpool_create(0); the bounds expectations live in the task itself.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d(
		threadpool.get(), &CheckBounds2D, context,
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);
}
1045
SetTrue2D(std::atomic_bool * processed_indicators,size_t i,size_t j)1046 static void SetTrue2D(std::atomic_bool* processed_indicators, size_t i, size_t j) {
1047 const size_t linear_idx = i * kParallelize2DRangeJ + j;
1048 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
1049 }
1050
/*
 * Runs SetTrue2D over the 2D range on a pool created via pthreadpool_create(1)
 * and expects the indicator of every (i, j) item to be set afterwards.
 */
TEST(Parallelize2D, SingleThreadPoolAllItemsProcessed) {
	std::vector<std::atomic_bool> indicators(kParallelize2DRangeI * kParallelize2DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_2d_t>(SetTrue2D);
	void* const context = static_cast<void*>(indicators.data());
	pthreadpool_parallelize_2d(
		threadpool.get(), task, context,
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DRangeI * kParallelize2DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DRangeJ;
		const size_t j = linear_idx % kParallelize2DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ") not processed";
	}
}
1072
/*
 * Runs SetTrue2D over the 2D range on a pool created via pthreadpool_create(0)
 * and expects the indicator of every (i, j) item to be set afterwards.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2D, MultiThreadPoolAllItemsProcessed) {
	std::vector<std::atomic_bool> indicators(kParallelize2DRangeI * kParallelize2DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_2d_t>(SetTrue2D);
	void* const context = static_cast<void*>(indicators.data());
	pthreadpool_parallelize_2d(
		threadpool.get(), task, context,
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DRangeI * kParallelize2DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DRangeJ;
		const size_t j = linear_idx % kParallelize2DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ") not processed";
	}
}
1098
Increment2D(std::atomic_int * processed_counters,size_t i,size_t j)1099 static void Increment2D(std::atomic_int* processed_counters, size_t i, size_t j) {
1100 const size_t linear_idx = i * kParallelize2DRangeJ + j;
1101 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
1102 }
1103
/*
 * Runs Increment2D once over the 2D range on a pool created via
 * pthreadpool_create(1) and expects every counter to be exactly 1.
 */
TEST(Parallelize2D, SingleThreadPoolEachItemProcessedOnce) {
	std::vector<std::atomic_int> counters(kParallelize2DRangeI * kParallelize2DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_2d_t>(Increment2D);
	void* const context = static_cast<void*>(counters.data());
	pthreadpool_parallelize_2d(
		threadpool.get(), task, context,
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DRangeI * kParallelize2DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DRangeJ;
		const size_t j = linear_idx % kParallelize2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
1126
/*
 * Runs Increment2D once over the 2D range on a pool created via
 * pthreadpool_create(0) and expects every counter to be exactly 1.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2D, MultiThreadPoolEachItemProcessedOnce) {
	std::vector<std::atomic_int> counters(kParallelize2DRangeI * kParallelize2DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_2d_t>(Increment2D);
	void* const context = static_cast<void*>(counters.data());
	pthreadpool_parallelize_2d(
		threadpool.get(), task, context,
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DRangeI * kParallelize2DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DRangeJ;
		const size_t j = linear_idx % kParallelize2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
1153
/*
 * Runs Increment2D kIncrementIterations times over the 2D range on a pool
 * created via pthreadpool_create(1), reusing the same pool, and expects
 * every counter to equal kIncrementIterations.
 */
TEST(Parallelize2D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	std::vector<std::atomic_int> counters(kParallelize2DRangeI * kParallelize2DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_2d_t>(Increment2D);
	void* const context = static_cast<void*>(counters.data());
	size_t iteration = 0;
	while (iteration < kIncrementIterations) {
		pthreadpool_parallelize_2d(
			threadpool.get(), task, context,
			kParallelize2DRangeI, kParallelize2DRangeJ,
			0 /* flags */);
		iteration++;
	}

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DRangeI * kParallelize2DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DRangeJ;
		const size_t j = linear_idx % kParallelize2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
1179
/*
 * Runs Increment2D kIncrementIterations times over the 2D range on a pool
 * created via pthreadpool_create(0), reusing the same pool, and expects
 * every counter to equal kIncrementIterations.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	std::vector<std::atomic_int> counters(kParallelize2DRangeI * kParallelize2DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_2d_t>(Increment2D);
	void* const context = static_cast<void*>(counters.data());
	size_t iteration = 0;
	while (iteration < kIncrementIterations) {
		pthreadpool_parallelize_2d(
			threadpool.get(), task, context,
			kParallelize2DRangeI, kParallelize2DRangeJ,
			0 /* flags */);
		iteration++;
	}

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DRangeI * kParallelize2DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DRangeJ;
		const size_t j = linear_idx % kParallelize2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
1209
/*
 * Task for the Parallelize2D contention test: adds 1 to the single shared
 * counter for every item; the (i, j) indices are not used.
 */
static void IncrementSame2D(std::atomic_int* num_processed_items, size_t i, size_t j) {
	std::atomic_int& shared_counter = *num_processed_items;
	shared_counter.fetch_add(1, std::memory_order_relaxed);
}
1213
/*
 * Runs IncrementSame2D, which adds to one shared counter once per item,
 * over the 2D range on a pool created via pthreadpool_create(0).
 * Expects the counter to equal the number of items once the call returns.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2D, MultiThreadPoolHighContention) {
	/* Direct initialization instead of ATOMIC_VAR_INIT, which is deprecated since C++20 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_t>(IncrementSame2D),
		static_cast<void*>(&num_processed_items),
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);
	/* The counter is an int: compare int with int rather than int with size_t */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), static_cast<int>(kParallelize2DRangeI * kParallelize2DRangeJ));
}
1232
WorkImbalance2D(std::atomic_int * num_processed_items,size_t i,size_t j)1233 static void WorkImbalance2D(std::atomic_int* num_processed_items, size_t i, size_t j) {
1234 num_processed_items->fetch_add(1, std::memory_order_relaxed);
1235 if (i == 0 && j == 0) {
1236 /* Spin-wait until all items are computed */
1237 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize2DRangeI * kParallelize2DRangeJ) {
1238 std::atomic_thread_fence(std::memory_order_acquire);
1239 }
1240 }
1241 }
1242
/*
 * Runs WorkImbalance2D over the 2D range on a pool created via
 * pthreadpool_create(0). The task for item (0, 0) spins until the shared
 * counter reaches the full item count, so the call can only finish if the
 * remaining items are processed by other threads.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2D, MultiThreadPoolWorkStealing) {
	/* Direct initialization instead of ATOMIC_VAR_INIT, which is deprecated since C++20 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_t>(WorkImbalance2D),
		static_cast<void*>(&num_processed_items),
		kParallelize2DRangeI, kParallelize2DRangeJ,
		0 /* flags */);
	/* The counter is an int: compare int with int rather than int with size_t */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), static_cast<int>(kParallelize2DRangeI * kParallelize2DRangeJ));
}
1261
/*
 * No-op task for the Parallelize2DTile1D "Completes" tests: ignores the
 * context pointer and all three size arguments.
 */
static void ComputeNothing2DTile1D(void*, size_t, size_t, size_t) {
}
1264
/*
 * Dispatches the no-op ComputeNothing2DTile1D task over the 2D range tiled
 * along J on a pool created via pthreadpool_create(1). The only assertion
 * is that the pool was created.
 */
TEST(Parallelize2DTile1D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(),
		&ComputeNothing2DTile1D,
		context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);
}
1275
/*
 * Dispatches the no-op ComputeNothing2DTile1D task over the 2D range tiled
 * along J on a pool created via pthreadpool_create(0). The only assertion
 * is that the pool was created.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2DTile1D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(),
		&ComputeNothing2DTile1D,
		context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);
}
1291
/*
 * Task for the Parallelize2DTile1D bounds tests: expects i and the tile
 * start to be strictly inside their ranges, and the tile end
 * (start_j + tile_j) not to exceed the J range. The context pointer is unused.
 */
static void CheckBounds2DTile1D(void*, size_t i, size_t start_j, size_t tile_j) {
	EXPECT_LT(i, kParallelize2DTile1DRangeI);
	EXPECT_LT(start_j, kParallelize2DTile1DRangeJ);
	EXPECT_LE(start_j + tile_j, kParallelize2DTile1DRangeJ);
}
1297
/*
 * Runs CheckBounds2DTile1D over the 2D range tiled along J on a pool created
 * via pthreadpool_create(1); the bounds expectations live in the task itself.
 */
TEST(Parallelize2DTile1D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(), &CheckBounds2DTile1D, context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);
}
1309
/*
 * Runs CheckBounds2DTile1D over the 2D range tiled along J on a pool created
 * via pthreadpool_create(0); the bounds expectations live in the task itself.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2DTile1D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(), &CheckBounds2DTile1D, context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);
}
1325
CheckTiling2DTile1D(void *,size_t i,size_t start_j,size_t tile_j)1326 static void CheckTiling2DTile1D(void*, size_t i, size_t start_j, size_t tile_j) {
1327 EXPECT_GT(tile_j, 0);
1328 EXPECT_LE(tile_j, kParallelize2DTile1DTileJ);
1329 EXPECT_EQ(start_j % kParallelize2DTile1DTileJ, 0);
1330 EXPECT_EQ(tile_j, std::min<size_t>(kParallelize2DTile1DTileJ, kParallelize2DTile1DRangeJ - start_j));
1331 }
1332
/*
 * Runs CheckTiling2DTile1D over the 2D range tiled along J on a pool created
 * via pthreadpool_create(1); the tiling expectations live in the task itself.
 */
TEST(Parallelize2DTile1D, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(), &CheckTiling2DTile1D, context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);
}
1344
/*
 * Runs CheckTiling2DTile1D over the 2D range tiled along J on a pool created
 * via pthreadpool_create(0); the tiling expectations live in the task itself.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2DTile1D, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(), &CheckTiling2DTile1D, context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);
}
1360
SetTrue2DTile1D(std::atomic_bool * processed_indicators,size_t i,size_t start_j,size_t tile_j)1361 static void SetTrue2DTile1D(std::atomic_bool* processed_indicators, size_t i, size_t start_j, size_t tile_j) {
1362 for (size_t j = start_j; j < start_j + tile_j; j++) {
1363 const size_t linear_idx = i * kParallelize2DTile1DRangeJ + j;
1364 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
1365 }
1366 }
1367
/*
 * Runs SetTrue2DTile1D over the 2D range tiled along J on a pool created via
 * pthreadpool_create(1) and expects the indicator of every (i, j) item to be
 * set afterwards.
 */
TEST(Parallelize2DTile1D, SingleThreadPoolAllItemsProcessed) {
	std::vector<std::atomic_bool> indicators(kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_2d_tile_1d_t>(SetTrue2DTile1D);
	void* const context = static_cast<void*>(indicators.data());
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(), task, context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile1DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile1DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ") not processed";
	}
}
1389
/*
 * Runs SetTrue2DTile1D over the 2D range tiled along J on a pool created via
 * pthreadpool_create(0) and expects the indicator of every (i, j) item to be
 * set afterwards. Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2DTile1D, MultiThreadPoolAllItemsProcessed) {
	std::vector<std::atomic_bool> indicators(kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_2d_tile_1d_t>(SetTrue2DTile1D);
	void* const context = static_cast<void*>(indicators.data());
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(), task, context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile1DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile1DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ") not processed";
	}
}
1415
Increment2DTile1D(std::atomic_int * processed_counters,size_t i,size_t start_j,size_t tile_j)1416 static void Increment2DTile1D(std::atomic_int* processed_counters, size_t i, size_t start_j, size_t tile_j) {
1417 for (size_t j = start_j; j < start_j + tile_j; j++) {
1418 const size_t linear_idx = i * kParallelize2DTile1DRangeJ + j;
1419 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
1420 }
1421 }
1422
/*
 * Runs Increment2DTile1D once over the 2D range tiled along J on a pool
 * created via pthreadpool_create(1) and expects every counter to be exactly 1.
 */
TEST(Parallelize2DTile1D, SingleThreadPoolEachItemProcessedOnce) {
	std::vector<std::atomic_int> counters(kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_2d_tile_1d_t>(Increment2DTile1D);
	void* const context = static_cast<void*>(counters.data());
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(), task, context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile1DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile1DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
1445
/*
 * Runs Increment2DTile1D once over the 2D range tiled along J on a pool
 * created via pthreadpool_create(0) and expects every counter to be exactly 1.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2DTile1D, MultiThreadPoolEachItemProcessedOnce) {
	std::vector<std::atomic_int> counters(kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_2d_tile_1d_t>(Increment2DTile1D);
	void* const context = static_cast<void*>(counters.data());
	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(), task, context,
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile1DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile1DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
1472
/*
 * Runs Increment2DTile1D kIncrementIterations times over the 2D range tiled
 * along J on a pool created via pthreadpool_create(1), reusing the same pool,
 * and expects every counter to equal kIncrementIterations.
 */
TEST(Parallelize2DTile1D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	std::vector<std::atomic_int> counters(kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_2d_tile_1d_t>(Increment2DTile1D);
	void* const context = static_cast<void*>(counters.data());
	size_t iteration = 0;
	while (iteration < kIncrementIterations) {
		pthreadpool_parallelize_2d_tile_1d(
			threadpool.get(), task, context,
			kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
			0 /* flags */);
		iteration++;
	}

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile1DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile1DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
1498
/*
 * Runs Increment2DTile1D kIncrementIterations times over the 2D range tiled
 * along J on a pool created via pthreadpool_create(0), reusing the same pool,
 * and expects every counter to equal kIncrementIterations.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2DTile1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	std::vector<std::atomic_int> counters(kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_2d_tile_1d_t>(Increment2DTile1D);
	void* const context = static_cast<void*>(counters.data());
	size_t iteration = 0;
	while (iteration < kIncrementIterations) {
		pthreadpool_parallelize_2d_tile_1d(
			threadpool.get(), task, context,
			kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
			0 /* flags */);
		iteration++;
	}

	/* Walk the flat array in order and recover (i, j) for the failure message */
	const size_t items_count = kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ;
	for (size_t linear_idx = 0; linear_idx != items_count; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile1DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile1DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
1528
/*
 * Task for the Parallelize2DTile1D contention test: bumps the single shared
 * counter once for every j in [start_j, start_j + tile_j), one atomic add
 * per item. The i index is not used.
 */
static void IncrementSame2DTile1D(std::atomic_int* num_processed_items, size_t i, size_t start_j, size_t tile_j) {
	const size_t end_j = start_j + tile_j;
	size_t j = start_j;
	while (j < end_j) {
		num_processed_items->fetch_add(1, std::memory_order_relaxed);
		j++;
	}
}
1534
/*
 * Runs IncrementSame2DTile1D, which adds to one shared counter once per
 * item, over the 2D range tiled along J on a pool created via
 * pthreadpool_create(0). Expects the counter to equal the number of items
 * once the call returns.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2DTile1D, MultiThreadPoolHighContention) {
	/* Direct initialization instead of ATOMIC_VAR_INIT, which is deprecated since C++20 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_1d_t>(IncrementSame2DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);
	/* The counter is an int: compare int with int rather than int with size_t */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), static_cast<int>(kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ));
}
1553
WorkImbalance2DTile1D(std::atomic_int * num_processed_items,size_t i,size_t start_j,size_t tile_j)1554 static void WorkImbalance2DTile1D(std::atomic_int* num_processed_items, size_t i, size_t start_j, size_t tile_j) {
1555 num_processed_items->fetch_add(tile_j, std::memory_order_relaxed);
1556 if (i == 0 && start_j == 0) {
1557 /* Spin-wait until all items are computed */
1558 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ) {
1559 std::atomic_thread_fence(std::memory_order_acquire);
1560 }
1561 }
1562 }
1563
/*
 * Runs WorkImbalance2DTile1D over the 2D range tiled along J on a pool
 * created via pthreadpool_create(0). The task for the first tile
 * (i == 0, start_j == 0) spins until the shared counter reaches the full
 * item count, so the call can only finish if the remaining tiles are
 * processed by other threads.
 * Skipped unless the pool reports more than one thread.
 */
TEST(Parallelize2DTile1D, MultiThreadPoolWorkStealing) {
	/* Direct initialization instead of ATOMIC_VAR_INIT, which is deprecated since C++20 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_1d_t>(WorkImbalance2DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize2DTile1DRangeI, kParallelize2DTile1DRangeJ, kParallelize2DTile1DTileJ,
		0 /* flags */);
	/* The counter is an int: compare int with int rather than int with size_t */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), static_cast<int>(kParallelize2DTile1DRangeI * kParallelize2DTile1DRangeJ));
}
1582
/*
 * No-op task for the Parallelize2DTile2D "Completes" tests: ignores the
 * context pointer and all four size arguments.
 */
static void ComputeNothing2DTile2D(void*, size_t, size_t, size_t, size_t) {
}
1585
/*
 * Dispatches the no-op ComputeNothing2DTile2D task over the 2D range tiled
 * along both dimensions on a pool created via pthreadpool_create(1).
 * The only assertion is that the pool was created.
 */
TEST(Parallelize2DTile2D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* The task ignores its context, so none is supplied */
	void* const context = nullptr;
	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		&ComputeNothing2DTile2D,
		context,
		kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
		0 /* flags */);
}
1597
/*
 * Smoke test: a no-op task over the 2D range with 2D tiling on a pool created with
 * threads_count = 0. Skipped when that pool has at most one thread.
 */
TEST(Parallelize2DTile2D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		ComputeNothing2DTile2D,
		nullptr /* task argument (unused) */,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
1614
CheckBounds2DTile2D(void *,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)1615 static void CheckBounds2DTile2D(void*, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
1616 EXPECT_LT(start_i, kParallelize2DTile2DRangeI);
1617 EXPECT_LT(start_j, kParallelize2DTile2DRangeJ);
1618 EXPECT_LE(start_i + tile_i, kParallelize2DTile2DRangeI);
1619 EXPECT_LE(start_j + tile_j, kParallelize2DTile2DRangeJ);
1620 }
1621
/* Runs the bounds-checking task over every tile on a pool created with one thread. */
TEST(Parallelize2DTile2D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		CheckBounds2DTile2D,
		nullptr /* task argument (unused) */,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
1634
/*
 * Runs the bounds-checking task over every tile on a pool created with
 * threads_count = 0. Skipped when that pool has at most one thread.
 */
TEST(Parallelize2DTile2D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		CheckBounds2DTile2D,
		nullptr /* task argument (unused) */,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
1651
CheckTiling2DTile2D(void *,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)1652 static void CheckTiling2DTile2D(void*, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
1653 EXPECT_GT(tile_i, 0);
1654 EXPECT_LE(tile_i, kParallelize2DTile2DTileI);
1655 EXPECT_EQ(start_i % kParallelize2DTile2DTileI, 0);
1656 EXPECT_EQ(tile_i, std::min<size_t>(kParallelize2DTile2DTileI, kParallelize2DTile2DRangeI - start_i));
1657
1658 EXPECT_GT(tile_j, 0);
1659 EXPECT_LE(tile_j, kParallelize2DTile2DTileJ);
1660 EXPECT_EQ(start_j % kParallelize2DTile2DTileJ, 0);
1661 EXPECT_EQ(tile_j, std::min<size_t>(kParallelize2DTile2DTileJ, kParallelize2DTile2DRangeJ - start_j));
1662 }
1663
/* Runs the tile-shape checking task over every tile on a pool created with one thread. */
TEST(Parallelize2DTile2D, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		CheckTiling2DTile2D,
		nullptr /* task argument (unused) */,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
1676
/*
 * Runs the tile-shape checking task over every tile on a pool created with
 * threads_count = 0. Skipped when that pool has at most one thread.
 */
TEST(Parallelize2DTile2D, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		CheckTiling2DTile2D,
		nullptr /* task argument (unused) */,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
1693
SetTrue2DTile2D(std::atomic_bool * processed_indicators,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)1694 static void SetTrue2DTile2D(std::atomic_bool* processed_indicators, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
1695 for (size_t i = start_i; i < start_i + tile_i; i++) {
1696 for (size_t j = start_j; j < start_j + tile_j; j++) {
1697 const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
1698 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
1699 }
1700 }
1701 }
1702
/*
 * Runs SetTrue2DTile2D on a pool created with one thread and then checks that the
 * indicator of every element of the range reads true.
 */
TEST(Parallelize2DTile2D, SingleThreadPoolAllItemsProcessed) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_bool> indicators(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_t>(SetTrue2DTile2D),
		static_cast<void*>(indicators.data()),
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ") not processed";
	}
}
1725
/*
 * Runs SetTrue2DTile2D on a pool created with threads_count = 0 and then checks
 * that the indicator of every element of the range reads true. Skipped when the
 * pool has at most one thread.
 */
TEST(Parallelize2DTile2D, MultiThreadPoolAllItemsProcessed) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_bool> indicators(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_t>(SetTrue2DTile2D),
		static_cast<void*>(indicators.data()),
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ") not processed";
	}
}
1752
Increment2DTile2D(std::atomic_int * processed_counters,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)1753 static void Increment2DTile2D(std::atomic_int* processed_counters, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
1754 for (size_t i = start_i; i < start_i + tile_i; i++) {
1755 for (size_t j = start_j; j < start_j + tile_j; j++) {
1756 const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
1757 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
1758 }
1759 }
1760 }
1761
/*
 * Runs Increment2DTile2D once on a pool created with one thread and then checks
 * that the counter of every element of the range equals 1.
 */
TEST(Parallelize2DTile2D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_t>(Increment2DTile2D),
		static_cast<void*>(counters.data()),
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
1785
/*
 * Runs Increment2DTile2D once on a pool created with threads_count = 0 and then
 * checks that the counter of every element of the range equals 1. Skipped when the
 * pool has at most one thread.
 */
TEST(Parallelize2DTile2D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_t>(Increment2DTile2D),
		static_cast<void*>(counters.data()),
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
1813
/*
 * Runs Increment2DTile2D kIncrementIterations times on a pool created with one
 * thread and then checks that every counter equals kIncrementIterations.
 */
TEST(Parallelize2DTile2D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* Each pass is a separate parallelize call over the same counters. */
	for (size_t pass = 0; pass < kIncrementIterations; ++pass) {
		pthreadpool_parallelize_2d_tile_2d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_2d_tile_2d_t>(Increment2DTile2D),
			static_cast<void*>(counters.data()),
			kParallelize2DTile2DRangeI,
			kParallelize2DTile2DRangeJ,
			kParallelize2DTile2DTileI,
			kParallelize2DTile2DTileJ,
			0 /* flags */);
	}

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
1840
/*
 * Runs Increment2DTile2D kIncrementIterations times on a pool created with
 * threads_count = 0 and then checks that every counter equals
 * kIncrementIterations. Skipped when the pool has at most one thread.
 */
TEST(Parallelize2DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	/* Each pass is a separate parallelize call over the same counters. */
	for (size_t pass = 0; pass < kIncrementIterations; ++pass) {
		pthreadpool_parallelize_2d_tile_2d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_2d_tile_2d_t>(Increment2DTile2D),
			static_cast<void*>(counters.data()),
			kParallelize2DTile2DRangeI,
			kParallelize2DTile2DRangeJ,
			kParallelize2DTile2DTileI,
			kParallelize2DTile2DTileJ,
			0 /* flags */);
	}

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
1871
/*
 * Task that adds 1 to the single shared counter once per element covered by the
 * tile, i.e. one fetch_add for each (i, j) pair rather than one per tile.
 */
static void IncrementSame2DTile2D(std::atomic_int* num_processed_items, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
	const size_t end_i = start_i + tile_i;
	const size_t end_j = start_j + tile_j;
	for (size_t row = start_i; row < end_i; ++row) {
		for (size_t col = start_j; col < end_j; ++col) {
			num_processed_items->fetch_add(1, std::memory_order_relaxed);
		}
	}
}
1879
/*
 * Runs IncrementSame2DTile2D, which bumps one shared counter once per element, on
 * a pool created with threads_count = 0 and checks that the counter ends up equal
 * to the total number of items. Skipped when the pool has at most one thread.
 */
TEST(Parallelize2DTile2D, MultiThreadPoolHighContention) {
	/* Direct initialization; the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* The scenario needs more than one thread. */
	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_t>(IncrementSame2DTile2D),
		static_cast<void*>(&num_processed_items),
		kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Compare as size_t so both operands of the check have the same unsigned type. */
	EXPECT_EQ(
		static_cast<size_t>(num_processed_items.load(std::memory_order_relaxed)),
		kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
}
1899
WorkImbalance2DTile2D(std::atomic_int * num_processed_items,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)1900 static void WorkImbalance2DTile2D(std::atomic_int* num_processed_items, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
1901 num_processed_items->fetch_add(tile_i * tile_j, std::memory_order_relaxed);
1902 if (start_i == 0 && start_j == 0) {
1903 /* Spin-wait until all items are computed */
1904 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ) {
1905 std::atomic_thread_fence(std::memory_order_acquire);
1906 }
1907 }
1908 }
1909
/*
 * Runs WorkImbalance2DTile2D over the 2D range with 2D tiling on a pool created
 * with threads_count = 0 and checks that the shared counter ends up equal to the
 * total number of items. The task invoked for the tile at (0, 0) spins until the
 * counter reaches that total (presumably so that the other threads have to pick
 * up the rest of its share).
 */
TEST(Parallelize2DTile2D, MultiThreadPoolWorkStealing) {
	/* Direct initialization; the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* The scenario needs more than one thread. */
	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_t>(WorkImbalance2DTile2D),
		static_cast<void*>(&num_processed_items),
		kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Compare as size_t so both operands of the check have the same unsigned type. */
	EXPECT_EQ(
		static_cast<size_t>(num_processed_items.load(std::memory_order_relaxed)),
		kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
}
1929
/* No-op task with the uarch-aware 2D-range/2D-tile callback shape; every argument is ignored. */
static void ComputeNothing2DTile2DWithUArch(void* /* argument */, uint32_t /* uarch_index */, size_t /* start_i */, size_t /* start_j */, size_t /* tile_i */, size_t /* tile_j */) {
	/* Intentionally empty. */
}
1932
/* Smoke test: a no-op uarch-aware task over the 2D range with 2D tiling on a pool created with one thread. */
TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		ComputeNothing2DTile2DWithUArch,
		nullptr /* task argument (unused) */,
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
1945
/*
 * Smoke test: a no-op uarch-aware task over the 2D range with 2D tiling on a pool
 * created with threads_count = 0. Skipped when that pool has at most one thread.
 */
TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		ComputeNothing2DTile2DWithUArch,
		nullptr /* task argument (unused) */,
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
1963
CheckUArch2DTile2DWithUArch(void *,uint32_t uarch_index,size_t,size_t,size_t,size_t)1964 static void CheckUArch2DTile2DWithUArch(void*, uint32_t uarch_index, size_t, size_t, size_t, size_t) {
1965 if (uarch_index != kDefaultUArchIndex) {
1966 EXPECT_LE(uarch_index, kMaxUArchIndex);
1967 }
1968 }
1969
/* Runs the uarch-index checking task over every tile on a pool created with one thread. */
TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolUArchInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		CheckUArch2DTile2DWithUArch,
		nullptr /* task argument (unused) */,
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
1983
/*
 * Runs the uarch-index checking task over every tile on a pool created with
 * threads_count = 0. Skipped when that pool has at most one thread.
 */
TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolUArchInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		CheckUArch2DTile2DWithUArch,
		nullptr /* task argument (unused) */,
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
2001
CheckBounds2DTile2DWithUArch(void *,uint32_t,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)2002 static void CheckBounds2DTile2DWithUArch(void*, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
2003 EXPECT_LT(start_i, kParallelize2DTile2DRangeI);
2004 EXPECT_LT(start_j, kParallelize2DTile2DRangeJ);
2005 EXPECT_LE(start_i + tile_i, kParallelize2DTile2DRangeI);
2006 EXPECT_LE(start_j + tile_j, kParallelize2DTile2DRangeJ);
2007 }
2008
/* Runs the uarch-aware bounds-checking task over every tile on a pool created with one thread. */
TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		CheckBounds2DTile2DWithUArch,
		nullptr /* task argument (unused) */,
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
2022
/*
 * Runs the uarch-aware bounds-checking task over every tile on a pool created
 * with threads_count = 0. Skipped when that pool has at most one thread.
 */
TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		CheckBounds2DTile2DWithUArch,
		nullptr /* task argument (unused) */,
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
2040
CheckTiling2DTile2DWithUArch(void *,uint32_t,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)2041 static void CheckTiling2DTile2DWithUArch(void*, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
2042 EXPECT_GT(tile_i, 0);
2043 EXPECT_LE(tile_i, kParallelize2DTile2DTileI);
2044 EXPECT_EQ(start_i % kParallelize2DTile2DTileI, 0);
2045 EXPECT_EQ(tile_i, std::min<size_t>(kParallelize2DTile2DTileI, kParallelize2DTile2DRangeI - start_i));
2046
2047 EXPECT_GT(tile_j, 0);
2048 EXPECT_LE(tile_j, kParallelize2DTile2DTileJ);
2049 EXPECT_EQ(start_j % kParallelize2DTile2DTileJ, 0);
2050 EXPECT_EQ(tile_j, std::min<size_t>(kParallelize2DTile2DTileJ, kParallelize2DTile2DRangeJ - start_j));
2051 }
2052
/* Runs the uarch-aware tile-shape checking task over every tile on a pool created with one thread. */
TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		CheckTiling2DTile2DWithUArch,
		nullptr /* task argument (unused) */,
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
2066
/*
 * Runs the uarch-aware tile-shape checking task over every tile on a pool created
 * with threads_count = 0. Skipped when that pool has at most one thread.
 */
TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		CheckTiling2DTile2DWithUArch,
		nullptr /* task argument (unused) */,
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);
}
2084
SetTrue2DTile2DWithUArch(std::atomic_bool * processed_indicators,uint32_t,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)2085 static void SetTrue2DTile2DWithUArch(std::atomic_bool* processed_indicators, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
2086 for (size_t i = start_i; i < start_i + tile_i; i++) {
2087 for (size_t j = start_j; j < start_j + tile_j; j++) {
2088 const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
2089 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
2090 }
2091 }
2092 }
2093
/*
 * Runs SetTrue2DTile2DWithUArch on a pool created with one thread and then checks
 * that the indicator of every element of the range reads true.
 */
TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolAllItemsProcessed) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_bool> indicators(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(SetTrue2DTile2DWithUArch),
		static_cast<void*>(indicators.data()),
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ") not processed";
	}
}
2117
/*
 * Runs SetTrue2DTile2DWithUArch on a pool created with threads_count = 0 and then
 * checks that the indicator of every element of the range reads true. Skipped
 * when the pool has at most one thread.
 */
TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolAllItemsProcessed) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_bool> indicators(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(SetTrue2DTile2DWithUArch),
		static_cast<void*>(indicators.data()),
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ") not processed";
	}
}
2145
Increment2DTile2DWithUArch(std::atomic_int * processed_counters,uint32_t,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)2146 static void Increment2DTile2DWithUArch(std::atomic_int* processed_counters, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
2147 for (size_t i = start_i; i < start_i + tile_i; i++) {
2148 for (size_t j = start_j; j < start_j + tile_j; j++) {
2149 const size_t linear_idx = i * kParallelize2DTile2DRangeJ + j;
2150 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
2151 }
2152 }
2153 }
2154
/*
 * Runs Increment2DTile2DWithUArch once on a pool created with one thread and then
 * checks that the counter of every element of the range equals 1.
 */
TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolEachItemProcessedOnce) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(Increment2DTile2DWithUArch),
		static_cast<void*>(counters.data()),
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
2179
/*
 * Runs Increment2DTile2DWithUArch once on a pool created with threads_count = 0
 * and then checks that the counter of every element of the range equals 1.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolEachItemProcessedOnce) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(Increment2DTile2DWithUArch),
		static_cast<void*>(counters.data()),
		kDefaultUArchIndex,
		kMaxUArchIndex,
		kParallelize2DTile2DRangeI,
		kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI,
		kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
2208
/*
 * Runs Increment2DTile2DWithUArch kIncrementIterations times on a pool created
 * with one thread and then checks that every counter equals kIncrementIterations.
 */
TEST(Parallelize2DTile2DWithUArch, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* Each pass is a separate parallelize call over the same counters. */
	for (size_t pass = 0; pass < kIncrementIterations; ++pass) {
		pthreadpool_parallelize_2d_tile_2d_with_uarch(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(Increment2DTile2DWithUArch),
			static_cast<void*>(counters.data()),
			kDefaultUArchIndex,
			kMaxUArchIndex,
			kParallelize2DTile2DRangeI,
			kParallelize2DTile2DRangeJ,
			kParallelize2DTile2DTileI,
			kParallelize2DTile2DTileJ,
			0 /* flags */);
	}

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
2236
/*
 * Runs Increment2DTile2DWithUArch kIncrementIterations times on a pool created
 * with threads_count = 0 and then checks that every counter equals
 * kIncrementIterations. Skipped when the pool has at most one thread.
 */
TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t num_items = kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	/* Each pass is a separate parallelize call over the same counters. */
	for (size_t pass = 0; pass < kIncrementIterations; ++pass) {
		pthreadpool_parallelize_2d_tile_2d_with_uarch(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(Increment2DTile2DWithUArch),
			static_cast<void*>(counters.data()),
			kDefaultUArchIndex,
			kMaxUArchIndex,
			kParallelize2DTile2DRangeI,
			kParallelize2DTile2DRangeJ,
			kParallelize2DTile2DTileI,
			kParallelize2DTile2DTileJ,
			0 /* flags */);
	}

	/* Walk the row-major storage once, recovering (i, j) for the failure message. */
	for (size_t linear_idx = 0; linear_idx < num_items; ++linear_idx) {
		const size_t i = linear_idx / kParallelize2DTile2DRangeJ;
		const size_t j = linear_idx % kParallelize2DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
2268
/*
 * Uarch-aware task that adds 1 to the single shared counter once per element
 * covered by the tile, i.e. one fetch_add for each (i, j) pair rather than one
 * per tile. The uarch index is ignored.
 */
static void IncrementSame2DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
	const size_t end_i = start_i + tile_i;
	const size_t end_j = start_j + tile_j;
	for (size_t row = start_i; row < end_i; ++row) {
		for (size_t col = start_j; col < end_j; ++col) {
			num_processed_items->fetch_add(1, std::memory_order_relaxed);
		}
	}
}
2276
/*
 * Runs IncrementSame2DTile2DWithUArch, which bumps one shared counter once per
 * element, on a pool created with threads_count = 0 and checks that the counter
 * ends up equal to the total number of items. Skipped when the pool has at most
 * one thread.
 */
TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolHighContention) {
	/* Direct initialization; the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* The scenario needs more than one thread. */
	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(IncrementSame2DTile2DWithUArch),
		static_cast<void*>(&num_processed_items),
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
		0 /* flags */);

	/* Compare as size_t so both operands of the check have the same unsigned type. */
	EXPECT_EQ(
		static_cast<size_t>(num_processed_items.load(std::memory_order_relaxed)),
		kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
}
2297
WorkImbalance2DTile2DWithUArch(std::atomic_int * num_processed_items,uint32_t,size_t start_i,size_t start_j,size_t tile_i,size_t tile_j)2298 static void WorkImbalance2DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t start_i, size_t start_j, size_t tile_i, size_t tile_j) {
2299 num_processed_items->fetch_add(tile_i * tile_j, std::memory_order_relaxed);
2300 if (start_i == 0 && start_j == 0) {
2301 /* Spin-wait until all items are computed */
2302 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ) {
2303 std::atomic_thread_fence(std::memory_order_acquire);
2304 }
2305 }
2306 }
2307
/*
 * Work-stealing check for pthreadpool_parallelize_2d_tile_2d_with_uarch:
 * WorkImbalance2DTile2DWithUArch blocks the task for tile (0, 0) until the
 * shared counter reaches the full element count, so the call can only return
 * once all remaining tiles were processed while that task was spinning.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize2DTile2DWithUArch, MultiThreadPoolWorkStealing) {
	/* Direct initialization: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_2d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_2d_tile_2d_with_id_t>(WorkImbalance2DTile2DWithUArch),
		static_cast<void*>(&num_processed_items),
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize2DTile2DRangeI, kParallelize2DTile2DRangeJ,
		kParallelize2DTile2DTileI, kParallelize2DTile2DTileJ,
		0 /* flags */);
	/* The counter accumulates tile sizes, so it must end at the element count. */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize2DTile2DRangeI * kParallelize2DTile2DRangeJ);
}
2328
/* No-op 3D task: accepts a context pointer and three indices and ignores them all. */
static void ComputeNothing3D(void* /* context */, size_t /* i */, size_t /* j */, size_t /* k */) {
	/* Intentionally empty. */
}
2331
/*
 * Smoke test: pthreadpool_parallelize_3d with a no-op task has to return
 * when run on a pool created with pthreadpool_create(1).
 */
TEST(Parallelize3D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d(
		threadpool.get(),
		ComputeNothing3D,
		nullptr /* context */,
		kParallelize3DRangeI,
		kParallelize3DRangeJ,
		kParallelize3DRangeK,
		0 /* flags */);
}
2342
/*
 * Smoke test: pthreadpool_parallelize_3d with a no-op task has to return
 * when run on a pool created with pthreadpool_create(0).
 * Skipped when that pool reports at most one thread.
 */
TEST(Parallelize3D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d(
		threadpool.get(),
		ComputeNothing3D,
		nullptr /* context */,
		kParallelize3DRangeI,
		kParallelize3DRangeJ,
		kParallelize3DRangeK,
		0 /* flags */);
}
2358
/*
 * 3D task that verifies each index it receives is strictly below the
 * corresponding kParallelize3DRange* extent. The context pointer is unused.
 * Failures are reported through non-fatal gtest expectations.
 */
static void CheckBounds3D(void*, size_t i, size_t j, size_t k) {
	EXPECT_LT(i, kParallelize3DRangeI);
	EXPECT_LT(j, kParallelize3DRangeJ);
	EXPECT_LT(k, kParallelize3DRangeK);
}
2364
/*
 * Runs CheckBounds3D over the whole 3D range on a pool created with
 * pthreadpool_create(1); the task itself asserts that every index triple
 * lies inside the range.
 */
TEST(Parallelize3D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d(
		threadpool.get(),
		CheckBounds3D,
		nullptr /* context */,
		kParallelize3DRangeI,
		kParallelize3DRangeJ,
		kParallelize3DRangeK,
		0 /* flags */);
}
2376
/*
 * Runs CheckBounds3D over the whole 3D range on a pool created with
 * pthreadpool_create(0); the task itself asserts that every index triple
 * lies inside the range. Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d(
		threadpool.get(),
		CheckBounds3D,
		nullptr /* context */,
		kParallelize3DRangeI,
		kParallelize3DRangeJ,
		kParallelize3DRangeK,
		0 /* flags */);
}
2392
SetTrue3D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t k)2393 static void SetTrue3D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t k) {
2394 const size_t linear_idx = (i * kParallelize3DRangeJ + j) * kParallelize3DRangeK + k;
2395 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
2396 }
2397
/*
 * Runs SetTrue3D over the whole 3D range on a pool created with
 * pthreadpool_create(1) and then expects the indicator of every
 * (i, j, k) element to be set.
 */
TEST(Parallelize3D, SingleThreadPoolAllItemsProcessed) {
	const size_t item_count = kParallelize3DRangeI * kParallelize3DRangeJ * kParallelize3DRangeK;
	std::vector<std::atomic_bool> indicators(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_t>(SetTrue3D),
		static_cast<void*>(indicators.data()),
		kParallelize3DRangeI,
		kParallelize3DRangeJ,
		kParallelize3DRangeK,
		0 /* flags */);

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DRangeK; k++, linear_idx++) {
				EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
					<< "Element (" << i << ", " << j << ", " << k << ") not processed";
			}
		}
	}
}
2421
/*
 * Runs SetTrue3D over the whole 3D range on a pool created with
 * pthreadpool_create(0) and then expects the indicator of every
 * (i, j, k) element to be set. Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3D, MultiThreadPoolAllItemsProcessed) {
	const size_t item_count = kParallelize3DRangeI * kParallelize3DRangeJ * kParallelize3DRangeK;
	std::vector<std::atomic_bool> indicators(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_t>(SetTrue3D),
		static_cast<void*>(indicators.data()),
		kParallelize3DRangeI,
		kParallelize3DRangeJ,
		kParallelize3DRangeK,
		0 /* flags */);

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DRangeK; k++, linear_idx++) {
				EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
					<< "Element (" << i << ", " << j << ", " << k << ") not processed";
			}
		}
	}
}
2449
Increment3D(std::atomic_int * processed_counters,size_t i,size_t j,size_t k)2450 static void Increment3D(std::atomic_int* processed_counters, size_t i, size_t j, size_t k) {
2451 const size_t linear_idx = (i * kParallelize3DRangeJ + j) * kParallelize3DRangeK + k;
2452 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
2453 }
2454
/*
 * Runs Increment3D once over the whole 3D range on a pool created with
 * pthreadpool_create(1) and expects every per-element counter to be exactly 1.
 */
TEST(Parallelize3D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t item_count = kParallelize3DRangeI * kParallelize3DRangeJ * kParallelize3DRangeK;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_t>(Increment3D),
		static_cast<void*>(counters.data()),
		kParallelize3DRangeI,
		kParallelize3DRangeJ,
		kParallelize3DRangeK,
		0 /* flags */);

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DRangeK; k++, linear_idx++) {
				EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
					<< "Element (" << i << ", " << j << ", " << k << ") was processed "
					<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
			}
		}
	}
}
2479
/*
 * Runs Increment3D once over the whole 3D range on a pool created with
 * pthreadpool_create(0) and expects every per-element counter to be exactly 1.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t item_count = kParallelize3DRangeI * kParallelize3DRangeJ * kParallelize3DRangeK;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_t>(Increment3D),
		static_cast<void*>(counters.data()),
		kParallelize3DRangeI,
		kParallelize3DRangeJ,
		kParallelize3DRangeK,
		0 /* flags */);

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DRangeK; k++, linear_idx++) {
				EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
					<< "Element (" << i << ", " << j << ", " << k << ") was processed "
					<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
			}
		}
	}
}
2508
/*
 * Runs Increment3D over the whole 3D range kIncrementIterations times on a
 * pool created with pthreadpool_create(1), reusing the same pool for every
 * pass, and expects each per-element counter to equal kIncrementIterations.
 */
TEST(Parallelize3D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t item_count = kParallelize3DRangeI * kParallelize3DRangeJ * kParallelize3DRangeK;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	for (size_t pass = 0; pass < kIncrementIterations; pass++) {
		pthreadpool_parallelize_3d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_3d_t>(Increment3D),
			static_cast<void*>(counters.data()),
			kParallelize3DRangeI,
			kParallelize3DRangeJ,
			kParallelize3DRangeK,
			0 /* flags */);
	}

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DRangeK; k++, linear_idx++) {
				EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
					<< "Element (" << i << ", " << j << ", " << k << ") was processed "
					<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
					<< "(expected: " << kIncrementIterations << ")";
			}
		}
	}
}
2536
/*
 * Runs Increment3D over the whole 3D range kIncrementIterations times on a
 * pool created with pthreadpool_create(0), reusing the same pool for every
 * pass, and expects each per-element counter to equal kIncrementIterations.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t item_count = kParallelize3DRangeI * kParallelize3DRangeJ * kParallelize3DRangeK;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	for (size_t pass = 0; pass < kIncrementIterations; pass++) {
		pthreadpool_parallelize_3d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_3d_t>(Increment3D),
			static_cast<void*>(counters.data()),
			kParallelize3DRangeI,
			kParallelize3DRangeJ,
			kParallelize3DRangeK,
			0 /* flags */);
	}

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DRangeK; k++, linear_idx++) {
				EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
					<< "Element (" << i << ", " << j << ", " << k << ") was processed "
					<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
					<< "(expected: " << kIncrementIterations << ")";
			}
		}
	}
}
2568
/*
 * Task for the high-contention test: every invocation adds one (relaxed) to
 * the same shared counter, regardless of the (i, j, k) indices it receives.
 */
static void IncrementSame3D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k) {
	std::atomic_fetch_add_explicit(num_processed_items, 1, std::memory_order_relaxed);
}
2572
/*
 * High-contention check for pthreadpool_parallelize_3d: IncrementSame3D makes
 * every processed element bump the same atomic counter, and after the call
 * the counter must equal the number of elements in the 3D range.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize3D, MultiThreadPoolHighContention) {
	/* Direct initialization: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_t>(IncrementSame3D),
		static_cast<void*>(&num_processed_items),
		kParallelize3DRangeI, kParallelize3DRangeJ, kParallelize3DRangeK,
		0 /* flags */);
	/* Every element of the range must have contributed exactly one increment. */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DRangeI * kParallelize3DRangeJ * kParallelize3DRangeK);
}
2591
WorkImbalance3D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t k)2592 static void WorkImbalance3D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k) {
2593 num_processed_items->fetch_add(1, std::memory_order_relaxed);
2594 if (i == 0 && j == 0 && k == 0) {
2595 /* Spin-wait until all items are computed */
2596 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize3DRangeI * kParallelize3DRangeJ * kParallelize3DRangeK) {
2597 std::atomic_thread_fence(std::memory_order_acquire);
2598 }
2599 }
2600 }
2601
/*
 * Work-stealing check for pthreadpool_parallelize_3d: WorkImbalance3D blocks
 * the task for element (0, 0, 0) until the shared counter reaches the full
 * element count, so the call can only return once all remaining elements
 * were processed while that task was spinning.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize3D, MultiThreadPoolWorkStealing) {
	/* Direct initialization: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_t>(WorkImbalance3D),
		static_cast<void*>(&num_processed_items),
		kParallelize3DRangeI, kParallelize3DRangeJ, kParallelize3DRangeK,
		0 /* flags */);
	/* One increment per element, so the counter must end at the element count. */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DRangeI * kParallelize3DRangeJ * kParallelize3DRangeK);
}
2620
/* No-op 3D/tile-1D task: accepts a context pointer and four size arguments and ignores them all. */
static void ComputeNothing3DTile1D(void* /* context */, size_t /* i */, size_t /* j */, size_t /* start_k */, size_t /* tile_k */) {
	/* Intentionally empty. */
}
2623
/*
 * Smoke test: pthreadpool_parallelize_3d_tile_1d with a no-op task has to
 * return when run on a pool created with pthreadpool_create(1).
 */
TEST(Parallelize3DTile1D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		ComputeNothing3DTile1D,
		nullptr /* context */,
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);
}
2635
/*
 * Smoke test: pthreadpool_parallelize_3d_tile_1d with a no-op task has to
 * return when run on a pool created with pthreadpool_create(0).
 * Skipped when that pool reports at most one thread.
 */
TEST(Parallelize3DTile1D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		ComputeNothing3DTile1D,
		nullptr /* context */,
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);
}
2652
/*
 * 3D/tile-1D task that verifies the indices it receives stay inside the
 * kParallelize3DTile1DRange* extents. The context pointer is unused.
 * Failures are reported through non-fatal gtest expectations.
 */
static void CheckBounds3DTile1D(void*, size_t i, size_t j, size_t start_k, size_t tile_k) {
	EXPECT_LT(i, kParallelize3DTile1DRangeI);
	EXPECT_LT(j, kParallelize3DTile1DRangeJ);
	/* The tile must start inside the K range... */
	EXPECT_LT(start_k, kParallelize3DTile1DRangeK);
	/* ...and must not extend past its end. */
	EXPECT_LE(start_k + tile_k, kParallelize3DTile1DRangeK);
}
2659
/*
 * Runs CheckBounds3DTile1D over the whole range on a pool created with
 * pthreadpool_create(1); the task itself asserts that every index and tile
 * it receives lies inside the range.
 */
TEST(Parallelize3DTile1D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		CheckBounds3DTile1D,
		nullptr /* context */,
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);
}
2672
/*
 * Runs CheckBounds3DTile1D over the whole range on a pool created with
 * pthreadpool_create(0); the task itself asserts that every index and tile
 * it receives lies inside the range.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3DTile1D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		CheckBounds3DTile1D,
		nullptr /* context */,
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);
}
2689
/*
 * 3D/tile-1D task that verifies the shape of the K tile it receives.
 * The context pointer and the i/j indices are not inspected.
 * Failures are reported through non-fatal gtest expectations.
 */
static void CheckTiling3DTile1D(void*, size_t i, size_t j, size_t start_k, size_t tile_k) {
	/* A tile is never empty and never larger than the requested tile size. */
	EXPECT_GT(tile_k, 0);
	EXPECT_LE(tile_k, kParallelize3DTile1DTileK);
	/* Tiles start on multiples of the requested tile size. */
	EXPECT_EQ(start_k % kParallelize3DTile1DTileK, 0);
	/* Full-size tile, except where fewer elements remain before the end of the K range. */
	EXPECT_EQ(tile_k, std::min<size_t>(kParallelize3DTile1DTileK, kParallelize3DTile1DRangeK - start_k));
}
2696
/*
 * Runs CheckTiling3DTile1D over the whole range on a pool created with
 * pthreadpool_create(1); the task itself asserts the size and alignment of
 * every K tile it receives.
 */
TEST(Parallelize3DTile1D, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		CheckTiling3DTile1D,
		nullptr /* context */,
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);
}
2709
/*
 * Runs CheckTiling3DTile1D over the whole range on a pool created with
 * pthreadpool_create(0); the task itself asserts the size and alignment of
 * every K tile it receives.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3DTile1D, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		CheckTiling3DTile1D,
		nullptr /* context */,
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);
}
2726
SetTrue3DTile1D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t start_k,size_t tile_k)2727 static void SetTrue3DTile1D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t start_k, size_t tile_k) {
2728 for (size_t k = start_k; k < start_k + tile_k; k++) {
2729 const size_t linear_idx = (i * kParallelize3DTile1DRangeJ + j) * kParallelize3DTile1DRangeK + k;
2730 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
2731 }
2732 }
2733
/*
 * Runs SetTrue3DTile1D over the whole range on a pool created with
 * pthreadpool_create(1) and then expects the indicator of every
 * (i, j, k) element to be set.
 */
TEST(Parallelize3DTile1D, SingleThreadPoolAllItemsProcessed) {
	const size_t item_count = kParallelize3DTile1DRangeI * kParallelize3DTile1DRangeJ * kParallelize3DTile1DRangeK;
	std::vector<std::atomic_bool> indicators(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_1d_t>(SetTrue3DTile1D),
		static_cast<void*>(indicators.data()),
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DTile1DRangeK; k++, linear_idx++) {
				EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
					<< "Element (" << i << ", " << j << ", " << k << ") not processed";
			}
		}
	}
}
2758
/*
 * Runs SetTrue3DTile1D over the whole range on a pool created with
 * pthreadpool_create(0) and then expects the indicator of every
 * (i, j, k) element to be set. Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3DTile1D, MultiThreadPoolAllItemsProcessed) {
	const size_t item_count = kParallelize3DTile1DRangeI * kParallelize3DTile1DRangeJ * kParallelize3DTile1DRangeK;
	std::vector<std::atomic_bool> indicators(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_1d_t>(SetTrue3DTile1D),
		static_cast<void*>(indicators.data()),
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DTile1DRangeK; k++, linear_idx++) {
				EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
					<< "Element (" << i << ", " << j << ", " << k << ") not processed";
			}
		}
	}
}
2787
Increment3DTile1D(std::atomic_int * processed_counters,size_t i,size_t j,size_t start_k,size_t tile_k)2788 static void Increment3DTile1D(std::atomic_int* processed_counters, size_t i, size_t j, size_t start_k, size_t tile_k) {
2789 for (size_t k = start_k; k < start_k + tile_k; k++) {
2790 const size_t linear_idx = (i * kParallelize3DTile1DRangeJ + j) * kParallelize3DTile1DRangeK + k;
2791 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
2792 }
2793 }
2794
/*
 * Runs Increment3DTile1D once over the whole range on a pool created with
 * pthreadpool_create(1) and expects every per-element counter to be exactly 1.
 */
TEST(Parallelize3DTile1D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t item_count = kParallelize3DTile1DRangeI * kParallelize3DTile1DRangeJ * kParallelize3DTile1DRangeK;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_1d_t>(Increment3DTile1D),
		static_cast<void*>(counters.data()),
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DTile1DRangeK; k++, linear_idx++) {
				EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
					<< "Element (" << i << ", " << j << ", " << k << ") was processed "
					<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
			}
		}
	}
}
2820
/*
 * Runs Increment3DTile1D once over the whole range on a pool created with
 * pthreadpool_create(0) and expects every per-element counter to be exactly 1.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3DTile1D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t item_count = kParallelize3DTile1DRangeI * kParallelize3DTile1DRangeJ * kParallelize3DTile1DRangeK;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_1d_t>(Increment3DTile1D),
		static_cast<void*>(counters.data()),
		kParallelize3DTile1DRangeI,
		kParallelize3DTile1DRangeJ,
		kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DTile1DRangeK; k++, linear_idx++) {
				EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
					<< "Element (" << i << ", " << j << ", " << k << ") was processed "
					<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
			}
		}
	}
}
2850
/*
 * Runs Increment3DTile1D over the whole range kIncrementIterations times on
 * a pool created with pthreadpool_create(1), reusing the same pool for every
 * pass, and expects each per-element counter to equal kIncrementIterations.
 */
TEST(Parallelize3DTile1D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t item_count = kParallelize3DTile1DRangeI * kParallelize3DTile1DRangeJ * kParallelize3DTile1DRangeK;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	for (size_t pass = 0; pass < kIncrementIterations; pass++) {
		pthreadpool_parallelize_3d_tile_1d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_3d_tile_1d_t>(Increment3DTile1D),
			static_cast<void*>(counters.data()),
			kParallelize3DTile1DRangeI,
			kParallelize3DTile1DRangeJ,
			kParallelize3DTile1DRangeK,
			kParallelize3DTile1DTileK,
			0 /* flags */);
	}

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DTile1DRangeK; k++, linear_idx++) {
				EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
					<< "Element (" << i << ", " << j << ", " << k << ") was processed "
					<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
					<< "(expected: " << kIncrementIterations << ")";
			}
		}
	}
}
2879
/*
 * Runs Increment3DTile1D over the whole range kIncrementIterations times on
 * a pool created with pthreadpool_create(0), reusing the same pool for every
 * pass, and expects each per-element counter to equal kIncrementIterations.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3DTile1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t item_count = kParallelize3DTile1DRangeI * kParallelize3DTile1DRangeJ * kParallelize3DTile1DRangeK;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	for (size_t pass = 0; pass < kIncrementIterations; pass++) {
		pthreadpool_parallelize_3d_tile_1d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_3d_tile_1d_t>(Increment3DTile1D),
			static_cast<void*>(counters.data()),
			kParallelize3DTile1DRangeI,
			kParallelize3DTile1DRangeJ,
			kParallelize3DTile1DRangeK,
			kParallelize3DTile1DTileK,
			0 /* flags */);
	}

	/* Walk the elements in row-major order; linear_idx advances with the innermost index. */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize3DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize3DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize3DTile1DRangeK; k++, linear_idx++) {
				EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
					<< "Element (" << i << ", " << j << ", " << k << ") was processed "
					<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
					<< "(expected: " << kIncrementIterations << ")";
			}
		}
	}
}
2912
/*
 * Task for the high-contention test: performs one relaxed atomic increment of
 * the single shared counter for each of the tile_k elements in the tile.
 * The i/j indices and the tile start do not affect the number of increments.
 */
static void IncrementSame3DTile1D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t start_k, size_t tile_k) {
	/* Only the tile length matters for counting; the start is not needed. */
	(void) start_k;

	for (size_t pending = tile_k; pending != 0; pending--) {
		num_processed_items->fetch_add(1, std::memory_order_relaxed);
	}
}
2918
/*
 * High-contention check for pthreadpool_parallelize_3d_tile_1d:
 * IncrementSame3DTile1D makes every processed element bump the same atomic
 * counter, and after the call the counter must equal the number of elements
 * in the range. The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize3DTile1D, MultiThreadPoolHighContention) {
	/* Direct initialization: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_1d_t>(IncrementSame3DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize3DTile1DRangeI, kParallelize3DTile1DRangeJ, kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);
	/* Every element of the range must have contributed exactly one increment. */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile1DRangeI * kParallelize3DTile1DRangeJ * kParallelize3DTile1DRangeK);
}
2938
WorkImbalance3DTile1D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t start_k,size_t tile_k)2939 static void WorkImbalance3DTile1D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t start_k, size_t tile_k) {
2940 num_processed_items->fetch_add(tile_k, std::memory_order_relaxed);
2941 if (i == 0 && j == 0 && start_k == 0) {
2942 /* Spin-wait until all items are computed */
2943 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize3DTile1DRangeI * kParallelize3DTile1DRangeJ * kParallelize3DTile1DRangeK) {
2944 std::atomic_thread_fence(std::memory_order_acquire);
2945 }
2946 }
2947 }
2948
/*
 * Work-stealing check for pthreadpool_parallelize_3d_tile_1d:
 * WorkImbalance3DTile1D blocks the task for the tile at (0, 0, 0) until the
 * shared counter reaches the full element count, so the call can only return
 * once all remaining tiles were processed while that task was spinning.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize3DTile1D, MultiThreadPoolWorkStealing) {
	/* Direct initialization: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_1d_t>(WorkImbalance3DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize3DTile1DRangeI, kParallelize3DTile1DRangeJ, kParallelize3DTile1DRangeK,
		kParallelize3DTile1DTileK,
		0 /* flags */);
	/* The counter accumulates tile lengths, so it must end at the element count. */
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile1DRangeI * kParallelize3DTile1DRangeJ * kParallelize3DTile1DRangeK);
}
2968
/* No-op 3D/tile-2D task: accepts a context pointer and five size arguments and ignores them all. */
static void ComputeNothing3DTile2D(void* /* context */, size_t /* i */, size_t /* start_j */, size_t /* start_k */, size_t /* tile_j */, size_t /* tile_k */) {
	/* Intentionally empty. */
}
2971
/*
 * Smoke test: pthreadpool_parallelize_3d_tile_2d with a no-op task has to
 * return when run on a pool created with pthreadpool_create(1).
 */
TEST(Parallelize3DTile2D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(),
		ComputeNothing3DTile2D,
		nullptr /* context */,
		kParallelize3DTile2DRangeI,
		kParallelize3DTile2DRangeJ,
		kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ,
		kParallelize3DTile2DTileK,
		0 /* flags */);
}
2983
/*
 * Smoke test: pthreadpool_parallelize_3d_tile_2d with a no-op task has to
 * return when run on a pool created with pthreadpool_create(0).
 * Skipped when that pool reports at most one thread.
 */
TEST(Parallelize3DTile2D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(),
		ComputeNothing3DTile2D,
		nullptr /* context */,
		kParallelize3DTile2DRangeI,
		kParallelize3DTile2DRangeJ,
		kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ,
		kParallelize3DTile2DTileK,
		0 /* flags */);
}
3000
/*
 * 3D/tile-2D task that verifies the index and the J/K tile it receives stay
 * inside the kParallelize3DTile2DRange* extents. The context pointer is
 * unused. Failures are reported through non-fatal gtest expectations.
 */
static void CheckBounds3DTile2D(void*, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
	EXPECT_LT(i, kParallelize3DTile2DRangeI);
	/* Both tile origins must lie inside their ranges... */
	EXPECT_LT(start_j, kParallelize3DTile2DRangeJ);
	EXPECT_LT(start_k, kParallelize3DTile2DRangeK);
	/* ...and neither tile may extend past the end of its range. */
	EXPECT_LE(start_j + tile_j, kParallelize3DTile2DRangeJ);
	EXPECT_LE(start_k + tile_k, kParallelize3DTile2DRangeK);
}
3008
/* Every task invocation on a pthreadpool_create(1) pool must receive in-range indices. */
TEST(Parallelize3DTile2D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_3d_tile_2d_t task = CheckBounds3DTile2D;
	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(), task, nullptr /* no task argument */,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3021
/* Every task invocation on a pthreadpool_create(0) pool must receive in-range indices. */
TEST(Parallelize3DTile2D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_3d_tile_2d_t task = CheckBounds3DTile2D;
	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(), task, nullptr /* no task argument */,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3038
CheckTiling3DTile2D(void *,size_t i,size_t start_j,size_t start_k,size_t tile_j,size_t tile_k)3039 static void CheckTiling3DTile2D(void*, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
3040 EXPECT_GT(tile_j, 0);
3041 EXPECT_LE(tile_j, kParallelize3DTile2DTileJ);
3042 EXPECT_EQ(start_j % kParallelize3DTile2DTileJ, 0);
3043 EXPECT_EQ(tile_j, std::min<size_t>(kParallelize3DTile2DTileJ, kParallelize3DTile2DRangeJ - start_j));
3044
3045 EXPECT_GT(tile_k, 0);
3046 EXPECT_LE(tile_k, kParallelize3DTile2DTileK);
3047 EXPECT_EQ(start_k % kParallelize3DTile2DTileK, 0);
3048 EXPECT_EQ(tile_k, std::min<size_t>(kParallelize3DTile2DTileK, kParallelize3DTile2DRangeK - start_k));
3049 }
3050
/* Tile sizes and origins must follow the requested tiling on a pthreadpool_create(1) pool. */
TEST(Parallelize3DTile2D, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_3d_tile_2d_t task = CheckTiling3DTile2D;
	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(), task, nullptr /* no task argument */,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3063
/* Tile sizes and origins must follow the requested tiling on a pthreadpool_create(0) pool. */
TEST(Parallelize3DTile2D, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_3d_tile_2d_t task = CheckTiling3DTile2D;
	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(), task, nullptr /* no task argument */,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3080
SetTrue3DTile2D(std::atomic_bool * processed_indicators,size_t i,size_t start_j,size_t start_k,size_t tile_j,size_t tile_k)3081 static void SetTrue3DTile2D(std::atomic_bool* processed_indicators, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
3082 for (size_t j = start_j; j < start_j + tile_j; j++) {
3083 for (size_t k = start_k; k < start_k + tile_k; k++) {
3084 const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
3085 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
3086 }
3087 }
3088 }
3089
/* Every element of the 3D range must be visited on a pthreadpool_create(1) pool. */
TEST(Parallelize3DTile2D, SingleThreadPoolAllItemsProcessed) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_bool> indicators(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_t>(SetTrue3DTile2D);
	void* const task_argument = static_cast<void*>(indicators.data());
	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(), task, task_argument,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ") not processed";
	}
}
3114
/* Every element of the 3D range must be visited on a pthreadpool_create(0) pool. */
TEST(Parallelize3DTile2D, MultiThreadPoolAllItemsProcessed) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_bool> indicators(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_t>(SetTrue3DTile2D);
	void* const task_argument = static_cast<void*>(indicators.data());
	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(), task, task_argument,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ") not processed";
	}
}
3143
Increment3DTile2D(std::atomic_int * processed_counters,size_t i,size_t start_j,size_t start_k,size_t tile_j,size_t tile_k)3144 static void Increment3DTile2D(std::atomic_int* processed_counters, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
3145 for (size_t j = start_j; j < start_j + tile_j; j++) {
3146 for (size_t k = start_k; k < start_k + tile_k; k++) {
3147 const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
3148 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
3149 }
3150 }
3151 }
3152
/* One parallelization call must visit each element exactly once (pthreadpool_create(1) pool). */
TEST(Parallelize3DTile2D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_t>(Increment3DTile2D);
	void* const task_argument = static_cast<void*>(counters.data());
	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(), task, task_argument,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
3178
/* One parallelization call must visit each element exactly once (pthreadpool_create(0) pool). */
TEST(Parallelize3DTile2D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_t>(Increment3DTile2D);
	void* const task_argument = static_cast<void*>(counters.data());
	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(), task, task_argument,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
3208
/*
 * Repeating the parallelization kIncrementIterations times on the same
 * pthreadpool_create(1) pool must visit each element once per repetition.
 */
TEST(Parallelize3DTile2D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_t>(Increment3DTile2D);
	void* const task_argument = static_cast<void*>(counters.data());
	for (size_t remaining = kIncrementIterations; remaining != 0; remaining--) {
		pthreadpool_parallelize_3d_tile_2d(
			threadpool.get(), task, task_argument,
			kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
			kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
			0 /* flags */);
	}

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ", " << k << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
3237
/*
 * Repeating the parallelization kIncrementIterations times on the same
 * pthreadpool_create(0) pool must visit each element once per repetition.
 */
TEST(Parallelize3DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_t>(Increment3DTile2D);
	void* const task_argument = static_cast<void*>(counters.data());
	for (size_t remaining = kIncrementIterations; remaining != 0; remaining--) {
		pthreadpool_parallelize_3d_tile_2d(
			threadpool.get(), task, task_argument,
			kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
			kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
			0 /* flags */);
	}

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ", " << k << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
3270
/*
 * Task that bumps one shared counter once per element of the tile. The
 * increments are issued one at a time (not as a single fetch_add of the
 * tile area), so every element costs its own atomic operation.
 */
static void IncrementSame3DTile2D(std::atomic_int* num_processed_items, size_t /* i */, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
	const size_t end_j = start_j + tile_j;
	const size_t end_k = start_k + tile_k;
	for (size_t j = start_j; j < end_j; ++j) {
		for (size_t k = start_k; k < end_k; ++k) {
			num_processed_items->fetch_add(1, std::memory_order_relaxed);
		}
	}
}
3278
/*
 * All tasks increment one shared atomic counter, once per element. After
 * the call the counter must equal the total number of elements in the
 * 3D range.
 */
TEST(Parallelize3DTile2D, MultiThreadPoolHighContention) {
	/* Direct-initialize: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_2d_t>(IncrementSame3DTile2D),
		static_cast<void*>(&num_processed_items),
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
}
3298
WorkImbalance3DTile2D(std::atomic_int * num_processed_items,size_t i,size_t start_j,size_t start_k,size_t tile_j,size_t tile_k)3299 static void WorkImbalance3DTile2D(std::atomic_int* num_processed_items, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
3300 num_processed_items->fetch_add(tile_j * tile_k, std::memory_order_relaxed);
3301 if (i == 0 && start_j == 0 && start_k == 0) {
3302 /* Spin-wait until all items are computed */
3303 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK) {
3304 std::atomic_thread_fence(std::memory_order_acquire);
3305 }
3306 }
3307 }
3308
/*
 * Runs WorkImbalance3DTile2D, whose first-tile invocation spins until all
 * elements are counted. The call can only return if the remaining tiles are
 * processed while that invocation is blocked. Afterwards the counter must
 * equal the total number of elements in the 3D range.
 */
TEST(Parallelize3DTile2D, MultiThreadPoolWorkStealing) {
	/* Direct-initialize: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* With a single thread the spinning task would never be released. */
	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_2d_t>(WorkImbalance3DTile2D),
		static_cast<void*>(&num_processed_items),
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
}
3328
/*
 * No-op task with the uarch-aware 3D/2D-tiled callback signature. Used by
 * the tests that only need a parallelization call to run and return.
 */
static void ComputeNothing3DTile2DWithUArch(
		void* /* argument */,
		uint32_t /* uarch_index */,
		size_t /* i */,
		size_t /* start_j */, size_t /* start_k */,
		size_t /* tile_j */, size_t /* tile_k */) {}
3331
/* Smoke test: the uarch-aware tiled 3D call returns on a pthreadpool_create(1) pool. */
TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_3d_tile_2d_with_id_t task = ComputeNothing3DTile2DWithUArch;
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, nullptr /* no task argument */,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3344
/* Smoke test: the uarch-aware tiled 3D call returns on a pthreadpool_create(0) pool. */
TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_3d_tile_2d_with_id_t task = ComputeNothing3DTile2DWithUArch;
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, nullptr /* no task argument */,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3362
CheckUArch3DTile2DWithUArch(void *,uint32_t uarch_index,size_t,size_t,size_t,size_t,size_t)3363 static void CheckUArch3DTile2DWithUArch(void*, uint32_t uarch_index, size_t, size_t, size_t, size_t, size_t) {
3364 if (uarch_index != kDefaultUArchIndex) {
3365 EXPECT_LE(uarch_index, kMaxUArchIndex);
3366 }
3367 }
3368
/* Every task invocation on a pthreadpool_create(1) pool must receive a valid uarch index. */
TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolUArchInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_3d_tile_2d_with_id_t task = CheckUArch3DTile2DWithUArch;
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, nullptr /* no task argument */,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3382
/* Every task invocation on a pthreadpool_create(0) pool must receive a valid uarch index. */
TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolUArchInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_3d_tile_2d_with_id_t task = CheckUArch3DTile2DWithUArch;
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, nullptr /* no task argument */,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3400
CheckBounds3DTile2DWithUArch(void *,uint32_t,size_t i,size_t start_j,size_t start_k,size_t tile_j,size_t tile_k)3401 static void CheckBounds3DTile2DWithUArch(void*, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
3402 EXPECT_LT(i, kParallelize3DTile2DRangeI);
3403 EXPECT_LT(start_j, kParallelize3DTile2DRangeJ);
3404 EXPECT_LT(start_k, kParallelize3DTile2DRangeK);
3405 EXPECT_LE(start_j + tile_j, kParallelize3DTile2DRangeJ);
3406 EXPECT_LE(start_k + tile_k, kParallelize3DTile2DRangeK);
3407 }
3408
/* Every uarch-aware task invocation on a pthreadpool_create(1) pool must receive in-range indices. */
TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_3d_tile_2d_with_id_t task = CheckBounds3DTile2DWithUArch;
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, nullptr /* no task argument */,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3422
/* Every uarch-aware task invocation on a pthreadpool_create(0) pool must receive in-range indices. */
TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_3d_tile_2d_with_id_t task = CheckBounds3DTile2DWithUArch;
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, nullptr /* no task argument */,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3440
CheckTiling3DTile2DWithUArch(void *,uint32_t,size_t i,size_t start_j,size_t start_k,size_t tile_j,size_t tile_k)3441 static void CheckTiling3DTile2DWithUArch(void*, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
3442 EXPECT_GT(tile_j, 0);
3443 EXPECT_LE(tile_j, kParallelize3DTile2DTileJ);
3444 EXPECT_EQ(start_j % kParallelize3DTile2DTileJ, 0);
3445 EXPECT_EQ(tile_j, std::min<size_t>(kParallelize3DTile2DTileJ, kParallelize3DTile2DRangeJ - start_j));
3446
3447 EXPECT_GT(tile_k, 0);
3448 EXPECT_LE(tile_k, kParallelize3DTile2DTileK);
3449 EXPECT_EQ(start_k % kParallelize3DTile2DTileK, 0);
3450 EXPECT_EQ(tile_k, std::min<size_t>(kParallelize3DTile2DTileK, kParallelize3DTile2DRangeK - start_k));
3451 }
3452
/* Tile sizes and origins must follow the requested tiling (uarch-aware call, pthreadpool_create(1) pool). */
TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_3d_tile_2d_with_id_t task = CheckTiling3DTile2DWithUArch;
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, nullptr /* no task argument */,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3466
/* Tile sizes and origins must follow the requested tiling (uarch-aware call, pthreadpool_create(0) pool). */
TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_3d_tile_2d_with_id_t task = CheckTiling3DTile2DWithUArch;
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, nullptr /* no task argument */,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
}
3484
SetTrue3DTile2DWithUArch(std::atomic_bool * processed_indicators,uint32_t,size_t i,size_t start_j,size_t start_k,size_t tile_j,size_t tile_k)3485 static void SetTrue3DTile2DWithUArch(std::atomic_bool* processed_indicators, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
3486 for (size_t j = start_j; j < start_j + tile_j; j++) {
3487 for (size_t k = start_k; k < start_k + tile_k; k++) {
3488 const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
3489 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
3490 }
3491 }
3492 }
3493
/* Every element of the 3D range must be visited (uarch-aware call, pthreadpool_create(1) pool). */
TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolAllItemsProcessed) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_bool> indicators(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(SetTrue3DTile2DWithUArch);
	void* const task_argument = static_cast<void*>(indicators.data());
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, task_argument,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ") not processed";
	}
}
3519
/* Every element of the 3D range must be visited (uarch-aware call, pthreadpool_create(0) pool). */
TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolAllItemsProcessed) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_bool> indicators(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(SetTrue3DTile2DWithUArch);
	void* const task_argument = static_cast<void*>(indicators.data());
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, task_argument,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ") not processed";
	}
}
3549
Increment3DTile2DWithUArch(std::atomic_int * processed_counters,uint32_t,size_t i,size_t start_j,size_t start_k,size_t tile_j,size_t tile_k)3550 static void Increment3DTile2DWithUArch(std::atomic_int* processed_counters, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
3551 for (size_t j = start_j; j < start_j + tile_j; j++) {
3552 for (size_t k = start_k; k < start_k + tile_k; k++) {
3553 const size_t linear_idx = (i * kParallelize3DTile2DRangeJ + j) * kParallelize3DTile2DRangeK + k;
3554 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
3555 }
3556 }
3557 }
3558
/* One uarch-aware call must visit each element exactly once (pthreadpool_create(1) pool). */
TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolEachItemProcessedOnce) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(Increment3DTile2DWithUArch);
	void* const task_argument = static_cast<void*>(counters.data());
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, task_argument,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
3585
/* One uarch-aware call must visit each element exactly once (pthreadpool_create(0) pool). */
TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolEachItemProcessedOnce) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(Increment3DTile2DWithUArch);
	void* const task_argument = static_cast<void*>(counters.data());
	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(), task, task_argument,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
3616
/*
 * Repeating the uarch-aware call kIncrementIterations times on the same
 * pthreadpool_create(1) pool must visit each element once per repetition.
 */
TEST(Parallelize3DTile2DWithUArch, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(Increment3DTile2DWithUArch);
	void* const task_argument = static_cast<void*>(counters.data());
	for (size_t remaining = kIncrementIterations; remaining != 0; remaining--) {
		pthreadpool_parallelize_3d_tile_2d_with_uarch(
			threadpool.get(), task, task_argument,
			kDefaultUArchIndex, kMaxUArchIndex,
			kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
			kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
			0 /* flags */);
	}

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ", " << k << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
3646
/*
 * Repeating the uarch-aware call kIncrementIterations times on the same
 * pthreadpool_create(0) pool must visit each element once per repetition.
 */
TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t num_items = kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK;
	std::vector<std::atomic_int> counters(num_items);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	/* Skip unless the pool reports more than one thread. */
	const auto num_threads = pthreadpool_get_threads_count(threadpool.get());
	if (num_threads < 2) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(Increment3DTile2DWithUArch);
	void* const task_argument = static_cast<void*>(counters.data());
	for (size_t remaining = kIncrementIterations; remaining != 0; remaining--) {
		pthreadpool_parallelize_3d_tile_2d_with_uarch(
			threadpool.get(), task, task_argument,
			kDefaultUArchIndex, kMaxUArchIndex,
			kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
			kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
			0 /* flags */);
	}

	/* Walk the flattened array in order and recover (i, j, k) for the message. */
	for (size_t linear_idx = 0; linear_idx < num_items; linear_idx++) {
		const size_t k = linear_idx % kParallelize3DTile2DRangeK;
		const size_t j = (linear_idx / kParallelize3DTile2DRangeK) % kParallelize3DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
			<< "Element (" << i << ", " << j << ", " << k << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations << ")";
	}
}
3680
/*
 * Task body that adds 1 to a single shared counter for each (j, k) element of the
 * tile [start_j, start_j + tile_j) x [start_k, start_k + tile_k).
 * The unnamed uint32_t argument and i do not affect the result.
 */
static void IncrementSame3DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
	const size_t end_j = start_j + tile_j;
	const size_t end_k = start_k + tile_k;
	for (size_t j = start_j; j < end_j; ++j) {
		for (size_t k = start_k; k < end_k; ++k) {
			num_processed_items->fetch_add(1, std::memory_order_relaxed);
		}
	}
}
3688
/*
 * Has every task invocation increment one shared atomic counter (IncrementSame3DTile2DWithUArch)
 * and checks that the final count equals the number of elements in the 3D range.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolHighContention) {
	/* Direct initialization: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(IncrementSame3DTile2DWithUArch),
		static_cast<void*>(&num_processed_items),
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
}
3709
WorkImbalance3DTile2DWithUArch(std::atomic_int * num_processed_items,uint32_t,size_t i,size_t start_j,size_t start_k,size_t tile_j,size_t tile_k)3710 static void WorkImbalance3DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i, size_t start_j, size_t start_k, size_t tile_j, size_t tile_k) {
3711 num_processed_items->fetch_add(tile_j * tile_k, std::memory_order_relaxed);
3712 if (i == 0 && start_j == 0 && start_k == 0) {
3713 /* Spin-wait until all items are computed */
3714 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK) {
3715 std::atomic_thread_fence(std::memory_order_acquire);
3716 }
3717 }
3718 }
3719
/*
 * Runs WorkImbalance3DTile2DWithUArch, whose first tile blocks until every element has
 * been counted, and checks that the shared counter ends at the total element count.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize3DTile2DWithUArch, MultiThreadPoolWorkStealing) {
	/* Direct initialization: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_3d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_3d_tile_2d_with_id_t>(WorkImbalance3DTile2DWithUArch),
		static_cast<void*>(&num_processed_items),
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize3DTile2DRangeI, kParallelize3DTile2DRangeJ, kParallelize3DTile2DRangeK,
		kParallelize3DTile2DTileJ, kParallelize3DTile2DTileK,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize3DTile2DRangeI * kParallelize3DTile2DRangeJ * kParallelize3DTile2DRangeK);
}
3740
/* No-op task with the 4D task signature; every argument is ignored. */
static void ComputeNothing4D(void*, size_t, size_t, size_t, size_t) {}
3743
/* Checks that a 4D parallelize call with a no-op task returns on a pool created with pthreadpool_create(1). */
TEST(Parallelize4D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_4d(
		threadpool.get(),
		ComputeNothing4D,
		nullptr,
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);
}
3754
/*
 * Checks that a 4D parallelize call with a no-op task returns on a pool created with
 * pthreadpool_create(0). Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_4d(
		threadpool.get(),
		ComputeNothing4D,
		nullptr,
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);
}
3770
CheckBounds4D(void *,size_t i,size_t j,size_t k,size_t l)3771 static void CheckBounds4D(void*, size_t i, size_t j, size_t k, size_t l) {
3772 EXPECT_LT(i, kParallelize4DRangeI);
3773 EXPECT_LT(j, kParallelize4DRangeJ);
3774 EXPECT_LT(k, kParallelize4DRangeK);
3775 EXPECT_LT(l, kParallelize4DRangeL);
3776 }
3777
/* Runs CheckBounds4D over the 4D range on a pool created with pthreadpool_create(1). */
TEST(Parallelize4D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_4d(
		threadpool.get(), CheckBounds4D, nullptr,
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);
}
3789
/*
 * Runs CheckBounds4D over the 4D range on a pool created with pthreadpool_create(0).
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_4d(
		threadpool.get(), CheckBounds4D, nullptr,
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);
}
3805
SetTrue4D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t k,size_t l)3806 static void SetTrue4D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t k, size_t l) {
3807 const size_t linear_idx = ((i * kParallelize4DRangeJ + j) * kParallelize4DRangeK + k) * kParallelize4DRangeL + l;
3808 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
3809 }
3810
/*
 * Runs SetTrue4D over the 4D range on a pool created with pthreadpool_create(1) and
 * checks that the flag of every element was set.
 */
TEST(Parallelize4D, SingleThreadPoolAllItemsProcessed) {
	const size_t element_count = kParallelize4DRangeI * kParallelize4DRangeJ * kParallelize4DRangeK * kParallelize4DRangeL;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_4d_t>(SetTrue4D);
	pthreadpool_parallelize_4d(
		threadpool.get(), task, static_cast<void*>(indicators.data()),
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);

	/* Walk the flags in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DRangeK + k;
				for (size_t l = 0; l < kParallelize4DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DRangeL + l;
					EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
				}
			}
		}
	}
}
3836
/*
 * Runs SetTrue4D over the 4D range on a pool created with pthreadpool_create(0) and
 * checks that the flag of every element was set.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4D, MultiThreadPoolAllItemsProcessed) {
	const size_t element_count = kParallelize4DRangeI * kParallelize4DRangeJ * kParallelize4DRangeK * kParallelize4DRangeL;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_4d_t>(SetTrue4D);
	pthreadpool_parallelize_4d(
		threadpool.get(), task, static_cast<void*>(indicators.data()),
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);

	/* Walk the flags in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DRangeK + k;
				for (size_t l = 0; l < kParallelize4DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DRangeL + l;
					EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
				}
			}
		}
	}
}
3866
Increment4D(std::atomic_int * processed_counters,size_t i,size_t j,size_t k,size_t l)3867 static void Increment4D(std::atomic_int* processed_counters, size_t i, size_t j, size_t k, size_t l) {
3868 const size_t linear_idx = ((i * kParallelize4DRangeJ + j) * kParallelize4DRangeK + k) * kParallelize4DRangeL + l;
3869 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
3870 }
3871
/*
 * Runs Increment4D once over the 4D range on a pool created with pthreadpool_create(1)
 * and checks that every per-element counter equals 1.
 */
TEST(Parallelize4D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t element_count = kParallelize4DRangeI * kParallelize4DRangeJ * kParallelize4DRangeK * kParallelize4DRangeL;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_4d_t>(Increment4D);
	pthreadpool_parallelize_4d(
		threadpool.get(), task, static_cast<void*>(counters.data()),
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);

	/* Walk the counters in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DRangeK + k;
				for (size_t l = 0; l < kParallelize4DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DRangeL + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
				}
			}
		}
	}
}
3898
/*
 * Runs Increment4D once over the 4D range on a pool created with pthreadpool_create(0)
 * and checks that every per-element counter equals 1.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t element_count = kParallelize4DRangeI * kParallelize4DRangeJ * kParallelize4DRangeK * kParallelize4DRangeL;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_4d_t>(Increment4D);
	pthreadpool_parallelize_4d(
		threadpool.get(), task, static_cast<void*>(counters.data()),
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);

	/* Walk the counters in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DRangeK + k;
				for (size_t l = 0; l < kParallelize4DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DRangeL + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
				}
			}
		}
	}
}
3929
/*
 * Runs Increment4D over the 4D range kIncrementIterations times on a pool created with
 * pthreadpool_create(1) and checks that every per-element counter equals kIncrementIterations.
 */
TEST(Parallelize4D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t element_count = kParallelize4DRangeI * kParallelize4DRangeJ * kParallelize4DRangeK * kParallelize4DRangeL;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_4d_t>(Increment4D);
	void* const context = static_cast<void*>(counters.data());
	for (size_t iteration = 0; iteration < kIncrementIterations; ++iteration) {
		pthreadpool_parallelize_4d(
			threadpool.get(), task, context,
			kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
			0 /* flags */);
	}

	/* Walk the counters in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DRangeK + k;
				for (size_t l = 0; l < kParallelize4DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DRangeL + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
						<< "(expected: " << kIncrementIterations << ")";
				}
			}
		}
	}
}
3959
/*
 * Runs Increment4D over the 4D range kIncrementIterations times on a pool created with
 * pthreadpool_create(0) and checks that every per-element counter equals kIncrementIterations.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t element_count = kParallelize4DRangeI * kParallelize4DRangeJ * kParallelize4DRangeK * kParallelize4DRangeL;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_4d_t>(Increment4D);
	void* const context = static_cast<void*>(counters.data());
	for (size_t iteration = 0; iteration < kIncrementIterations; ++iteration) {
		pthreadpool_parallelize_4d(
			threadpool.get(), task, context,
			kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
			0 /* flags */);
	}

	/* Walk the counters in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DRangeK + k;
				for (size_t l = 0; l < kParallelize4DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DRangeL + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
						<< "(expected: " << kIncrementIterations << ")";
				}
			}
		}
	}
}
3993
/*
 * Task body that adds 1 to a single shared counter per invocation.
 * The four indices do not affect the result.
 */
static void IncrementSame4D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l) {
	std::atomic_int& shared_counter = *num_processed_items;
	shared_counter.fetch_add(1, std::memory_order_relaxed);
}
3997
/*
 * Has every task invocation increment one shared atomic counter (IncrementSame4D) and
 * checks that the final count equals the number of elements in the 4D range.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4D, MultiThreadPoolHighContention) {
	/* Direct initialization: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_4d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_4d_t>(IncrementSame4D),
		static_cast<void*>(&num_processed_items),
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DRangeI * kParallelize4DRangeJ * kParallelize4DRangeK * kParallelize4DRangeL);
}
4016
WorkImbalance4D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t k,size_t l)4017 static void WorkImbalance4D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l) {
4018 num_processed_items->fetch_add(1, std::memory_order_relaxed);
4019 if (i == 0 && j == 0 && k == 0 && l == 0) {
4020 /* Spin-wait until all items are computed */
4021 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize4DRangeI * kParallelize4DRangeJ * kParallelize4DRangeK * kParallelize4DRangeL) {
4022 std::atomic_thread_fence(std::memory_order_acquire);
4023 }
4024 }
4025 }
4026
/*
 * Runs WorkImbalance4D, whose first element blocks until every element has been
 * counted, and checks that the shared counter ends at the total element count.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4D, MultiThreadPoolWorkStealing) {
	/* Direct initialization: the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_4d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_4d_t>(WorkImbalance4D),
		static_cast<void*>(&num_processed_items),
		kParallelize4DRangeI, kParallelize4DRangeJ, kParallelize4DRangeK, kParallelize4DRangeL,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DRangeI * kParallelize4DRangeJ * kParallelize4DRangeK * kParallelize4DRangeL);
}
4045
/* No-op task with the 4D-tile-1D task signature; every argument is ignored. */
static void ComputeNothing4DTile1D(void*, size_t, size_t, size_t, size_t, size_t) {}
4048
/* Checks that a 4D-tile-1D parallelize call with a no-op task returns on a pool created with pthreadpool_create(1). */
TEST(Parallelize4DTile1D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(),
		ComputeNothing4DTile1D,
		nullptr,
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);
}
4060
/*
 * Checks that a 4D-tile-1D parallelize call with a no-op task returns on a pool created
 * with pthreadpool_create(0). Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4DTile1D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(),
		ComputeNothing4DTile1D,
		nullptr,
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);
}
4077
CheckBounds4DTile1D(void *,size_t i,size_t j,size_t k,size_t start_l,size_t tile_l)4078 static void CheckBounds4DTile1D(void*, size_t i, size_t j, size_t k, size_t start_l, size_t tile_l) {
4079 EXPECT_LT(i, kParallelize4DTile1DRangeI);
4080 EXPECT_LT(j, kParallelize4DTile1DRangeJ);
4081 EXPECT_LT(k, kParallelize4DTile1DRangeK);
4082 EXPECT_LT(start_l, kParallelize4DTile1DRangeL);
4083 EXPECT_LE(start_l + tile_l, kParallelize4DTile1DRangeL);
4084 }
4085
/* Runs CheckBounds4DTile1D over the tiled 4D range on a pool created with pthreadpool_create(1). */
TEST(Parallelize4DTile1D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(), CheckBounds4DTile1D, nullptr,
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);
}
4098
/*
 * Runs CheckBounds4DTile1D over the tiled 4D range on a pool created with
 * pthreadpool_create(0). Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4DTile1D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(), CheckBounds4DTile1D, nullptr,
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);
}
4115
CheckTiling4DTile1D(void *,size_t i,size_t j,size_t k,size_t start_l,size_t tile_l)4116 static void CheckTiling4DTile1D(void*, size_t i, size_t j, size_t k, size_t start_l, size_t tile_l) {
4117 EXPECT_GT(tile_l, 0);
4118 EXPECT_LE(tile_l, kParallelize4DTile1DTileL);
4119 EXPECT_EQ(start_l % kParallelize4DTile1DTileL, 0);
4120 EXPECT_EQ(tile_l, std::min<size_t>(kParallelize4DTile1DTileL, kParallelize4DTile1DRangeL - start_l));
4121 }
4122
/* Runs CheckTiling4DTile1D over the tiled 4D range on a pool created with pthreadpool_create(1). */
TEST(Parallelize4DTile1D, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(), CheckTiling4DTile1D, nullptr,
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);
}
4135
/*
 * Runs CheckTiling4DTile1D over the tiled 4D range on a pool created with
 * pthreadpool_create(0). Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4DTile1D, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(), CheckTiling4DTile1D, nullptr,
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);
}
4152
SetTrue4DTile1D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t k,size_t start_l,size_t tile_l)4153 static void SetTrue4DTile1D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t k, size_t start_l, size_t tile_l) {
4154 for (size_t l = start_l; l < start_l + tile_l; l++) {
4155 const size_t linear_idx = ((i * kParallelize4DTile1DRangeJ + j) * kParallelize4DTile1DRangeK + k) * kParallelize4DTile1DRangeL + l;
4156 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
4157 }
4158 }
4159
/*
 * Runs SetTrue4DTile1D over the tiled 4D range on a pool created with
 * pthreadpool_create(1) and checks that the flag of every element was set.
 */
TEST(Parallelize4DTile1D, SingleThreadPoolAllItemsProcessed) {
	const size_t element_count = kParallelize4DTile1DRangeI * kParallelize4DTile1DRangeJ * kParallelize4DTile1DRangeK * kParallelize4DTile1DRangeL;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_4d_tile_1d_t>(SetTrue4DTile1D);
	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(), task, static_cast<void*>(indicators.data()),
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);

	/* Walk the flags in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DTile1DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DTile1DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DTile1DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DTile1DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DTile1DRangeK + k;
				for (size_t l = 0; l < kParallelize4DTile1DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DTile1DRangeL + l;
					EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
				}
			}
		}
	}
}
4186
/*
 * Runs SetTrue4DTile1D over the tiled 4D range on a pool created with
 * pthreadpool_create(0) and checks that the flag of every element was set.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4DTile1D, MultiThreadPoolAllItemsProcessed) {
	const size_t element_count = kParallelize4DTile1DRangeI * kParallelize4DTile1DRangeJ * kParallelize4DTile1DRangeK * kParallelize4DTile1DRangeL;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_4d_tile_1d_t>(SetTrue4DTile1D);
	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(), task, static_cast<void*>(indicators.data()),
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);

	/* Walk the flags in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DTile1DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DTile1DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DTile1DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DTile1DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DTile1DRangeK + k;
				for (size_t l = 0; l < kParallelize4DTile1DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DTile1DRangeL + l;
					EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
				}
			}
		}
	}
}
4217
Increment4DTile1D(std::atomic_int * processed_counters,size_t i,size_t j,size_t k,size_t start_l,size_t tile_l)4218 static void Increment4DTile1D(std::atomic_int* processed_counters, size_t i, size_t j, size_t k, size_t start_l, size_t tile_l) {
4219 for (size_t l = start_l; l < start_l + tile_l; l++) {
4220 const size_t linear_idx = ((i * kParallelize4DTile1DRangeJ + j) * kParallelize4DTile1DRangeK + k) * kParallelize4DTile1DRangeL + l;
4221 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
4222 }
4223 }
4224
/*
 * Runs Increment4DTile1D once over the tiled 4D range on a pool created with
 * pthreadpool_create(1) and checks that every per-element counter equals 1.
 */
TEST(Parallelize4DTile1D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t element_count = kParallelize4DTile1DRangeI * kParallelize4DTile1DRangeJ * kParallelize4DTile1DRangeK * kParallelize4DTile1DRangeL;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_4d_tile_1d_t>(Increment4DTile1D);
	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(), task, static_cast<void*>(counters.data()),
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);

	/* Walk the counters in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DTile1DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DTile1DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DTile1DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DTile1DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DTile1DRangeK + k;
				for (size_t l = 0; l < kParallelize4DTile1DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DTile1DRangeL + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
				}
			}
		}
	}
}
4252
/*
 * Runs Increment4DTile1D once over the tiled 4D range on a pool created with
 * pthreadpool_create(0) and checks that every per-element counter equals 1.
 * Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4DTile1D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t element_count = kParallelize4DTile1DRangeI * kParallelize4DTile1DRangeJ * kParallelize4DTile1DRangeK * kParallelize4DTile1DRangeL;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_4d_tile_1d_t>(Increment4DTile1D);
	pthreadpool_parallelize_4d_tile_1d(
		threadpool.get(), task, static_cast<void*>(counters.data()),
		kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
		kParallelize4DTile1DTileL,
		0 /* flags */);

	/* Walk the counters in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DTile1DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DTile1DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DTile1DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DTile1DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DTile1DRangeK + k;
				for (size_t l = 0; l < kParallelize4DTile1DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DTile1DRangeL + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
				}
			}
		}
	}
}
4284
/*
 * Runs Increment4DTile1D over the tiled 4D range kIncrementIterations times on a pool
 * created with pthreadpool_create(1) and checks that every per-element counter equals
 * kIncrementIterations.
 */
TEST(Parallelize4DTile1D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t element_count = kParallelize4DTile1DRangeI * kParallelize4DTile1DRangeJ * kParallelize4DTile1DRangeK * kParallelize4DTile1DRangeL;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(1), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const auto task = reinterpret_cast<pthreadpool_task_4d_tile_1d_t>(Increment4DTile1D);
	void* const context = static_cast<void*>(counters.data());
	for (size_t iteration = 0; iteration < kIncrementIterations; ++iteration) {
		pthreadpool_parallelize_4d_tile_1d(
			threadpool.get(), task, context,
			kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
			kParallelize4DTile1DTileL,
			0 /* flags */);
	}

	/* Walk the counters in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DTile1DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DTile1DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DTile1DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DTile1DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DTile1DRangeK + k;
				for (size_t l = 0; l < kParallelize4DTile1DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DTile1DRangeL + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
						<< "(expected: " << kIncrementIterations << ")";
				}
			}
		}
	}
}
4315
/*
 * Runs Increment4DTile1D over the tiled 4D range kIncrementIterations times on a pool
 * created with pthreadpool_create(0) and checks that every per-element counter equals
 * kIncrementIterations. Skipped when the pool reports at most one thread.
 */
TEST(Parallelize4DTile1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t element_count = kParallelize4DTile1DRangeI * kParallelize4DTile1DRangeJ * kParallelize4DTile1DRangeK * kParallelize4DTile1DRangeL;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool{pthreadpool_create(0), pthreadpool_destroy};
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const auto task = reinterpret_cast<pthreadpool_task_4d_tile_1d_t>(Increment4DTile1D);
	void* const context = static_cast<void*>(counters.data());
	for (size_t iteration = 0; iteration < kIncrementIterations; ++iteration) {
		pthreadpool_parallelize_4d_tile_1d(
			threadpool.get(), task, context,
			kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
			kParallelize4DTile1DTileL,
			0 /* flags */);
	}

	/* Walk the counters in row-major (i, j, k, l) order. */
	for (size_t i = 0; i < kParallelize4DTile1DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DTile1DRangeJ; ++j) {
			const size_t ij_offset = i * kParallelize4DTile1DRangeJ + j;
			for (size_t k = 0; k < kParallelize4DTile1DRangeK; ++k) {
				const size_t ijk_offset = ij_offset * kParallelize4DTile1DRangeK + k;
				for (size_t l = 0; l < kParallelize4DTile1DRangeL; ++l) {
					const size_t linear_idx = ijk_offset * kParallelize4DTile1DRangeL + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
						<< "(expected: " << kIncrementIterations << ")";
				}
			}
		}
	}
}
4350
// Task body that adds 1 to the single shared counter once for every element of
// the tile [start_l, start_l + tile_l). The i, j and k coordinates are accepted
// but not used.
static void IncrementSame4DTile1D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t start_l, size_t tile_l) {
  const size_t end_l = start_l + tile_l;
  size_t l = start_l;
  while (l < end_l) {
    // One relaxed atomic increment per element of the tile.
    num_processed_items->fetch_add(1, std::memory_order_relaxed);
    l++;
  }
}
4356
// Every tile of the 4D / 1D-tiled range increments one shared counter, element
// by element; afterwards the counter must equal the total number of elements.
// Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile1D, MultiThreadPoolHighContention) {
  // Direct initialization replaces ATOMIC_VAR_INIT, which is deprecated since C++20.
  std::atomic_int num_processed_items{0};

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_4d_tile_1d(
    threadpool.get(),
    reinterpret_cast<pthreadpool_task_4d_tile_1d_t>(IncrementSame4DTile1D),
    static_cast<void*>(&num_processed_items),
    kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
    kParallelize4DTile1DTileL,
    0 /* flags */);
  EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile1DRangeI * kParallelize4DTile1DRangeJ * kParallelize4DTile1DRangeK * kParallelize4DTile1DRangeL);
}
4376
WorkImbalance4DTile1D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t k,size_t start_l,size_t tile_l)4377 static void WorkImbalance4DTile1D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t start_l, size_t tile_l) {
4378 num_processed_items->fetch_add(tile_l, std::memory_order_relaxed);
4379 if (i == 0 && j == 0 && k == 0 && start_l == 0) {
4380 /* Spin-wait until all items are computed */
4381 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize4DTile1DRangeI * kParallelize4DTile1DRangeJ * kParallelize4DTile1DRangeK * kParallelize4DTile1DRangeL) {
4382 std::atomic_thread_fence(std::memory_order_acquire);
4383 }
4384 }
4385 }
4386
// Runs WorkImbalance4DTile1D over the 4D / 1D-tiled range and expects the
// shared counter to equal the total number of elements. The first tile's task
// blocks until all other tiles are counted, so the call can only return if the
// remaining tiles are processed by other threads.
// Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile1D, MultiThreadPoolWorkStealing) {
  // Direct initialization replaces ATOMIC_VAR_INIT, which is deprecated since C++20.
  std::atomic_int num_processed_items{0};

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_4d_tile_1d(
    threadpool.get(),
    reinterpret_cast<pthreadpool_task_4d_tile_1d_t>(WorkImbalance4DTile1D),
    static_cast<void*>(&num_processed_items),
    kParallelize4DTile1DRangeI, kParallelize4DTile1DRangeJ, kParallelize4DTile1DRangeK, kParallelize4DTile1DRangeL,
    kParallelize4DTile1DTileL,
    0 /* flags */);
  EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile1DRangeI * kParallelize4DTile1DRangeJ * kParallelize4DTile1DRangeK * kParallelize4DTile1DRangeL);
}
4406
// No-op task with the 4D / 2D-tiled task signature; ignores its context and
// all six size arguments.
static void ComputeNothing4DTile2D(
    void* /* context */,
    size_t, size_t,
    size_t, size_t,
    size_t, size_t)
{
  // Intentionally empty.
}
4409
// Smoke test: dispatches the no-op task over the 4D range with 2D tiling on a
// pool created with pthreadpool_create(1); passes if the call returns.
TEST(Parallelize4DTile2D, SingleThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    ComputeNothing4DTile2D,
    nullptr,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4421
// Smoke test: dispatches the no-op task over the 4D range with 2D tiling on a
// pool created with pthreadpool_create(0); passes if the call returns.
// Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2D, MultiThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    ComputeNothing4DTile2D,
    nullptr,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4438
CheckBounds4DTile2D(void *,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)4439 static void CheckBounds4DTile2D(void*, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
4440 EXPECT_LT(i, kParallelize4DTile2DRangeI);
4441 EXPECT_LT(j, kParallelize4DTile2DRangeJ);
4442 EXPECT_LT(start_k, kParallelize4DTile2DRangeK);
4443 EXPECT_LT(start_l, kParallelize4DTile2DRangeL);
4444 EXPECT_LE(start_k + tile_k, kParallelize4DTile2DRangeK);
4445 EXPECT_LE(start_l + tile_l, kParallelize4DTile2DRangeL);
4446 }
4447
// Runs the bounds-checking task over the 4D range with 2D tiling on a pool
// created with pthreadpool_create(1); the assertions live in CheckBounds4DTile2D.
TEST(Parallelize4DTile2D, SingleThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    CheckBounds4DTile2D,
    nullptr,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4460
// Runs the bounds-checking task over the 4D range with 2D tiling on a pool
// created with pthreadpool_create(0); the assertions live in CheckBounds4DTile2D.
// Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2D, MultiThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    CheckBounds4DTile2D,
    nullptr,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4477
CheckTiling4DTile2D(void *,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)4478 static void CheckTiling4DTile2D(void*, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
4479 EXPECT_GT(tile_k, 0);
4480 EXPECT_LE(tile_k, kParallelize4DTile2DTileK);
4481 EXPECT_EQ(start_k % kParallelize4DTile2DTileK, 0);
4482 EXPECT_EQ(tile_k, std::min<size_t>(kParallelize4DTile2DTileK, kParallelize4DTile2DRangeK - start_k));
4483
4484 EXPECT_GT(tile_l, 0);
4485 EXPECT_LE(tile_l, kParallelize4DTile2DTileL);
4486 EXPECT_EQ(start_l % kParallelize4DTile2DTileL, 0);
4487 EXPECT_EQ(tile_l, std::min<size_t>(kParallelize4DTile2DTileL, kParallelize4DTile2DRangeL - start_l));
4488 }
4489
// Runs the tiling-checking task over the 4D range with 2D tiling on a pool
// created with pthreadpool_create(1); the assertions live in CheckTiling4DTile2D.
TEST(Parallelize4DTile2D, SingleThreadPoolUniformTiling) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    CheckTiling4DTile2D,
    nullptr,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4502
// Runs the tiling-checking task over the 4D range with 2D tiling on a pool
// created with pthreadpool_create(0); the assertions live in CheckTiling4DTile2D.
// Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2D, MultiThreadPoolUniformTiling) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    CheckTiling4DTile2D,
    nullptr,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4519
SetTrue4DTile2D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)4520 static void SetTrue4DTile2D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
4521 for (size_t k = start_k; k < start_k + tile_k; k++) {
4522 for (size_t l = start_l; l < start_l + tile_l; l++) {
4523 const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
4524 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
4525 }
4526 }
4527 }
4528
// Marks elements through SetTrue4DTile2D on a pool created with
// pthreadpool_create(1), then expects the indicator of every element of the
// 4D range to be true.
TEST(Parallelize4DTile2D, SingleThreadPoolAllItemsProcessed) {
  const size_t element_count =
    kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
    kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL;
  std::vector<std::atomic_bool> indicators(element_count);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const pthreadpool_task_4d_tile_2d_t task =
    reinterpret_cast<pthreadpool_task_4d_tile_2d_t>(SetTrue4DTile2D);
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    task,
    static_cast<void*>(indicators.data()),
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    0 /* flags */);

  // Walk the range in row-major order, building the flat index one dimension at a time.
  for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
    for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
      const size_t ij = i * kParallelize4DTile2DRangeJ + j;
      for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
        const size_t ijk = ij * kParallelize4DTile2DRangeK + k;
        for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
          const size_t linear_idx = ijk * kParallelize4DTile2DRangeL + l;
          EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
            << "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
        }
      }
    }
  }
}
4555
// Marks elements through SetTrue4DTile2D on a pool created with
// pthreadpool_create(0), then expects the indicator of every element of the
// 4D range to be true. Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2D, MultiThreadPoolAllItemsProcessed) {
  const size_t element_count =
    kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
    kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL;
  std::vector<std::atomic_bool> indicators(element_count);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const pthreadpool_task_4d_tile_2d_t task =
    reinterpret_cast<pthreadpool_task_4d_tile_2d_t>(SetTrue4DTile2D);
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    task,
    static_cast<void*>(indicators.data()),
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    0 /* flags */);

  // Walk the range in row-major order, building the flat index one dimension at a time.
  for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
    for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
      const size_t ij = i * kParallelize4DTile2DRangeJ + j;
      for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
        const size_t ijk = ij * kParallelize4DTile2DRangeK + k;
        for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
          const size_t linear_idx = ijk * kParallelize4DTile2DRangeL + l;
          EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
            << "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
        }
      }
    }
  }
}
4586
Increment4DTile2D(std::atomic_int * processed_counters,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)4587 static void Increment4DTile2D(std::atomic_int* processed_counters, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
4588 for (size_t k = start_k; k < start_k + tile_k; k++) {
4589 for (size_t l = start_l; l < start_l + tile_l; l++) {
4590 const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
4591 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
4592 }
4593 }
4594 }
4595
// Runs Increment4DTile2D once on a pool created with pthreadpool_create(1) and
// expects the counter of every element of the 4D range to be exactly 1.
TEST(Parallelize4DTile2D, SingleThreadPoolEachItemProcessedOnce) {
  const size_t element_count =
    kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
    kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL;
  std::vector<std::atomic_int> counters(element_count);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const pthreadpool_task_4d_tile_2d_t task =
    reinterpret_cast<pthreadpool_task_4d_tile_2d_t>(Increment4DTile2D);
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    task,
    static_cast<void*>(counters.data()),
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    0 /* flags */);

  // Walk the range in row-major order, building the flat index one dimension at a time.
  for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
    for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
      const size_t ij = i * kParallelize4DTile2DRangeJ + j;
      for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
        const size_t ijk = ij * kParallelize4DTile2DRangeK + k;
        for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
          const size_t linear_idx = ijk * kParallelize4DTile2DRangeL + l;
          EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
            << "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
            << counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
        }
      }
    }
  }
}
4623
// Runs Increment4DTile2D once on a pool created with pthreadpool_create(0) and
// expects the counter of every element of the 4D range to be exactly 1.
// Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2D, MultiThreadPoolEachItemProcessedOnce) {
  const size_t element_count =
    kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
    kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL;
  std::vector<std::atomic_int> counters(element_count);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const pthreadpool_task_4d_tile_2d_t task =
    reinterpret_cast<pthreadpool_task_4d_tile_2d_t>(Increment4DTile2D);
  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    task,
    static_cast<void*>(counters.data()),
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    0 /* flags */);

  // Walk the range in row-major order, building the flat index one dimension at a time.
  for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
    for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
      const size_t ij = i * kParallelize4DTile2DRangeJ + j;
      for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
        const size_t ijk = ij * kParallelize4DTile2DRangeK + k;
        for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
          const size_t linear_idx = ijk * kParallelize4DTile2DRangeL + l;
          EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
            << "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
            << counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
        }
      }
    }
  }
}
4655
// Runs Increment4DTile2D kIncrementIterations times on a pool created with
// pthreadpool_create(1) and expects every per-element counter to equal
// kIncrementIterations.
TEST(Parallelize4DTile2D, SingleThreadPoolEachItemProcessedMultipleTimes) {
  const size_t element_count =
    kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
    kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL;
  std::vector<std::atomic_int> counters(element_count);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const pthreadpool_task_4d_tile_2d_t task =
    reinterpret_cast<pthreadpool_task_4d_tile_2d_t>(Increment4DTile2D);
  for (size_t pass = 0; pass < kIncrementIterations; pass++) {
    pthreadpool_parallelize_4d_tile_2d(
      threadpool.get(),
      task,
      static_cast<void*>(counters.data()),
      kParallelize4DTile2DRangeI,
      kParallelize4DTile2DRangeJ,
      kParallelize4DTile2DRangeK,
      kParallelize4DTile2DRangeL,
      kParallelize4DTile2DTileK,
      kParallelize4DTile2DTileL,
      0 /* flags */);
  }

  // Walk the range in row-major order, building the flat index one dimension at a time.
  for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
    for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
      const size_t ij = i * kParallelize4DTile2DRangeJ + j;
      for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
        const size_t ijk = ij * kParallelize4DTile2DRangeK + k;
        for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
          const size_t linear_idx = ijk * kParallelize4DTile2DRangeL + l;
          EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
            << "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
            << counters[linear_idx].load(std::memory_order_relaxed) << " times "
            << "(expected: " << kIncrementIterations << ")";
        }
      }
    }
  }
}
4686
// Runs Increment4DTile2D kIncrementIterations times on a pool created with
// pthreadpool_create(0) and expects every per-element counter to equal
// kIncrementIterations. Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
  const size_t element_count =
    kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
    kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL;
  std::vector<std::atomic_int> counters(element_count);

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const pthreadpool_task_4d_tile_2d_t task =
    reinterpret_cast<pthreadpool_task_4d_tile_2d_t>(Increment4DTile2D);
  for (size_t pass = 0; pass < kIncrementIterations; pass++) {
    pthreadpool_parallelize_4d_tile_2d(
      threadpool.get(),
      task,
      static_cast<void*>(counters.data()),
      kParallelize4DTile2DRangeI,
      kParallelize4DTile2DRangeJ,
      kParallelize4DTile2DRangeK,
      kParallelize4DTile2DRangeL,
      kParallelize4DTile2DTileK,
      kParallelize4DTile2DTileL,
      0 /* flags */);
  }

  // Walk the range in row-major order, building the flat index one dimension at a time.
  for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
    for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
      const size_t ij = i * kParallelize4DTile2DRangeJ + j;
      for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
        const size_t ijk = ij * kParallelize4DTile2DRangeK + k;
        for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
          const size_t linear_idx = ijk * kParallelize4DTile2DRangeL + l;
          EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
            << "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
            << counters[linear_idx].load(std::memory_order_relaxed) << " times "
            << "(expected: " << kIncrementIterations << ")";
        }
      }
    }
  }
}
4721
// Task body that adds 1 to the single shared counter once for every element of
// the K x L tile starting at (start_k, start_l). The i and j coordinates are
// accepted but not used.
static void IncrementSame4DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
  const size_t end_k = start_k + tile_k;
  const size_t end_l = start_l + tile_l;

  size_t k = start_k;
  while (k < end_k) {
    size_t l = start_l;
    while (l < end_l) {
      // One relaxed atomic increment per element of the tile.
      num_processed_items->fetch_add(1, std::memory_order_relaxed);
      l++;
    }
    k++;
  }
}
4729
// Every tile of the 4D / 2D-tiled range increments one shared counter, element
// by element; afterwards the counter must equal the total number of elements.
// Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2D, MultiThreadPoolHighContention) {
  // Direct initialization replaces ATOMIC_VAR_INIT, which is deprecated since C++20.
  std::atomic_int num_processed_items{0};

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    reinterpret_cast<pthreadpool_task_4d_tile_2d_t>(IncrementSame4DTile2D),
    static_cast<void*>(&num_processed_items),
    kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
    0 /* flags */);
  EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
}
4749
WorkImbalance4DTile2D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)4750 static void WorkImbalance4DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
4751 num_processed_items->fetch_add(tile_k * tile_l, std::memory_order_relaxed);
4752 if (i == 0 && j == 0 && start_k == 0 && start_l == 0) {
4753 /* Spin-wait until all items are computed */
4754 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL) {
4755 std::atomic_thread_fence(std::memory_order_acquire);
4756 }
4757 }
4758 }
4759
// Runs WorkImbalance4DTile2D over the 4D / 2D-tiled range and expects the
// shared counter to equal the total number of elements. The first tile's task
// blocks until all other tiles are counted, so the call can only return if the
// remaining tiles are processed by other threads.
// Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2D, MultiThreadPoolWorkStealing) {
  // Direct initialization replaces ATOMIC_VAR_INIT, which is deprecated since C++20.
  std::atomic_int num_processed_items{0};

  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
    GTEST_SKIP();
  }

  pthreadpool_parallelize_4d_tile_2d(
    threadpool.get(),
    reinterpret_cast<pthreadpool_task_4d_tile_2d_t>(WorkImbalance4DTile2D),
    static_cast<void*>(&num_processed_items),
    kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
    0 /* flags */);
  EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
}
4779
// No-op task with the 4D / 2D-tiled "with uarch" task signature; ignores its
// context, the uint32_t index argument and all six size arguments.
static void ComputeNothing4DTile2DWithUArch(
    void* /* context */,
    uint32_t /* uarch_index */,
    size_t, size_t,
    size_t, size_t,
    size_t, size_t)
{
  // Intentionally empty.
}
4782
// Smoke test: dispatches the no-op uarch-aware task over the 4D range with 2D
// tiling on a pool created with pthreadpool_create(1); passes if the call returns.
TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d_with_uarch(
    threadpool.get(),
    ComputeNothing4DTile2DWithUArch,
    nullptr,
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4795
// Smoke test: dispatches the no-op uarch-aware task over the 4D range with 2D
// tiling on a pool created with pthreadpool_create(0); passes if the call returns.
// Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolCompletes) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d_with_uarch(
    threadpool.get(),
    ComputeNothing4DTile2DWithUArch,
    nullptr,
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4813
CheckUArch4DTile2DWithUArch(void *,uint32_t uarch_index,size_t,size_t,size_t,size_t,size_t,size_t)4814 static void CheckUArch4DTile2DWithUArch(void*, uint32_t uarch_index, size_t, size_t, size_t, size_t, size_t, size_t) {
4815 if (uarch_index != kDefaultUArchIndex) {
4816 EXPECT_LE(uarch_index, kMaxUArchIndex);
4817 }
4818 }
4819
// Runs the uarch-index-checking task over the 4D range with 2D tiling on a
// pool created with pthreadpool_create(1); the assertion lives in
// CheckUArch4DTile2DWithUArch.
TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolUArchInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d_with_uarch(
    threadpool.get(),
    CheckUArch4DTile2DWithUArch,
    nullptr,
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4833
// Runs the uarch-index-checking task over the 4D range with 2D tiling on a
// pool created with pthreadpool_create(0); the assertion lives in
// CheckUArch4DTile2DWithUArch. Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolUArchInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d_with_uarch(
    threadpool.get(),
    CheckUArch4DTile2DWithUArch,
    nullptr,
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4851
CheckBounds4DTile2DWithUArch(void *,uint32_t,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)4852 static void CheckBounds4DTile2DWithUArch(void*, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
4853 EXPECT_LT(i, kParallelize4DTile2DRangeI);
4854 EXPECT_LT(j, kParallelize4DTile2DRangeJ);
4855 EXPECT_LT(start_k, kParallelize4DTile2DRangeK);
4856 EXPECT_LT(start_l, kParallelize4DTile2DRangeL);
4857 EXPECT_LE(start_k + tile_k, kParallelize4DTile2DRangeK);
4858 EXPECT_LE(start_l + tile_l, kParallelize4DTile2DRangeL);
4859 }
4860
// Runs the bounds-checking uarch-aware task over the 4D range with 2D tiling
// on a pool created with pthreadpool_create(1); the assertions live in
// CheckBounds4DTile2DWithUArch.
TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d_with_uarch(
    threadpool.get(),
    CheckBounds4DTile2DWithUArch,
    nullptr,
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4874
// Runs the bounds-checking uarch-aware task over the 4D range with 2D tiling
// on a pool created with pthreadpool_create(0); the assertions live in
// CheckBounds4DTile2DWithUArch. Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolAllItemsInBounds) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d_with_uarch(
    threadpool.get(),
    CheckBounds4DTile2DWithUArch,
    nullptr,
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4892
CheckTiling4DTile2DWithUArch(void *,uint32_t,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)4893 static void CheckTiling4DTile2DWithUArch(void*, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
4894 EXPECT_GT(tile_k, 0);
4895 EXPECT_LE(tile_k, kParallelize4DTile2DTileK);
4896 EXPECT_EQ(start_k % kParallelize4DTile2DTileK, 0);
4897 EXPECT_EQ(tile_k, std::min<size_t>(kParallelize4DTile2DTileK, kParallelize4DTile2DRangeK - start_k));
4898
4899 EXPECT_GT(tile_l, 0);
4900 EXPECT_LE(tile_l, kParallelize4DTile2DTileL);
4901 EXPECT_EQ(start_l % kParallelize4DTile2DTileL, 0);
4902 EXPECT_EQ(tile_l, std::min<size_t>(kParallelize4DTile2DTileL, kParallelize4DTile2DRangeL - start_l));
4903 }
4904
// Runs the tiling-checking uarch-aware task over the 4D range with 2D tiling
// on a pool created with pthreadpool_create(1); the assertions live in
// CheckTiling4DTile2DWithUArch.
TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolUniformTiling) {
  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d_with_uarch(
    threadpool.get(),
    CheckTiling4DTile2DWithUArch,
    nullptr,
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4918
// Runs the tiling-checking uarch-aware task over the 4D range with 2D tiling
// on a pool created with pthreadpool_create(0); the assertions live in
// CheckTiling4DTile2DWithUArch. Skipped when the pool reports at most one thread.
TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolUniformTiling) {
  auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
  if (threads_count <= 1) {
    GTEST_SKIP();
  }

  const uint32_t flags = 0;
  pthreadpool_parallelize_4d_tile_2d_with_uarch(
    threadpool.get(),
    CheckTiling4DTile2DWithUArch,
    nullptr,
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    flags);
}
4936
SetTrue4DTile2DWithUArch(std::atomic_bool * processed_indicators,uint32_t,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)4937 static void SetTrue4DTile2DWithUArch(std::atomic_bool* processed_indicators, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
4938 for (size_t k = start_k; k < start_k + tile_k; k++) {
4939 for (size_t l = start_l; l < start_l + tile_l; l++) {
4940 const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
4941 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
4942 }
4943 }
4944 }
4945
// Marks elements through SetTrue4DTile2DWithUArch on a pool created with
// pthreadpool_create(1), then expects the indicator of every element of the
// 4D range to be true.
TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolAllItemsProcessed) {
  const size_t element_count =
    kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
    kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL;
  std::vector<std::atomic_bool> indicators(element_count);

  auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
  ASSERT_TRUE(threadpool.get());

  const pthreadpool_task_4d_tile_2d_with_id_t task =
    reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(SetTrue4DTile2DWithUArch);
  pthreadpool_parallelize_4d_tile_2d_with_uarch(
    threadpool.get(),
    task,
    static_cast<void*>(indicators.data()),
    kDefaultUArchIndex,
    kMaxUArchIndex,
    kParallelize4DTile2DRangeI,
    kParallelize4DTile2DRangeJ,
    kParallelize4DTile2DRangeK,
    kParallelize4DTile2DRangeL,
    kParallelize4DTile2DTileK,
    kParallelize4DTile2DTileL,
    0 /* flags */);

  // Walk the range in row-major order, building the flat index one dimension at a time.
  for (size_t i = 0; i < kParallelize4DTile2DRangeI; i++) {
    for (size_t j = 0; j < kParallelize4DTile2DRangeJ; j++) {
      const size_t ij = i * kParallelize4DTile2DRangeJ + j;
      for (size_t k = 0; k < kParallelize4DTile2DRangeK; k++) {
        const size_t ijk = ij * kParallelize4DTile2DRangeK + k;
        for (size_t l = 0; l < kParallelize4DTile2DRangeL; l++) {
          const size_t linear_idx = ijk * kParallelize4DTile2DRangeL + l;
          EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
            << "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
        }
      }
    }
  }
}
4973
/*
 * Same check as the single-thread variant, but on a pool created with
 * pthreadpool_create(0). Skipped when that pool has at most one thread.
 */
TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolAllItemsProcessed) {
	std::vector<std::atomic_bool> indicators(
		kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
		kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	void* const context = indicators.data();
	pthreadpool_parallelize_4d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(SetTrue4DTile2DWithUArch),
		context,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
		kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
		/* flags */ 0);

	/* Visit the flags in row-major order and recover (i, j, k, l) for the failure message */
	const size_t total_items = indicators.size();
	for (size_t linear_idx = 0; linear_idx < total_items; ++linear_idx) {
		const size_t l = linear_idx % kParallelize4DTile2DRangeL;
		const size_t k = (linear_idx / kParallelize4DTile2DRangeL) % kParallelize4DTile2DRangeK;
		const size_t j = (linear_idx / (kParallelize4DTile2DRangeL * kParallelize4DTile2DRangeK)) % kParallelize4DTile2DRangeJ;
		const size_t i = linear_idx / (kParallelize4DTile2DRangeL * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeJ);
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") not processed";
	}
}
5005
Increment4DTile2DWithUArch(std::atomic_int * processed_counters,uint32_t,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)5006 static void Increment4DTile2DWithUArch(std::atomic_int* processed_counters, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
5007 for (size_t k = start_k; k < start_k + tile_k; k++) {
5008 for (size_t l = start_l; l < start_l + tile_l; l++) {
5009 const size_t linear_idx = ((i * kParallelize4DTile2DRangeJ + j) * kParallelize4DTile2DRangeK + k) * kParallelize4DTile2DRangeL + l;
5010 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
5011 }
5012 }
5013 }
5014
/*
 * Runs Increment4DTile2DWithUArch once over the 4D range on a
 * pthreadpool_create(1) pool and expects every per-element counter to be 1.
 */
TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolEachItemProcessedOnce) {
	const size_t item_count =
		kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
		kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	void* const context = counters.data();
	pthreadpool_parallelize_4d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(Increment4DTile2DWithUArch),
		context,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
		kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
		/* flags */ 0);

	/* The loops walk the array in row-major order, so linear_idx advances in step with (i, j, k, l) */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize4DTile2DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DTile2DRangeJ; ++j) {
			for (size_t k = 0; k < kParallelize4DTile2DRangeK; ++k) {
				for (size_t l = 0; l < kParallelize4DTile2DRangeL; ++l, ++linear_idx) {
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
				}
			}
		}
	}
}
5043
/*
 * Runs Increment4DTile2DWithUArch once over the 4D range on a
 * pthreadpool_create(0) pool and expects every per-element counter to be 1.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolEachItemProcessedOnce) {
	const size_t range_i = kParallelize4DTile2DRangeI;
	const size_t range_j = kParallelize4DTile2DRangeJ;
	const size_t range_k = kParallelize4DTile2DRangeK;
	const size_t range_l = kParallelize4DTile2DRangeL;

	std::vector<std::atomic_int> counters(range_i * range_j * range_k * range_l);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_4d_tile_2d_with_id_t task =
		reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(Increment4DTile2DWithUArch);
	pthreadpool_parallelize_4d_tile_2d_with_uarch(
		threadpool.get(), task, static_cast<void*>(counters.data()),
		kDefaultUArchIndex, kMaxUArchIndex,
		range_i, range_j, range_k, range_l,
		kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
		0 /* flags */);

	for (size_t i = 0; i < range_i; ++i) {
		for (size_t j = 0; j < range_j; ++j) {
			const size_t ij_offset = i * range_j + j;
			for (size_t k = 0; k < range_k; ++k) {
				const size_t ijk_offset = ij_offset * range_k + k;
				for (size_t l = 0; l < range_l; ++l) {
					const size_t linear_idx = ijk_offset * range_l + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
				}
			}
		}
	}
}
5076
/*
 * Runs Increment4DTile2DWithUArch kIncrementIterations times over the 4D range
 * on a pthreadpool_create(1) pool and expects every per-element counter to
 * equal kIncrementIterations.
 */
TEST(Parallelize4DTile2DWithUArch, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t range_i = kParallelize4DTile2DRangeI;
	const size_t range_j = kParallelize4DTile2DRangeJ;
	const size_t range_k = kParallelize4DTile2DRangeK;
	const size_t range_l = kParallelize4DTile2DRangeL;

	std::vector<std::atomic_int> counters(range_i * range_j * range_k * range_l);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_4d_tile_2d_with_id_t task =
		reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(Increment4DTile2DWithUArch);
	void* const context = counters.data();
	for (size_t pass = 0; pass != kIncrementIterations; ++pass) {
		pthreadpool_parallelize_4d_tile_2d_with_uarch(
			threadpool.get(), task, context,
			kDefaultUArchIndex, kMaxUArchIndex,
			range_i, range_j, range_k, range_l,
			kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
			0 /* flags */);
	}

	for (size_t i = 0; i < range_i; ++i) {
		for (size_t j = 0; j < range_j; ++j) {
			for (size_t k = 0; k < range_k; ++k) {
				/* Index of element (i, j, k, 0) */
				const size_t row_offset = ((i * range_j + j) * range_k + k) * range_l;
				for (size_t l = 0; l < range_l; ++l) {
					const size_t linear_idx = row_offset + l;
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
						<< "(expected: " << kIncrementIterations << ")";
				}
			}
		}
	}
}
5108
/*
 * Runs Increment4DTile2DWithUArch kIncrementIterations times over the 4D range
 * on a pthreadpool_create(0) pool and expects every per-element counter to
 * equal kIncrementIterations. Skipped when the pool has at most one thread.
 */
TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t item_count =
		kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ *
		kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL;
	std::vector<std::atomic_int> counters(item_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	void* const context = counters.data();
	for (size_t pass = 0; pass != kIncrementIterations; ++pass) {
		pthreadpool_parallelize_4d_tile_2d_with_uarch(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(Increment4DTile2DWithUArch),
			context,
			kDefaultUArchIndex, kMaxUArchIndex,
			kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
			kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
			/* flags */ 0);
	}

	/* The loops walk the array in row-major order, so linear_idx advances in step with (i, j, k, l) */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize4DTile2DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize4DTile2DRangeJ; ++j) {
			for (size_t k = 0; k < kParallelize4DTile2DRangeK; ++k) {
				for (size_t l = 0; l < kParallelize4DTile2DRangeL; ++l, ++linear_idx) {
					EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations)
						<< "Element (" << i << ", " << j << ", " << k << ", " << l << ") was processed "
						<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
						<< "(expected: " << kIncrementIterations << ")";
				}
			}
		}
	}
}
5144
/*
 * Task for the high-contention test: adds 1 to the single shared counter once
 * per element of the tile_k x tile_l tile. The uarch index and the (i, j)
 * coordinates are not used.
 */
static void IncrementSame4DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
	const size_t end_k = start_k + tile_k;
	const size_t end_l = start_l + tile_l;
	for (size_t k = start_k; k < end_k; ++k) {
		for (size_t l = start_l; l < end_l; ++l) {
			/* One atomic increment per element, not one per tile */
			num_processed_items->fetch_add(1, std::memory_order_relaxed);
		}
	}
}
5152
/*
 * Has every element increment one shared counter (IncrementSame4DTile2DWithUArch)
 * on a pthreadpool_create(0) pool, then checks that the final count equals the
 * number of elements in the 4D range. Skipped when the pool has at most one thread.
 */
TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolHighContention) {
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_4d_tile_2d_with_id_t task =
		reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(IncrementSame4DTile2DWithUArch);
	pthreadpool_parallelize_4d_tile_2d_with_uarch(
		threadpool.get(), task, static_cast<void*>(&num_processed_items),
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
		kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
		/* flags */ 0);

	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
}
5173
WorkImbalance4DTile2DWithUArch(std::atomic_int * num_processed_items,uint32_t,size_t i,size_t j,size_t start_k,size_t start_l,size_t tile_k,size_t tile_l)5174 static void WorkImbalance4DTile2DWithUArch(std::atomic_int* num_processed_items, uint32_t, size_t i, size_t j, size_t start_k, size_t start_l, size_t tile_k, size_t tile_l) {
5175 num_processed_items->fetch_add(tile_k * tile_l, std::memory_order_relaxed);
5176 if (i == 0 && j == 0 && start_k == 0 && start_l == 0) {
5177 /* Spin-wait until all items are computed */
5178 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL) {
5179 std::atomic_thread_fence(std::memory_order_acquire);
5180 }
5181 }
5182 }
5183
/*
 * Runs WorkImbalance4DTile2DWithUArch on a pthreadpool_create(0) pool and checks
 * that the shared counter ends up equal to the number of elements in the 4D
 * range. Skipped when the pool has at most one thread.
 */
TEST(Parallelize4DTile2DWithUArch, MultiThreadPoolWorkStealing) {
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	void* const context = &num_processed_items;
	pthreadpool_parallelize_4d_tile_2d_with_uarch(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_4d_tile_2d_with_id_t>(WorkImbalance4DTile2DWithUArch),
		context,
		kDefaultUArchIndex, kMaxUArchIndex,
		kParallelize4DTile2DRangeI, kParallelize4DTile2DRangeJ, kParallelize4DTile2DRangeK, kParallelize4DTile2DRangeL,
		kParallelize4DTile2DTileK, kParallelize4DTile2DTileL,
		/* flags */ 0);

	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize4DTile2DRangeI * kParallelize4DTile2DRangeJ * kParallelize4DTile2DRangeK * kParallelize4DTile2DRangeL);
}
5204
/* No-op 5D task: ignores its context and all five indices. */
static void ComputeNothing5D(void* /* context */, size_t /* i */, size_t /* j */, size_t /* k */, size_t /* l */, size_t /* m */) {
}
5207
/*
 * Dispatches the no-op task over the 5D range on a pthreadpool_create(1) pool.
 * There are no expectations after the call; the test passes once it returns.
 */
TEST(Parallelize5D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d(
		threadpool.get(),
		ComputeNothing5D,
		/* context */ nullptr,
		kParallelize5DRangeI,
		kParallelize5DRangeJ,
		kParallelize5DRangeK,
		kParallelize5DRangeL,
		kParallelize5DRangeM,
		/* flags */ 0);
}
5218
/*
 * Dispatches the no-op task over the 5D range on a pthreadpool_create(0) pool.
 * Skipped when the pool has at most one thread; otherwise passes once the call returns.
 */
TEST(Parallelize5D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d(
		threadpool.get(),
		ComputeNothing5D,
		/* context */ nullptr,
		kParallelize5DRangeI,
		kParallelize5DRangeJ,
		kParallelize5DRangeK,
		kParallelize5DRangeL,
		kParallelize5DRangeM,
		/* flags */ 0);
}
5234
CheckBounds5D(void *,size_t i,size_t j,size_t k,size_t l,size_t m)5235 static void CheckBounds5D(void*, size_t i, size_t j, size_t k, size_t l, size_t m) {
5236 EXPECT_LT(i, kParallelize5DRangeI);
5237 EXPECT_LT(j, kParallelize5DRangeJ);
5238 EXPECT_LT(k, kParallelize5DRangeK);
5239 EXPECT_LT(l, kParallelize5DRangeL);
5240 EXPECT_LT(m, kParallelize5DRangeM);
5241 }
5242
/*
 * Runs CheckBounds5D over the 5D range on a pthreadpool_create(1) pool; the
 * bounds expectations live inside the task.
 */
TEST(Parallelize5D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d(
		threadpool.get(),
		CheckBounds5D,
		/* context */ nullptr,
		kParallelize5DRangeI,
		kParallelize5DRangeJ,
		kParallelize5DRangeK,
		kParallelize5DRangeL,
		kParallelize5DRangeM,
		/* flags */ 0);
}
5254
/*
 * Runs CheckBounds5D over the 5D range on a pthreadpool_create(0) pool; the
 * bounds expectations live inside the task. Skipped when the pool has at most
 * one thread.
 */
TEST(Parallelize5D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d(
		threadpool.get(),
		CheckBounds5D,
		/* context */ nullptr,
		kParallelize5DRangeI,
		kParallelize5DRangeJ,
		kParallelize5DRangeK,
		kParallelize5DRangeL,
		kParallelize5DRangeM,
		/* flags */ 0);
}
5270
SetTrue5D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t k,size_t l,size_t m)5271 static void SetTrue5D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t k, size_t l, size_t m) {
5272 const size_t linear_idx = (((i * kParallelize5DRangeJ + j) * kParallelize5DRangeK + k) * kParallelize5DRangeL + l) * kParallelize5DRangeM + m;
5273 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
5274 }
5275
/*
 * Runs SetTrue5D over the 5D range on a pthreadpool_create(1) pool, then checks
 * that the indicator of every (i, j, k, l, m) element is set.
 */
TEST(Parallelize5D, SingleThreadPoolAllItemsProcessed) {
	std::vector<std::atomic_bool> indicators(
		kParallelize5DRangeI * kParallelize5DRangeJ * kParallelize5DRangeK *
		kParallelize5DRangeL * kParallelize5DRangeM);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_5d_t task = reinterpret_cast<pthreadpool_task_5d_t>(SetTrue5D);
	pthreadpool_parallelize_5d(
		threadpool.get(), task, static_cast<void*>(indicators.data()),
		kParallelize5DRangeI, kParallelize5DRangeJ, kParallelize5DRangeK, kParallelize5DRangeL, kParallelize5DRangeM,
		/* flags */ 0);

	for (size_t i = 0; i < kParallelize5DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize5DRangeJ; ++j) {
			const size_t ij = i * kParallelize5DRangeJ + j;
			for (size_t k = 0; k < kParallelize5DRangeK; ++k) {
				const size_t ijk = ij * kParallelize5DRangeK + k;
				for (size_t l = 0; l < kParallelize5DRangeL; ++l) {
					const size_t ijkl = ijk * kParallelize5DRangeL + l;
					for (size_t m = 0; m < kParallelize5DRangeM; ++m) {
						const size_t linear_idx = ijkl * kParallelize5DRangeM + m;
						EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") not processed";
					}
				}
			}
		}
	}
}
5303
/*
 * Runs SetTrue5D over the 5D range on a pthreadpool_create(0) pool, then checks
 * that the indicator of every (i, j, k, l, m) element is set. Skipped when the
 * pool has at most one thread.
 */
TEST(Parallelize5D, MultiThreadPoolAllItemsProcessed) {
	std::vector<std::atomic_bool> indicators(
		kParallelize5DRangeI * kParallelize5DRangeJ * kParallelize5DRangeK *
		kParallelize5DRangeL * kParallelize5DRangeM);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	void* const context = indicators.data();
	pthreadpool_parallelize_5d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_t>(SetTrue5D),
		context,
		kParallelize5DRangeI, kParallelize5DRangeJ, kParallelize5DRangeK, kParallelize5DRangeL, kParallelize5DRangeM,
		/* flags */ 0);

	/* Visit the flags in row-major order and recover (i, j, k, l, m) for the failure message */
	const size_t total_items = indicators.size();
	for (size_t linear_idx = 0; linear_idx < total_items; ++linear_idx) {
		const size_t m = linear_idx % kParallelize5DRangeM;
		const size_t l = (linear_idx / kParallelize5DRangeM) % kParallelize5DRangeL;
		const size_t k = (linear_idx / (kParallelize5DRangeM * kParallelize5DRangeL)) % kParallelize5DRangeK;
		const size_t j = (linear_idx / (kParallelize5DRangeM * kParallelize5DRangeL * kParallelize5DRangeK)) % kParallelize5DRangeJ;
		const size_t i = linear_idx / (kParallelize5DRangeM * kParallelize5DRangeL * kParallelize5DRangeK * kParallelize5DRangeJ);
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") not processed";
	}
}
5335
Increment5D(std::atomic_int * processed_counters,size_t i,size_t j,size_t k,size_t l,size_t m)5336 static void Increment5D(std::atomic_int* processed_counters, size_t i, size_t j, size_t k, size_t l, size_t m) {
5337 const size_t linear_idx = (((i * kParallelize5DRangeJ + j) * kParallelize5DRangeK + k) * kParallelize5DRangeL + l) * kParallelize5DRangeM + m;
5338 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
5339 }
5340
/*
 * Runs Increment5D once over the 5D range on a pthreadpool_create(1) pool and
 * expects every per-element counter to be 1.
 */
TEST(Parallelize5D, SingleThreadPoolEachItemProcessedOnce) {
	std::vector<std::atomic_int> counters(
		kParallelize5DRangeI * kParallelize5DRangeJ * kParallelize5DRangeK *
		kParallelize5DRangeL * kParallelize5DRangeM);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	void* const context = counters.data();
	pthreadpool_parallelize_5d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_t>(Increment5D),
		context,
		kParallelize5DRangeI, kParallelize5DRangeJ, kParallelize5DRangeK, kParallelize5DRangeL, kParallelize5DRangeM,
		/* flags */ 0);

	/* The loops walk the array in row-major order, so linear_idx advances in step with (i, j, k, l, m) */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize5DRangeJ; ++j) {
			for (size_t k = 0; k < kParallelize5DRangeK; ++k) {
				for (size_t l = 0; l < kParallelize5DRangeL; ++l) {
					for (size_t m = 0; m < kParallelize5DRangeM; ++m, ++linear_idx) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
					}
				}
			}
		}
	}
}
5369
/*
 * Runs Increment5D once over the 5D range on a pthreadpool_create(0) pool and
 * expects every per-element counter to be 1. Skipped when the pool has at most
 * one thread.
 */
TEST(Parallelize5D, MultiThreadPoolEachItemProcessedOnce) {
	std::vector<std::atomic_int> counters(
		kParallelize5DRangeI * kParallelize5DRangeJ * kParallelize5DRangeK *
		kParallelize5DRangeL * kParallelize5DRangeM);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_5d_t task = reinterpret_cast<pthreadpool_task_5d_t>(Increment5D);
	pthreadpool_parallelize_5d(
		threadpool.get(), task, static_cast<void*>(counters.data()),
		kParallelize5DRangeI, kParallelize5DRangeJ, kParallelize5DRangeK, kParallelize5DRangeL, kParallelize5DRangeM,
		/* flags */ 0);

	for (size_t i = 0; i < kParallelize5DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize5DRangeJ; ++j) {
			const size_t ij = i * kParallelize5DRangeJ + j;
			for (size_t k = 0; k < kParallelize5DRangeK; ++k) {
				const size_t ijk = ij * kParallelize5DRangeK + k;
				for (size_t l = 0; l < kParallelize5DRangeL; ++l) {
					const size_t ijkl = ijk * kParallelize5DRangeL + l;
					for (size_t m = 0; m < kParallelize5DRangeM; ++m) {
						const size_t linear_idx = ijkl * kParallelize5DRangeM + m;
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
					}
				}
			}
		}
	}
}
5402
/*
 * Runs Increment5D kIncrementIterations5D times over the 5D range on a
 * pthreadpool_create(1) pool and expects every per-element counter to equal
 * kIncrementIterations5D.
 */
TEST(Parallelize5D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	std::vector<std::atomic_int> counters(
		kParallelize5DRangeI * kParallelize5DRangeJ * kParallelize5DRangeK *
		kParallelize5DRangeL * kParallelize5DRangeM);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_5d_t task = reinterpret_cast<pthreadpool_task_5d_t>(Increment5D);
	void* const context = counters.data();
	for (size_t pass = 0; pass != kIncrementIterations5D; ++pass) {
		pthreadpool_parallelize_5d(
			threadpool.get(), task, context,
			kParallelize5DRangeI, kParallelize5DRangeJ, kParallelize5DRangeK, kParallelize5DRangeL, kParallelize5DRangeM,
			/* flags */ 0);
	}

	for (size_t i = 0; i < kParallelize5DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize5DRangeJ; ++j) {
			for (size_t k = 0; k < kParallelize5DRangeK; ++k) {
				for (size_t l = 0; l < kParallelize5DRangeL; ++l) {
					/* Index of element (i, j, k, l, 0) */
					const size_t row_offset = (((i * kParallelize5DRangeJ + j) * kParallelize5DRangeK + k) * kParallelize5DRangeL + l) * kParallelize5DRangeM;
					for (size_t m = 0; m < kParallelize5DRangeM; ++m) {
						const size_t linear_idx = row_offset + m;
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations5D)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
							<< "(expected: " << kIncrementIterations5D << ")";
					}
				}
			}
		}
	}
}
5434
/*
 * Runs Increment5D kIncrementIterations5D times over the 5D range on a
 * pthreadpool_create(0) pool and expects every per-element counter to equal
 * kIncrementIterations5D. Skipped when the pool has at most one thread.
 */
TEST(Parallelize5D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	std::vector<std::atomic_int> counters(
		kParallelize5DRangeI * kParallelize5DRangeJ * kParallelize5DRangeK *
		kParallelize5DRangeL * kParallelize5DRangeM);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	void* const context = counters.data();
	for (size_t pass = 0; pass != kIncrementIterations5D; ++pass) {
		pthreadpool_parallelize_5d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_5d_t>(Increment5D),
			context,
			kParallelize5DRangeI, kParallelize5DRangeJ, kParallelize5DRangeK, kParallelize5DRangeL, kParallelize5DRangeM,
			/* flags */ 0);
	}

	/* The loops walk the array in row-major order, so linear_idx advances in step with (i, j, k, l, m) */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DRangeI; ++i) {
		for (size_t j = 0; j < kParallelize5DRangeJ; ++j) {
			for (size_t k = 0; k < kParallelize5DRangeK; ++k) {
				for (size_t l = 0; l < kParallelize5DRangeL; ++l) {
					for (size_t m = 0; m < kParallelize5DRangeM; ++m, ++linear_idx) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations5D)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
							<< "(expected: " << kIncrementIterations5D << ")";
					}
				}
			}
		}
	}
}
5470
/*
 * 5D task for the high-contention test: adds 1 to the single shared counter.
 * The five indices are not used.
 */
static void IncrementSame5D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t m) {
	std::atomic_fetch_add_explicit(num_processed_items, 1, std::memory_order_relaxed);
}
5474
/*
 * Has every element increment one shared counter (IncrementSame5D) on a
 * pthreadpool_create(0) pool, then checks that the final count equals the
 * number of elements in the 5D range. Skipped when the pool has at most one thread.
 */
TEST(Parallelize5D, MultiThreadPoolHighContention) {
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	const pthreadpool_task_5d_t task = reinterpret_cast<pthreadpool_task_5d_t>(IncrementSame5D);
	pthreadpool_parallelize_5d(
		threadpool.get(), task, static_cast<void*>(&num_processed_items),
		kParallelize5DRangeI, kParallelize5DRangeJ, kParallelize5DRangeK, kParallelize5DRangeL, kParallelize5DRangeM,
		/* flags */ 0);

	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize5DRangeI * kParallelize5DRangeJ * kParallelize5DRangeK * kParallelize5DRangeL * kParallelize5DRangeM);
}
5493
WorkImbalance5D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t k,size_t l,size_t m)5494 static void WorkImbalance5D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t m) {
5495 num_processed_items->fetch_add(1, std::memory_order_relaxed);
5496 if (i == 0 && j == 0 && k == 0 && l == 0 && m == 0) {
5497 /* Spin-wait until all items are computed */
5498 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize5DRangeI * kParallelize5DRangeJ * kParallelize5DRangeK * kParallelize5DRangeL * kParallelize5DRangeM) {
5499 std::atomic_thread_fence(std::memory_order_acquire);
5500 }
5501 }
5502 }
5503
/*
 * Runs WorkImbalance5D on a pthreadpool_create(0) pool and checks that the
 * shared counter ends up equal to the number of elements in the 5D range.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize5D, MultiThreadPoolWorkStealing) {
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	void* const context = &num_processed_items;
	pthreadpool_parallelize_5d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_t>(WorkImbalance5D),
		context,
		kParallelize5DRangeI, kParallelize5DRangeJ, kParallelize5DRangeK, kParallelize5DRangeL, kParallelize5DRangeM,
		/* flags */ 0);

	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize5DRangeI * kParallelize5DRangeJ * kParallelize5DRangeK * kParallelize5DRangeL * kParallelize5DRangeM);
}
5522
/* No-op 5D task with a 1D tile: ignores its context, the four indices, and the tile start/size. */
static void ComputeNothing5DTile1D(void* /* context */, size_t /* i */, size_t /* j */, size_t /* k */, size_t /* l */, size_t /* start_m */, size_t /* tile_m */) {
}
5525
/*
 * Dispatches the no-op task over the 5D (1D-tiled) range on a
 * pthreadpool_create(1) pool. There are no expectations after the call; the
 * test passes once it returns.
 */
TEST(Parallelize5DTile1D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		ComputeNothing5DTile1D,
		/* context */ nullptr,
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		/* flags */ 0);
}
5537
/*
 * Dispatches the no-op task over the 5D (1D-tiled) range on a
 * pthreadpool_create(0) pool. Skipped when the pool has at most one thread;
 * otherwise passes once the call returns.
 */
TEST(Parallelize5DTile1D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		ComputeNothing5DTile1D,
		/* context */ nullptr,
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		/* flags */ 0);
}
5554
CheckBounds5DTile1D(void *,size_t i,size_t j,size_t k,size_t l,size_t start_m,size_t tile_m)5555 static void CheckBounds5DTile1D(void*, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t tile_m) {
5556 EXPECT_LT(i, kParallelize5DTile1DRangeI);
5557 EXPECT_LT(j, kParallelize5DTile1DRangeJ);
5558 EXPECT_LT(k, kParallelize5DTile1DRangeK);
5559 EXPECT_LT(l, kParallelize5DTile1DRangeL);
5560 EXPECT_LT(start_m, kParallelize5DTile1DRangeM);
5561 EXPECT_LE(start_m + tile_m, kParallelize5DTile1DRangeM);
5562 }
5563
/*
 * Runs CheckBounds5DTile1D over the 5D (1D-tiled) range on a
 * pthreadpool_create(1) pool; the bounds expectations live inside the task.
 */
TEST(Parallelize5DTile1D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		CheckBounds5DTile1D,
		/* context */ nullptr,
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		/* flags */ 0);
}
5576
/*
 * Runs CheckBounds5DTile1D over the 5D (1D-tiled) range on a
 * pthreadpool_create(0) pool; the bounds expectations live inside the task.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile1D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		CheckBounds5DTile1D,
		/* context */ nullptr,
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		/* flags */ 0);
}
5593
CheckTiling5DTile1D(void *,size_t i,size_t j,size_t k,size_t l,size_t start_m,size_t tile_m)5594 static void CheckTiling5DTile1D(void*, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t tile_m) {
5595 EXPECT_GT(tile_m, 0);
5596 EXPECT_LE(tile_m, kParallelize5DTile1DTileM);
5597 EXPECT_EQ(start_m % kParallelize5DTile1DTileM, 0);
5598 EXPECT_EQ(tile_m, std::min<size_t>(kParallelize5DTile1DTileM, kParallelize5DTile1DRangeM - start_m));
5599 }
5600
/*
 * Runs CheckTiling5DTile1D over the 5D (1D-tiled) range on a
 * pthreadpool_create(1) pool; the tiling expectations live inside the task.
 */
TEST(Parallelize5DTile1D, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		CheckTiling5DTile1D,
		/* context */ nullptr,
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		/* flags */ 0);
}
5613
/*
 * Runs CheckTiling5DTile1D over the 5D (1D-tiled) range on a
 * pthreadpool_create(0) pool; the tiling expectations live inside the task.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile1D, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		CheckTiling5DTile1D,
		/* context */ nullptr,
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		/* flags */ 0);
}
5630
SetTrue5DTile1D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t k,size_t l,size_t start_m,size_t tile_m)5631 static void SetTrue5DTile1D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t tile_m) {
5632 for (size_t m = start_m; m < start_m + tile_m; m++) {
5633 const size_t linear_idx = (((i * kParallelize5DTile1DRangeJ + j) * kParallelize5DTile1DRangeK + k) * kParallelize5DTile1DRangeL + l) * kParallelize5DTile1DRangeM + m;
5634 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
5635 }
5636 }
5637
/*
 * Marks elements through SetTrue5DTile1D on a pool created with
 * pthreadpool_create(1), then verifies that every element's flag is set.
 */
TEST(Parallelize5DTile1D, SingleThreadPoolAllItemsProcessed) {
	const size_t element_count = kParallelize5DTile1DRangeI * kParallelize5DTile1DRangeJ * kParallelize5DTile1DRangeK * kParallelize5DTile1DRangeL * kParallelize5DTile1DRangeM;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_1d_t>(SetTrue5DTile1D),
		static_cast<void*>(indicators.data()),
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		0 /* flags */);

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile1DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile1DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile1DRangeM; m++, linear_idx++) {
						EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") not processed";
					}
				}
			}
		}
	}
}
5666
/*
 * Marks elements through SetTrue5DTile1D on a pool created with
 * pthreadpool_create(0), then verifies that every element's flag is set.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile1D, MultiThreadPoolAllItemsProcessed) {
	const size_t element_count = kParallelize5DTile1DRangeI * kParallelize5DTile1DRangeJ * kParallelize5DTile1DRangeK * kParallelize5DTile1DRangeL * kParallelize5DTile1DRangeM;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* A single-threaded pool cannot exercise the multi-threaded path. */
	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_1d_t>(SetTrue5DTile1D),
		static_cast<void*>(indicators.data()),
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		0 /* flags */);

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile1DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile1DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile1DRangeM; m++, linear_idx++) {
						EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") not processed";
					}
				}
			}
		}
	}
}
5699
Increment5DTile1D(std::atomic_int * processed_counters,size_t i,size_t j,size_t k,size_t l,size_t start_m,size_t tile_m)5700 static void Increment5DTile1D(std::atomic_int* processed_counters, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t tile_m) {
5701 for (size_t m = start_m; m < start_m + tile_m; m++) {
5702 const size_t linear_idx = (((i * kParallelize5DTile1DRangeJ + j) * kParallelize5DTile1DRangeK + k) * kParallelize5DTile1DRangeL + l) * kParallelize5DTile1DRangeM + m;
5703 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
5704 }
5705 }
5706
/*
 * Counts visits through Increment5DTile1D on a pool created with
 * pthreadpool_create(1), then verifies every element's counter equals 1.
 */
TEST(Parallelize5DTile1D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t element_count = kParallelize5DTile1DRangeI * kParallelize5DTile1DRangeJ * kParallelize5DTile1DRangeK * kParallelize5DTile1DRangeL * kParallelize5DTile1DRangeM;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_1d_t>(Increment5DTile1D),
		static_cast<void*>(counters.data()),
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		0 /* flags */);

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile1DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile1DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile1DRangeM; m++, linear_idx++) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
					}
				}
			}
		}
	}
}
5736
/*
 * Counts visits through Increment5DTile1D on a pool created with
 * pthreadpool_create(0), then verifies every element's counter equals 1.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile1D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t element_count = kParallelize5DTile1DRangeI * kParallelize5DTile1DRangeJ * kParallelize5DTile1DRangeK * kParallelize5DTile1DRangeL * kParallelize5DTile1DRangeM;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* A single-threaded pool cannot exercise the multi-threaded path. */
	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_1d_t>(Increment5DTile1D),
		static_cast<void*>(counters.data()),
		kParallelize5DTile1DRangeI,
		kParallelize5DTile1DRangeJ,
		kParallelize5DTile1DRangeK,
		kParallelize5DTile1DRangeL,
		kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		0 /* flags */);

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile1DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile1DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile1DRangeM; m++, linear_idx++) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
					}
				}
			}
		}
	}
}
5770
/*
 * Repeats the Increment5DTile1D pass kIncrementIterations5D times on a pool
 * created with pthreadpool_create(1), then verifies every element's counter
 * equals kIncrementIterations5D.
 */
TEST(Parallelize5DTile1D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t element_count = kParallelize5DTile1DRangeI * kParallelize5DTile1DRangeJ * kParallelize5DTile1DRangeK * kParallelize5DTile1DRangeL * kParallelize5DTile1DRangeM;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* Each pass is expected to add exactly one to every counter. */
	for (size_t pass = 0; pass < kIncrementIterations5D; pass++) {
		pthreadpool_parallelize_5d_tile_1d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_5d_tile_1d_t>(Increment5DTile1D),
			static_cast<void*>(counters.data()),
			kParallelize5DTile1DRangeI,
			kParallelize5DTile1DRangeJ,
			kParallelize5DTile1DRangeK,
			kParallelize5DTile1DRangeL,
			kParallelize5DTile1DRangeM,
			kParallelize5DTile1DTileM,
			0 /* flags */);
	}

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile1DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile1DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile1DRangeM; m++, linear_idx++) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations5D)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
							<< "(expected: " << kIncrementIterations5D << ")";
					}
				}
			}
		}
	}
}
5803
/*
 * Repeats the Increment5DTile1D pass kIncrementIterations5D times on a pool
 * created with pthreadpool_create(0), then verifies every element's counter
 * equals kIncrementIterations5D. Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t element_count = kParallelize5DTile1DRangeI * kParallelize5DTile1DRangeJ * kParallelize5DTile1DRangeK * kParallelize5DTile1DRangeL * kParallelize5DTile1DRangeM;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* A single-threaded pool cannot exercise the multi-threaded path. */
	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	/* Each pass is expected to add exactly one to every counter. */
	for (size_t pass = 0; pass < kIncrementIterations5D; pass++) {
		pthreadpool_parallelize_5d_tile_1d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_5d_tile_1d_t>(Increment5DTile1D),
			static_cast<void*>(counters.data()),
			kParallelize5DTile1DRangeI,
			kParallelize5DTile1DRangeJ,
			kParallelize5DTile1DRangeK,
			kParallelize5DTile1DRangeL,
			kParallelize5DTile1DRangeM,
			kParallelize5DTile1DTileM,
			0 /* flags */);
	}

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile1DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile1DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile1DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile1DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile1DRangeM; m++, linear_idx++) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations5D)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
							<< "(expected: " << kIncrementIterations5D << ")";
					}
				}
			}
		}
	}
}
5840
/*
 * Task callback that bumps the single shared counter once per element of the
 * M-tile. It issues one fetch_add per element rather than one fetch_add of
 * tile_m (presumably to keep contention on the counter high).
 * The i/j/k/l indices are accepted but not used.
 */
static void IncrementSame5DTile1D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t tile_m) {
	const size_t end_m = start_m + tile_m;
	for (size_t m = start_m; m < end_m; ++m) {
		num_processed_items->fetch_add(1, std::memory_order_relaxed);
	}
}
5846
/*
 * Has every task increment one shared counter through IncrementSame5DTile1D
 * on a pool created with pthreadpool_create(0), then verifies the counter
 * equals the total number of elements in the 5D range.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile1D, MultiThreadPoolHighContention) {
	/* Direct initialization; the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_1d_t>(IncrementSame5DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize5DTile1DRangeI, kParallelize5DTile1DRangeJ, kParallelize5DTile1DRangeK, kParallelize5DTile1DRangeL, kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		0 /* flags */);

	/*
	 * The counter is a signed int while the expected total is a size_t product;
	 * convert explicitly so EXPECT_EQ compares two unsigned values.
	 */
	const size_t expected_items = kParallelize5DTile1DRangeI * kParallelize5DTile1DRangeJ * kParallelize5DTile1DRangeK * kParallelize5DTile1DRangeL * kParallelize5DTile1DRangeM;
	EXPECT_EQ(static_cast<size_t>(num_processed_items.load(std::memory_order_relaxed)), expected_items);
}
5866
WorkImbalance5DTile1D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t k,size_t l,size_t start_m,size_t tile_m)5867 static void WorkImbalance5DTile1D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t tile_m) {
5868 num_processed_items->fetch_add(tile_m, std::memory_order_relaxed);
5869 if (i == 0 && j == 0 && k == 0 && l == 0 && start_m == 0) {
5870 /* Spin-wait until all items are computed */
5871 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize5DTile1DRangeI * kParallelize5DTile1DRangeJ * kParallelize5DTile1DRangeK * kParallelize5DTile1DRangeL * kParallelize5DTile1DRangeM) {
5872 std::atomic_thread_fence(std::memory_order_acquire);
5873 }
5874 }
5875 }
5876
/*
 * Runs WorkImbalance5DTile1D on a pool created with pthreadpool_create(0).
 * The task for the first tile spins until the shared counter reaches the total
 * element count, so the call can only return once the remaining tiles have
 * been processed while that task is blocked. Afterwards the counter must equal
 * the total. Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile1D, MultiThreadPoolWorkStealing) {
	/* Direct initialization; the ATOMIC_VAR_INIT macro is deprecated since C++20. */
	std::atomic_int num_processed_items{0};

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_1d_t>(WorkImbalance5DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize5DTile1DRangeI, kParallelize5DTile1DRangeJ, kParallelize5DTile1DRangeK, kParallelize5DTile1DRangeL, kParallelize5DTile1DRangeM,
		kParallelize5DTile1DTileM,
		0 /* flags */);

	/*
	 * The counter is a signed int while the expected total is a size_t product;
	 * convert explicitly so EXPECT_EQ compares two unsigned values.
	 */
	const size_t expected_items = kParallelize5DTile1DRangeI * kParallelize5DTile1DRangeJ * kParallelize5DTile1DRangeK * kParallelize5DTile1DRangeL * kParallelize5DTile1DRangeM;
	EXPECT_EQ(static_cast<size_t>(num_processed_items.load(std::memory_order_relaxed)), expected_items);
}
5896
/*
 * No-op task callback taking a context pointer and seven size_t arguments;
 * every argument is ignored.
 */
static void ComputeNothing5DTile2D(void* /* context */, size_t, size_t, size_t, size_t, size_t, size_t, size_t) {
	/* Intentionally empty. */
}
5899
/*
 * Runs the no-op ComputeNothing5DTile2D task over the whole 5D range on a pool
 * created with pthreadpool_create(1); passes if the call returns.
 */
TEST(Parallelize5DTile2D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		ComputeNothing5DTile2D,
		nullptr /* context */,
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);
}
5911
/*
 * Runs the no-op ComputeNothing5DTile2D task over the whole 5D range on a pool
 * created with pthreadpool_create(0); passes if the call returns.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile2D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* A single-threaded pool cannot exercise the multi-threaded path. */
	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		ComputeNothing5DTile2D,
		nullptr /* context */,
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);
}
5928
CheckBounds5DTile2D(void *,size_t i,size_t j,size_t k,size_t start_l,size_t start_m,size_t tile_l,size_t tile_m)5929 static void CheckBounds5DTile2D(void*, size_t i, size_t j, size_t k, size_t start_l, size_t start_m, size_t tile_l, size_t tile_m) {
5930 EXPECT_LT(i, kParallelize5DTile2DRangeI);
5931 EXPECT_LT(j, kParallelize5DTile2DRangeJ);
5932 EXPECT_LT(k, kParallelize5DTile2DRangeK);
5933 EXPECT_LT(start_l, kParallelize5DTile2DRangeL);
5934 EXPECT_LT(start_m, kParallelize5DTile2DRangeM);
5935 EXPECT_LE(start_l + tile_l, kParallelize5DTile2DRangeL);
5936 EXPECT_LE(start_m + tile_m, kParallelize5DTile2DRangeM);
5937 }
5938
/*
 * Runs CheckBounds5DTile2D over the whole 5D range on a pool created with
 * pthreadpool_create(1).
 */
TEST(Parallelize5DTile2D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		CheckBounds5DTile2D,
		nullptr /* context */,
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);
}
5951
/*
 * Runs CheckBounds5DTile2D over the whole 5D range on a pool created with
 * pthreadpool_create(0). Skipped when that pool has at most one thread.
 */
TEST(Parallelize5DTile2D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* A single-threaded pool cannot exercise the multi-threaded path. */
	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		CheckBounds5DTile2D,
		nullptr /* context */,
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);
}
5968
CheckTiling5DTile2D(void *,size_t i,size_t j,size_t k,size_t start_l,size_t start_m,size_t tile_l,size_t tile_m)5969 static void CheckTiling5DTile2D(void*, size_t i, size_t j, size_t k, size_t start_l, size_t start_m, size_t tile_l, size_t tile_m) {
5970 EXPECT_GT(tile_l, 0);
5971 EXPECT_LE(tile_l, kParallelize5DTile2DTileL);
5972 EXPECT_EQ(start_l % kParallelize5DTile2DTileL, 0);
5973 EXPECT_EQ(tile_l, std::min<size_t>(kParallelize5DTile2DTileL, kParallelize5DTile2DRangeL - start_l));
5974
5975 EXPECT_GT(tile_m, 0);
5976 EXPECT_LE(tile_m, kParallelize5DTile2DTileM);
5977 EXPECT_EQ(start_m % kParallelize5DTile2DTileM, 0);
5978 EXPECT_EQ(tile_m, std::min<size_t>(kParallelize5DTile2DTileM, kParallelize5DTile2DRangeM - start_m));
5979 }
5980
/*
 * Runs CheckTiling5DTile2D over the whole 5D range on a pool created with
 * pthreadpool_create(1).
 */
TEST(Parallelize5DTile2D, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		CheckTiling5DTile2D,
		nullptr /* context */,
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);
}
5993
/*
 * Runs CheckTiling5DTile2D over the whole 5D range on a pool created with
 * pthreadpool_create(0). Skipped when that pool has at most one thread.
 */
TEST(Parallelize5DTile2D, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* A single-threaded pool cannot exercise the multi-threaded path. */
	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		CheckTiling5DTile2D,
		nullptr /* context */,
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);
}
6010
SetTrue5DTile2D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t k,size_t start_l,size_t start_m,size_t tile_l,size_t tile_m)6011 static void SetTrue5DTile2D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t k, size_t start_l, size_t start_m, size_t tile_l, size_t tile_m) {
6012 for (size_t l = start_l; l < start_l + tile_l; l++) {
6013 for (size_t m = start_m; m < start_m + tile_m; m++) {
6014 const size_t linear_idx = (((i * kParallelize5DTile2DRangeJ + j) * kParallelize5DTile2DRangeK + k) * kParallelize5DTile2DRangeL + l) * kParallelize5DTile2DRangeM + m;
6015 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
6016 }
6017 }
6018 }
6019
/*
 * Marks elements through SetTrue5DTile2D on a pool created with
 * pthreadpool_create(1), then verifies that every element's flag is set.
 */
TEST(Parallelize5DTile2D, SingleThreadPoolAllItemsProcessed) {
	const size_t element_count = kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_2d_t>(SetTrue5DTile2D),
		static_cast<void*>(indicators.data()),
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile2DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile2DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile2DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile2DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile2DRangeM; m++, linear_idx++) {
						EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") not processed";
					}
				}
			}
		}
	}
}
6048
/*
 * Marks elements through SetTrue5DTile2D on a pool created with
 * pthreadpool_create(0), then verifies that every element's flag is set.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile2D, MultiThreadPoolAllItemsProcessed) {
	const size_t element_count = kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* A single-threaded pool cannot exercise the multi-threaded path. */
	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_2d_t>(SetTrue5DTile2D),
		static_cast<void*>(indicators.data()),
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile2DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile2DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile2DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile2DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile2DRangeM; m++, linear_idx++) {
						EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") not processed";
					}
				}
			}
		}
	}
}
6081
Increment5DTile2D(std::atomic_int * processed_counters,size_t i,size_t j,size_t k,size_t start_l,size_t start_m,size_t tile_l,size_t tile_m)6082 static void Increment5DTile2D(std::atomic_int* processed_counters, size_t i, size_t j, size_t k, size_t start_l, size_t start_m, size_t tile_l, size_t tile_m) {
6083 for (size_t l = start_l; l < start_l + tile_l; l++) {
6084 for (size_t m = start_m; m < start_m + tile_m; m++) {
6085 const size_t linear_idx = (((i * kParallelize5DTile2DRangeJ + j) * kParallelize5DTile2DRangeK + k) * kParallelize5DTile2DRangeL + l) * kParallelize5DTile2DRangeM + m;
6086 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
6087 }
6088 }
6089 }
6090
/*
 * Counts visits through Increment5DTile2D on a pool created with
 * pthreadpool_create(1), then verifies every element's counter equals 1.
 */
TEST(Parallelize5DTile2D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t element_count = kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_2d_t>(Increment5DTile2D),
		static_cast<void*>(counters.data()),
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile2DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile2DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile2DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile2DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile2DRangeM; m++, linear_idx++) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
					}
				}
			}
		}
	}
}
6120
/*
 * Counts visits through Increment5DTile2D on a pool created with
 * pthreadpool_create(0), then verifies every element's counter equals 1.
 * Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile2D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t element_count = kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* A single-threaded pool cannot exercise the multi-threaded path. */
	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_2d_t>(Increment5DTile2D),
		static_cast<void*>(counters.data()),
		kParallelize5DTile2DRangeI,
		kParallelize5DTile2DRangeJ,
		kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL,
		kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL,
		kParallelize5DTile2DTileM,
		0 /* flags */);

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile2DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile2DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile2DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile2DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile2DRangeM; m++, linear_idx++) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
					}
				}
			}
		}
	}
}
6154
/*
 * Repeats the Increment5DTile2D pass kIncrementIterations5D times on a pool
 * created with pthreadpool_create(1), then verifies every element's counter
 * equals kIncrementIterations5D.
 */
TEST(Parallelize5DTile2D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t element_count = kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* Each pass is expected to add exactly one to every counter. */
	for (size_t pass = 0; pass < kIncrementIterations5D; pass++) {
		pthreadpool_parallelize_5d_tile_2d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_5d_tile_2d_t>(Increment5DTile2D),
			static_cast<void*>(counters.data()),
			kParallelize5DTile2DRangeI,
			kParallelize5DTile2DRangeJ,
			kParallelize5DTile2DRangeK,
			kParallelize5DTile2DRangeL,
			kParallelize5DTile2DRangeM,
			kParallelize5DTile2DTileL,
			kParallelize5DTile2DTileM,
			0 /* flags */);
	}

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile2DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile2DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile2DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile2DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile2DRangeM; m++, linear_idx++) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations5D)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
							<< "(expected: " << kIncrementIterations5D << ")";
					}
				}
			}
		}
	}
}
6187
/*
 * Repeats the Increment5DTile2D pass kIncrementIterations5D times on a pool
 * created with pthreadpool_create(0), then verifies every element's counter
 * equals kIncrementIterations5D. Skipped when the pool has at most one thread.
 */
TEST(Parallelize5DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t element_count = kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	/* A single-threaded pool cannot exercise the multi-threaded path. */
	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	/* Each pass is expected to add exactly one to every counter. */
	for (size_t pass = 0; pass < kIncrementIterations5D; pass++) {
		pthreadpool_parallelize_5d_tile_2d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_5d_tile_2d_t>(Increment5DTile2D),
			static_cast<void*>(counters.data()),
			kParallelize5DTile2DRangeI,
			kParallelize5DTile2DRangeJ,
			kParallelize5DTile2DRangeK,
			kParallelize5DTile2DRangeL,
			kParallelize5DTile2DRangeM,
			kParallelize5DTile2DTileL,
			kParallelize5DTile2DTileM,
			0 /* flags */);
	}

	/*
	 * The loops below walk the full range in row-major order, so the flattened
	 * index of (i, j, k, l, m) simply advances by one per element.
	 */
	size_t linear_idx = 0;
	for (size_t i = 0; i < kParallelize5DTile2DRangeI; i++) {
		for (size_t j = 0; j < kParallelize5DTile2DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize5DTile2DRangeK; k++) {
				for (size_t l = 0; l < kParallelize5DTile2DRangeL; l++) {
					for (size_t m = 0; m < kParallelize5DTile2DRangeM; m++, linear_idx++) {
						EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations5D)
							<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ") was processed "
							<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
							<< "(expected: " << kIncrementIterations5D << ")";
					}
				}
			}
		}
	}
}
6224
/*
 * Task body that performs one relaxed increment of the single shared counter
 * for every (l, m) element of the tile it receives.
 * The i, j and k indices are accepted but not used.
 */
static void IncrementSame5DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t start_l, size_t start_m, size_t tile_l, size_t tile_m) {
	const size_t end_l = start_l + tile_l;
	const size_t end_m = start_m + tile_m;
	for (size_t l = start_l; l < end_l; l++) {
		for (size_t m = start_m; m < end_m; m++) {
			/* One separate atomic operation per element, not a single bulk add. */
			num_processed_items->fetch_add(1, std::memory_order_relaxed);
		}
	}
}
6232
/*
 * Every task invocation increments the same atomic counter; afterwards the
 * counter must equal the total number of elements in the 5D range.
 * Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize5DTile2D, MultiThreadPoolHighContention) {
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_2d_t>(IncrementSame5DTile2D),
		static_cast<void*>(&num_processed_items),
		kParallelize5DTile2DRangeI, kParallelize5DTile2DRangeJ, kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL, kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL, kParallelize5DTile2DTileM,
		0 /* flags */);

	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM);
}
6252
WorkImbalance5DTile2D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t k,size_t start_l,size_t start_m,size_t tile_l,size_t tile_m)6253 static void WorkImbalance5DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t start_l, size_t start_m, size_t tile_l, size_t tile_m) {
6254 num_processed_items->fetch_add(tile_l * tile_m, std::memory_order_relaxed);
6255 if (i == 0 && j == 0 && k == 0 && start_l == 0 && start_m == 0) {
6256 /* Spin-wait until all items are computed */
6257 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM) {
6258 std::atomic_thread_fence(std::memory_order_acquire);
6259 }
6260 }
6261 }
6262
/*
 * Runs WorkImbalance5DTile2D, whose origin tile blocks until all elements are
 * counted, and checks the final count equals the size of the 5D range.
 * Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize5DTile2D, MultiThreadPoolWorkStealing) {
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_5d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_5d_tile_2d_t>(WorkImbalance5DTile2D),
		static_cast<void*>(&num_processed_items),
		kParallelize5DTile2DRangeI, kParallelize5DTile2DRangeJ, kParallelize5DTile2DRangeK,
		kParallelize5DTile2DRangeL, kParallelize5DTile2DRangeM,
		kParallelize5DTile2DTileL, kParallelize5DTile2DTileM,
		0 /* flags */);

	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize5DTile2DRangeI * kParallelize5DTile2DRangeJ * kParallelize5DTile2DRangeK * kParallelize5DTile2DRangeL * kParallelize5DTile2DRangeM);
}
6282
/* No-op 6D task: takes the argument pointer and six indices and ignores them all. */
static void ComputeNothing6D(void*, size_t, size_t, size_t, size_t, size_t, size_t) {
	/* Intentionally empty. */
}
6285
/*
 * Runs the no-op 6D task on a pool created with pthreadpool_create(1);
 * the test has no assertions beyond pool creation, so it passes once the
 * parallelize call returns.
 */
TEST(Parallelize6D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_6d(
		threadpool.get(),
		ComputeNothing6D,
		nullptr,
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);
}
6296
/*
 * Runs the no-op 6D task on a pool created with pthreadpool_create(0);
 * passes once the parallelize call returns. Skipped when the pool reports
 * fewer than two threads.
 */
TEST(Parallelize6D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d(
		threadpool.get(),
		ComputeNothing6D,
		nullptr,
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);
}
6312
CheckBounds6D(void *,size_t i,size_t j,size_t k,size_t l,size_t m,size_t n)6313 static void CheckBounds6D(void*, size_t i, size_t j, size_t k, size_t l, size_t m, size_t n) {
6314 EXPECT_LT(i, kParallelize6DRangeI);
6315 EXPECT_LT(j, kParallelize6DRangeJ);
6316 EXPECT_LT(k, kParallelize6DRangeK);
6317 EXPECT_LT(l, kParallelize6DRangeL);
6318 EXPECT_LT(m, kParallelize6DRangeM);
6319 EXPECT_LT(n, kParallelize6DRangeN);
6320 }
6321
/*
 * Runs CheckBounds6D over the full 6D range on a pool created with
 * pthreadpool_create(1); any out-of-range index is reported by the task.
 */
TEST(Parallelize6D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_6d(
		threadpool.get(),
		CheckBounds6D,
		nullptr,
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);
}
6333
/*
 * Runs CheckBounds6D over the full 6D range on a pool created with
 * pthreadpool_create(0). Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize6D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d(
		threadpool.get(),
		CheckBounds6D,
		nullptr,
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);
}
6349
SetTrue6D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t k,size_t l,size_t m,size_t n)6350 static void SetTrue6D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t k, size_t l, size_t m, size_t n) {
6351 const size_t linear_idx = ((((i * kParallelize6DRangeJ + j) * kParallelize6DRangeK + k) * kParallelize6DRangeL + l) * kParallelize6DRangeM + m) * kParallelize6DRangeN + n;
6352 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
6353 }
6354
/*
 * Runs SetTrue6D over the full 6D range on a pool created with
 * pthreadpool_create(1) and verifies that every indicator was set.
 */
TEST(Parallelize6D, SingleThreadPoolAllItemsProcessed) {
	const size_t element_count =
		kParallelize6DRangeI * kParallelize6DRangeJ * kParallelize6DRangeK *
		kParallelize6DRangeL * kParallelize6DRangeM * kParallelize6DRangeN;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_6d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_t>(SetTrue6D),
		static_cast<void*>(indicators.data()),
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);

	/* Visit the indicators in storage order and rebuild (i, ..., n) for the diagnostics. */
	for (size_t linear_idx = 0; linear_idx < element_count; linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DRangeN;
		rest /= kParallelize6DRangeN;
		const size_t m = rest % kParallelize6DRangeM;
		rest /= kParallelize6DRangeM;
		const size_t l = rest % kParallelize6DRangeL;
		rest /= kParallelize6DRangeL;
		const size_t k = rest % kParallelize6DRangeK;
		rest /= kParallelize6DRangeK;
		const size_t j = rest % kParallelize6DRangeJ;
		const size_t i = rest / kParallelize6DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") not processed";
	}
}
6384
/*
 * Runs SetTrue6D over the full 6D range on a pool created with
 * pthreadpool_create(0) and verifies that every indicator was set.
 * Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize6D, MultiThreadPoolAllItemsProcessed) {
	const size_t element_count =
		kParallelize6DRangeI * kParallelize6DRangeJ * kParallelize6DRangeK *
		kParallelize6DRangeL * kParallelize6DRangeM * kParallelize6DRangeN;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_t>(SetTrue6D),
		static_cast<void*>(indicators.data()),
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);

	/* Visit the indicators in storage order and rebuild (i, ..., n) for the diagnostics. */
	for (size_t linear_idx = 0; linear_idx < element_count; linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DRangeN;
		rest /= kParallelize6DRangeN;
		const size_t m = rest % kParallelize6DRangeM;
		rest /= kParallelize6DRangeM;
		const size_t l = rest % kParallelize6DRangeL;
		rest /= kParallelize6DRangeL;
		const size_t k = rest % kParallelize6DRangeK;
		rest /= kParallelize6DRangeK;
		const size_t j = rest % kParallelize6DRangeJ;
		const size_t i = rest / kParallelize6DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") not processed";
	}
}
6418
Increment6D(std::atomic_int * processed_counters,size_t i,size_t j,size_t k,size_t l,size_t m,size_t n)6419 static void Increment6D(std::atomic_int* processed_counters, size_t i, size_t j, size_t k, size_t l, size_t m, size_t n) {
6420 const size_t linear_idx = ((((i * kParallelize6DRangeJ + j) * kParallelize6DRangeK + k) * kParallelize6DRangeL + l) * kParallelize6DRangeM + m) * kParallelize6DRangeN + n;
6421 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
6422 }
6423
/*
 * Runs Increment6D once over the full 6D range on a pool created with
 * pthreadpool_create(1) and verifies that every counter equals exactly 1.
 */
TEST(Parallelize6D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t element_count =
		kParallelize6DRangeI * kParallelize6DRangeJ * kParallelize6DRangeK *
		kParallelize6DRangeL * kParallelize6DRangeM * kParallelize6DRangeN;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_6d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_t>(Increment6D),
		static_cast<void*>(counters.data()),
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);

	/* Visit the counters in storage order and rebuild (i, ..., n) for the diagnostics. */
	for (size_t linear_idx = 0; linear_idx < element_count; linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DRangeN;
		rest /= kParallelize6DRangeN;
		const size_t m = rest % kParallelize6DRangeM;
		rest /= kParallelize6DRangeM;
		const size_t l = rest % kParallelize6DRangeL;
		rest /= kParallelize6DRangeL;
		const size_t k = rest % kParallelize6DRangeK;
		rest /= kParallelize6DRangeK;
		const size_t j = rest % kParallelize6DRangeJ;
		const size_t i = rest / kParallelize6DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
6454
/*
 * Runs Increment6D once over the full 6D range on a pool created with
 * pthreadpool_create(0) and verifies that every counter equals exactly 1.
 * Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize6D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t element_count =
		kParallelize6DRangeI * kParallelize6DRangeJ * kParallelize6DRangeK *
		kParallelize6DRangeL * kParallelize6DRangeM * kParallelize6DRangeN;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_t>(Increment6D),
		static_cast<void*>(counters.data()),
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);

	/* Visit the counters in storage order and rebuild (i, ..., n) for the diagnostics. */
	for (size_t linear_idx = 0; linear_idx < element_count; linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DRangeN;
		rest /= kParallelize6DRangeN;
		const size_t m = rest % kParallelize6DRangeM;
		rest /= kParallelize6DRangeM;
		const size_t l = rest % kParallelize6DRangeL;
		rest /= kParallelize6DRangeL;
		const size_t k = rest % kParallelize6DRangeK;
		rest /= kParallelize6DRangeK;
		const size_t j = rest % kParallelize6DRangeJ;
		const size_t i = rest / kParallelize6DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
6489
/*
 * Runs Increment6D over the full 6D range kIncrementIterations6D times on a
 * pool created with pthreadpool_create(1), then verifies that every counter
 * equals kIncrementIterations6D.
 */
TEST(Parallelize6D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	std::vector<std::atomic_int> counters(kParallelize6DRangeI * kParallelize6DRangeJ * kParallelize6DRangeK * kParallelize6DRangeL * kParallelize6DRangeM * kParallelize6DRangeN);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	for (size_t iteration = 0; iteration < kIncrementIterations6D; iteration++) {
		pthreadpool_parallelize_6d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_6d_t>(Increment6D),
			static_cast<void*>(counters.data()),
			kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK, kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
			0 /* flags */);
	}

	for (size_t i = 0; i < kParallelize6DRangeI; i++) {
		for (size_t j = 0; j < kParallelize6DRangeJ; j++) {
			for (size_t k = 0; k < kParallelize6DRangeK; k++) {
				for (size_t l = 0; l < kParallelize6DRangeL; l++) {
					for (size_t m = 0; m < kParallelize6DRangeM; m++) {
						for (size_t n = 0; n < kParallelize6DRangeN; n++) {
							/*
							 * The index must include the innermost coordinate n: without it
							 * only the n == 0 counter of each row is ever inspected, while
							 * the message below reports coordinates that were not checked.
							 */
							const size_t linear_idx = ((((i * kParallelize6DRangeJ + j) * kParallelize6DRangeK + k) * kParallelize6DRangeL + l) * kParallelize6DRangeM + m) * kParallelize6DRangeN + n;
							EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations6D)
								<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
								<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
								<< "(expected: " << kIncrementIterations6D << ")";
						}
					}
				}
			}
		}
	}
}
6523
/*
 * Runs Increment6D over the full 6D range kIncrementIterations6D times on a
 * pool created with pthreadpool_create(0), then verifies that every counter
 * equals kIncrementIterations6D. Skipped when the pool reports fewer than two
 * threads.
 */
TEST(Parallelize6D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t element_count =
		kParallelize6DRangeI * kParallelize6DRangeJ * kParallelize6DRangeK *
		kParallelize6DRangeL * kParallelize6DRangeM * kParallelize6DRangeN;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	for (size_t pass = 0; pass < kIncrementIterations6D; pass++) {
		pthreadpool_parallelize_6d(
			threadpool.get(),
			reinterpret_cast<pthreadpool_task_6d_t>(Increment6D),
			static_cast<void*>(counters.data()),
			kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
			kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
			0 /* flags */);
	}

	/* Visit the counters in storage order and rebuild (i, ..., n) for the diagnostics. */
	for (size_t linear_idx = 0; linear_idx < element_count; linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DRangeN;
		rest /= kParallelize6DRangeN;
		const size_t m = rest % kParallelize6DRangeM;
		rest /= kParallelize6DRangeM;
		const size_t l = rest % kParallelize6DRangeL;
		rest /= kParallelize6DRangeL;
		const size_t k = rest % kParallelize6DRangeK;
		rest /= kParallelize6DRangeK;
		const size_t j = rest % kParallelize6DRangeJ;
		const size_t i = rest / kParallelize6DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations6D)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations6D << ")";
	}
}
6561
/*
 * 6D task that adds one (relaxed) to the single shared counter on every
 * invocation. The six indices are accepted but not used.
 */
static void IncrementSame6D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t m, size_t n) {
	std::atomic_fetch_add_explicit(num_processed_items, 1, std::memory_order_relaxed);
}
6565
/*
 * Every task invocation increments the same atomic counter; afterwards the
 * counter must equal the total number of elements in the 6D range.
 * Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize6D, MultiThreadPoolHighContention) {
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_t>(IncrementSame6D),
		static_cast<void*>(&num_processed_items),
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);

	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize6DRangeI * kParallelize6DRangeJ * kParallelize6DRangeK * kParallelize6DRangeL * kParallelize6DRangeM * kParallelize6DRangeN);
}
6584
WorkImbalance6D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t k,size_t l,size_t m,size_t n)6585 static void WorkImbalance6D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t m, size_t n) {
6586 num_processed_items->fetch_add(1, std::memory_order_relaxed);
6587 if (i == 0 && j == 0 && k == 0 && l == 0 && m == 0 && n == 0) {
6588 /* Spin-wait until all items are computed */
6589 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize6DRangeI * kParallelize6DRangeJ * kParallelize6DRangeK * kParallelize6DRangeL * kParallelize6DRangeM * kParallelize6DRangeN) {
6590 std::atomic_thread_fence(std::memory_order_acquire);
6591 }
6592 }
6593 }
6594
/*
 * Runs WorkImbalance6D, whose origin element blocks until all elements are
 * counted, and checks the final count equals the size of the 6D range.
 * Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize6D, MultiThreadPoolWorkStealing) {
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_t>(WorkImbalance6D),
		static_cast<void*>(&num_processed_items),
		kParallelize6DRangeI, kParallelize6DRangeJ, kParallelize6DRangeK,
		kParallelize6DRangeL, kParallelize6DRangeM, kParallelize6DRangeN,
		0 /* flags */);

	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize6DRangeI * kParallelize6DRangeJ * kParallelize6DRangeK * kParallelize6DRangeL * kParallelize6DRangeM * kParallelize6DRangeN);
}
6613
/* No-op 6D/1D-tiled task: takes the argument pointer and seven size_t values and ignores them all. */
static void ComputeNothing6DTile1D(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t) {
	/* Intentionally empty. */
}
6616
/*
 * Runs the no-op 6D/1D-tiled task on a pool created with pthreadpool_create(1);
 * the test has no assertions beyond pool creation, so it passes once the
 * parallelize call returns.
 */
TEST(Parallelize6DTile1D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		ComputeNothing6DTile1D,
		nullptr,
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);
}
6628
/*
 * Runs the no-op 6D/1D-tiled task on a pool created with pthreadpool_create(0);
 * passes once the parallelize call returns. Skipped when the pool reports
 * fewer than two threads.
 */
TEST(Parallelize6DTile1D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		ComputeNothing6DTile1D,
		nullptr,
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);
}
6645
CheckBounds6DTile1D(void *,size_t i,size_t j,size_t k,size_t l,size_t m,size_t start_n,size_t tile_n)6646 static void CheckBounds6DTile1D(void*, size_t i, size_t j, size_t k, size_t l, size_t m, size_t start_n, size_t tile_n) {
6647 EXPECT_LT(i, kParallelize6DTile1DRangeI);
6648 EXPECT_LT(j, kParallelize6DTile1DRangeJ);
6649 EXPECT_LT(k, kParallelize6DTile1DRangeK);
6650 EXPECT_LT(l, kParallelize6DTile1DRangeL);
6651 EXPECT_LT(m, kParallelize6DTile1DRangeM);
6652 EXPECT_LT(start_n, kParallelize6DTile1DRangeN);
6653 EXPECT_LE(start_n + tile_n, kParallelize6DTile1DRangeN);
6654 }
6655
/*
 * Runs CheckBounds6DTile1D over the full range on a pool created with
 * pthreadpool_create(1); any out-of-range index or tile is reported by the task.
 */
TEST(Parallelize6DTile1D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		CheckBounds6DTile1D,
		nullptr,
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);
}
6668
/*
 * Runs CheckBounds6DTile1D over the full range on a pool created with
 * pthreadpool_create(0). Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize6DTile1D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		CheckBounds6DTile1D,
		nullptr,
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);
}
6685
CheckTiling6DTile1D(void *,size_t i,size_t j,size_t k,size_t l,size_t m,size_t start_n,size_t tile_n)6686 static void CheckTiling6DTile1D(void*, size_t i, size_t j, size_t k, size_t l, size_t m, size_t start_n, size_t tile_n) {
6687 EXPECT_GT(tile_n, 0);
6688 EXPECT_LE(tile_n, kParallelize6DTile1DTileN);
6689 EXPECT_EQ(start_n % kParallelize6DTile1DTileN, 0);
6690 EXPECT_EQ(tile_n, std::min<size_t>(kParallelize6DTile1DTileN, kParallelize6DTile1DRangeN - start_n));
6691 }
6692
/*
 * Runs CheckTiling6DTile1D over the full range on a pool created with
 * pthreadpool_create(1); tile shape violations are reported by the task.
 */
TEST(Parallelize6DTile1D, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		CheckTiling6DTile1D,
		nullptr,
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);
}
6705
/*
 * Runs CheckTiling6DTile1D over the full range on a pool created with
 * pthreadpool_create(0). Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize6DTile1D, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		CheckTiling6DTile1D,
		nullptr,
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);
}
6722
SetTrue6DTile1D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t k,size_t l,size_t m,size_t start_n,size_t tile_n)6723 static void SetTrue6DTile1D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t k, size_t l, size_t m, size_t start_n, size_t tile_n) {
6724 for (size_t n = start_n; n < start_n + tile_n; n++) {
6725 const size_t linear_idx = ((((i * kParallelize6DTile1DRangeJ + j) * kParallelize6DTile1DRangeK + k) * kParallelize6DTile1DRangeL + l) * kParallelize6DTile1DRangeM + m) * kParallelize6DTile1DRangeN + n;
6726 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
6727 }
6728 }
6729
/*
 * Runs SetTrue6DTile1D over the full range on a pool created with
 * pthreadpool_create(1) and verifies that every indicator was set.
 */
TEST(Parallelize6DTile1D, SingleThreadPoolAllItemsProcessed) {
	const size_t element_count =
		kParallelize6DTile1DRangeI * kParallelize6DTile1DRangeJ * kParallelize6DTile1DRangeK *
		kParallelize6DTile1DRangeL * kParallelize6DTile1DRangeM * kParallelize6DTile1DRangeN;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_tile_1d_t>(SetTrue6DTile1D),
		static_cast<void*>(indicators.data()),
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);

	/* Visit the indicators in storage order and rebuild (i, ..., n) for the diagnostics. */
	for (size_t linear_idx = 0; linear_idx < element_count; linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile1DRangeN;
		rest /= kParallelize6DTile1DRangeN;
		const size_t m = rest % kParallelize6DTile1DRangeM;
		rest /= kParallelize6DTile1DRangeM;
		const size_t l = rest % kParallelize6DTile1DRangeL;
		rest /= kParallelize6DTile1DRangeL;
		const size_t k = rest % kParallelize6DTile1DRangeK;
		rest /= kParallelize6DTile1DRangeK;
		const size_t j = rest % kParallelize6DTile1DRangeJ;
		const size_t i = rest / kParallelize6DTile1DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") not processed";
	}
}
6760
/*
 * Runs SetTrue6DTile1D over the full range on a pool created with
 * pthreadpool_create(0) and verifies that every indicator was set.
 * Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize6DTile1D, MultiThreadPoolAllItemsProcessed) {
	const size_t element_count =
		kParallelize6DTile1DRangeI * kParallelize6DTile1DRangeJ * kParallelize6DTile1DRangeK *
		kParallelize6DTile1DRangeL * kParallelize6DTile1DRangeM * kParallelize6DTile1DRangeN;
	std::vector<std::atomic_bool> indicators(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_tile_1d_t>(SetTrue6DTile1D),
		static_cast<void*>(indicators.data()),
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);

	/* Visit the indicators in storage order and rebuild (i, ..., n) for the diagnostics. */
	for (size_t linear_idx = 0; linear_idx < element_count; linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile1DRangeN;
		rest /= kParallelize6DTile1DRangeN;
		const size_t m = rest % kParallelize6DTile1DRangeM;
		rest /= kParallelize6DTile1DRangeM;
		const size_t l = rest % kParallelize6DTile1DRangeL;
		rest /= kParallelize6DTile1DRangeL;
		const size_t k = rest % kParallelize6DTile1DRangeK;
		rest /= kParallelize6DTile1DRangeK;
		const size_t j = rest % kParallelize6DTile1DRangeJ;
		const size_t i = rest / kParallelize6DTile1DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") not processed";
	}
}
6795
Increment6DTile1D(std::atomic_int * processed_counters,size_t i,size_t j,size_t k,size_t l,size_t m,size_t start_n,size_t tile_n)6796 static void Increment6DTile1D(std::atomic_int* processed_counters, size_t i, size_t j, size_t k, size_t l, size_t m, size_t start_n, size_t tile_n) {
6797 for (size_t n = start_n; n < start_n + tile_n; n++) {
6798 const size_t linear_idx = ((((i * kParallelize6DTile1DRangeJ + j) * kParallelize6DTile1DRangeK + k) * kParallelize6DTile1DRangeL + l) * kParallelize6DTile1DRangeM + m) * kParallelize6DTile1DRangeN + n;
6799 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
6800 }
6801 }
6802
/*
 * Runs Increment6DTile1D once over the full range on a pool created with
 * pthreadpool_create(1) and verifies that every counter equals exactly 1.
 */
TEST(Parallelize6DTile1D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t element_count =
		kParallelize6DTile1DRangeI * kParallelize6DTile1DRangeJ * kParallelize6DTile1DRangeK *
		kParallelize6DTile1DRangeL * kParallelize6DTile1DRangeM * kParallelize6DTile1DRangeN;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_tile_1d_t>(Increment6DTile1D),
		static_cast<void*>(counters.data()),
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);

	/* Visit the counters in storage order and rebuild (i, ..., n) for the diagnostics. */
	for (size_t linear_idx = 0; linear_idx < element_count; linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile1DRangeN;
		rest /= kParallelize6DTile1DRangeN;
		const size_t m = rest % kParallelize6DTile1DRangeM;
		rest /= kParallelize6DTile1DRangeM;
		const size_t l = rest % kParallelize6DTile1DRangeL;
		rest /= kParallelize6DTile1DRangeL;
		const size_t k = rest % kParallelize6DTile1DRangeK;
		rest /= kParallelize6DTile1DRangeK;
		const size_t j = rest % kParallelize6DTile1DRangeJ;
		const size_t i = rest / kParallelize6DTile1DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
6834
/*
 * Runs Increment6DTile1D once over the full range on a pool created with
 * pthreadpool_create(0) and verifies that every counter equals exactly 1.
 * Skipped when the pool reports fewer than two threads.
 */
TEST(Parallelize6DTile1D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t element_count =
		kParallelize6DTile1DRangeI * kParallelize6DTile1DRangeJ * kParallelize6DTile1DRangeK *
		kParallelize6DTile1DRangeL * kParallelize6DTile1DRangeM * kParallelize6DTile1DRangeN;
	std::vector<std::atomic_int> counters(element_count);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const auto threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count < 2) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_tile_1d_t>(Increment6DTile1D),
		static_cast<void*>(counters.data()),
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
		kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);

	/* Visit the counters in storage order and rebuild (i, ..., n) for the diagnostics. */
	for (size_t linear_idx = 0; linear_idx < element_count; linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile1DRangeN;
		rest /= kParallelize6DTile1DRangeN;
		const size_t m = rest % kParallelize6DTile1DRangeM;
		rest /= kParallelize6DTile1DRangeM;
		const size_t l = rest % kParallelize6DTile1DRangeL;
		rest /= kParallelize6DTile1DRangeL;
		const size_t k = rest % kParallelize6DTile1DRangeK;
		rest /= kParallelize6DTile1DRangeK;
		const size_t j = rest % kParallelize6DTile1DRangeJ;
		const size_t i = rest / kParallelize6DTile1DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
6870
/*
 * Repeats the Increment6DTile1D parallelization kIncrementIterations6D times on a pool created with
 * pthreadpool_create(1) and checks that every counter ends up equal to kIncrementIterations6D.
 */
TEST(Parallelize6DTile1D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t total_items =
		kParallelize6DTile1DRangeI * kParallelize6DTile1DRangeJ * kParallelize6DTile1DRangeK *
		kParallelize6DTile1DRangeL * kParallelize6DTile1DRangeM * kParallelize6DTile1DRangeN;
	std::vector<std::atomic_int> counters(total_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_6d_tile_1d_t task =
		reinterpret_cast<pthreadpool_task_6d_tile_1d_t>(Increment6DTile1D);
	for (size_t pass = 0; pass != kIncrementIterations6D; ++pass) {
		pthreadpool_parallelize_6d_tile_1d(
			threadpool.get(),
			task,
			static_cast<void*>(counters.data()),
			kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
			kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
			kParallelize6DTile1DTileN,
			0 /* flags */);
	}

	/* Walk the counters in row-major order, decoding each offset into (i, j, k, l, m, n) for the failure message */
	for (size_t linear_idx = 0; linear_idx < counters.size(); linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile1DRangeN;
		rest /= kParallelize6DTile1DRangeN;
		const size_t m = rest % kParallelize6DTile1DRangeM;
		rest /= kParallelize6DTile1DRangeM;
		const size_t l = rest % kParallelize6DTile1DRangeL;
		rest /= kParallelize6DTile1DRangeL;
		const size_t k = rest % kParallelize6DTile1DRangeK;
		rest /= kParallelize6DTile1DRangeK;
		const size_t j = rest % kParallelize6DTile1DRangeJ;
		const size_t i = rest / kParallelize6DTile1DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations6D)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations6D << ")";
	}
}
6905
/*
 * Repeats the Increment6DTile1D parallelization kIncrementIterations6D times on a pool created with
 * pthreadpool_create(0) and checks that every counter ends up equal to kIncrementIterations6D.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile1D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t total_items =
		kParallelize6DTile1DRangeI * kParallelize6DTile1DRangeJ * kParallelize6DTile1DRangeK *
		kParallelize6DTile1DRangeL * kParallelize6DTile1DRangeM * kParallelize6DTile1DRangeN;
	std::vector<std::atomic_int> counters(total_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const pthreadpool_task_6d_tile_1d_t task =
		reinterpret_cast<pthreadpool_task_6d_tile_1d_t>(Increment6DTile1D);
	for (size_t pass = 0; pass != kIncrementIterations6D; ++pass) {
		pthreadpool_parallelize_6d_tile_1d(
			threadpool.get(),
			task,
			static_cast<void*>(counters.data()),
			kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK,
			kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
			kParallelize6DTile1DTileN,
			0 /* flags */);
	}

	/* Walk the counters in row-major order, decoding each offset into (i, j, k, l, m, n) for the failure message */
	for (size_t linear_idx = 0; linear_idx < counters.size(); linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile1DRangeN;
		rest /= kParallelize6DTile1DRangeN;
		const size_t m = rest % kParallelize6DTile1DRangeM;
		rest /= kParallelize6DTile1DRangeM;
		const size_t l = rest % kParallelize6DTile1DRangeL;
		rest /= kParallelize6DTile1DRangeL;
		const size_t k = rest % kParallelize6DTile1DRangeK;
		rest /= kParallelize6DTile1DRangeK;
		const size_t j = rest % kParallelize6DTile1DRangeJ;
		const size_t i = rest / kParallelize6DTile1DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations6D)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations6D << ")";
	}
}
6944
/*
 * Task body that bumps one shared counter once for every item of the tile [start_n, start_n + tile_n).
 * The coordinates i, j, k, l, m are accepted but not used; each item is a separate relaxed fetch_add.
 */
static void IncrementSame6DTile1D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t m, size_t start_n, size_t tile_n) {
	const size_t end_n = start_n + tile_n;
	size_t n = start_n;
	while (n < end_n) {
		num_processed_items->fetch_add(1, std::memory_order_relaxed);
		n++;
	}
}
6950
/*
 * Has every task add to a single shared atomic counter (IncrementSame6DTile1D) on a pool created with
 * pthreadpool_create(0), then checks that the final count equals the number of items in the 6D range.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile1D, MultiThreadPoolHighContention) {
	/* Direct initialization: ATOMIC_VAR_INIT is deprecated in C++20 and removed in C++23 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_tile_1d_t>(IncrementSame6DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK, kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize6DTile1DRangeI * kParallelize6DTile1DRangeJ * kParallelize6DTile1DRangeK * kParallelize6DTile1DRangeL * kParallelize6DTile1DRangeM * kParallelize6DTile1DRangeN);
}
6970
WorkImbalance6DTile1D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t k,size_t l,size_t m,size_t start_n,size_t tile_n)6971 static void WorkImbalance6DTile1D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t m, size_t start_n, size_t tile_n) {
6972 num_processed_items->fetch_add(tile_n, std::memory_order_relaxed);
6973 if (i == 0 && j == 0 && k == 0 && l == 0 && m == 0 && start_n == 0) {
6974 /* Spin-wait until all items are computed */
6975 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize6DTile1DRangeI * kParallelize6DTile1DRangeJ * kParallelize6DTile1DRangeK * kParallelize6DTile1DRangeL * kParallelize6DTile1DRangeM * kParallelize6DTile1DRangeN) {
6976 std::atomic_thread_fence(std::memory_order_acquire);
6977 }
6978 }
6979 }
6980
/*
 * Runs WorkImbalance6DTile1D on a pool created with pthreadpool_create(0). That task makes the
 * invocation for the all-zero tile spin until the shared counter reaches the total item count, so the
 * call can only return if the remaining tiles get processed while that invocation is waiting.
 * Afterwards the counter must equal the number of items in the 6D range.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile1D, MultiThreadPoolWorkStealing) {
	/* Direct initialization: ATOMIC_VAR_INIT is deprecated in C++20 and removed in C++23 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d_tile_1d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_tile_1d_t>(WorkImbalance6DTile1D),
		static_cast<void*>(&num_processed_items),
		kParallelize6DTile1DRangeI, kParallelize6DTile1DRangeJ, kParallelize6DTile1DRangeK, kParallelize6DTile1DRangeL, kParallelize6DTile1DRangeM, kParallelize6DTile1DRangeN,
		kParallelize6DTile1DTileN,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize6DTile1DRangeI * kParallelize6DTile1DRangeJ * kParallelize6DTile1DRangeK * kParallelize6DTile1DRangeL * kParallelize6DTile1DRangeM * kParallelize6DTile1DRangeN);
}
7000
/* No-op task: takes a pointer plus eight size_t arguments and ignores all of them. */
static void ComputeNothing6DTile2D(
	void*,
	size_t, size_t, size_t, size_t,
	size_t, size_t,
	size_t, size_t)
{
	/* Intentionally empty */
}
7003
/*
 * Smoke test: parallelizes the no-op task ComputeNothing6DTile2D over the 6D range on a pool created
 * with pthreadpool_create(1). There are no checks beyond pool creation; the call just has to return.
 */
TEST(Parallelize6DTile2D, SingleThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool* const pool = threadpool.get();
	const pthreadpool_task_6d_tile_2d_t task = ComputeNothing6DTile2D;
	pthreadpool_parallelize_6d_tile_2d(
		pool,
		task,
		nullptr,
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);
}
7015
/*
 * Smoke test: parallelizes the no-op task ComputeNothing6DTile2D over the 6D range on a pool created
 * with pthreadpool_create(0). Skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile2D, MultiThreadPoolCompletes) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool* const pool = threadpool.get();
	if (pthreadpool_get_threads_count(pool) <= 1) {
		GTEST_SKIP();
	}

	const pthreadpool_task_6d_tile_2d_t task = ComputeNothing6DTile2D;
	pthreadpool_parallelize_6d_tile_2d(
		pool,
		task,
		nullptr,
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);
}
7032
/*
 * Task body that checks one invocation stays inside the iteration range:
 * - i, j, k, l and the tile origin (start_m, start_n) must each be below the matching range constant;
 * - the tile must not extend past the end of the M and N ranges.
 * The leading void* argument is unnamed and unused. Failures are reported through non-fatal EXPECT_* checks.
 */
static void CheckBounds6DTile2D(void*, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t start_n, size_t tile_m, size_t tile_n) {
	EXPECT_LT(i, kParallelize6DTile2DRangeI);
	EXPECT_LT(j, kParallelize6DTile2DRangeJ);
	EXPECT_LT(k, kParallelize6DTile2DRangeK);
	EXPECT_LT(l, kParallelize6DTile2DRangeL);
	EXPECT_LT(start_m, kParallelize6DTile2DRangeM);
	EXPECT_LT(start_n, kParallelize6DTile2DRangeN);
	/* A tile may end exactly at the range boundary, hence LE rather than LT */
	EXPECT_LE(start_m + tile_m, kParallelize6DTile2DRangeM);
	EXPECT_LE(start_n + tile_n, kParallelize6DTile2DRangeN);
}
7043
/*
 * Parallelizes CheckBounds6DTile2D over the 6D range on a pool created with pthreadpool_create(1);
 * all verification happens inside the task itself.
 */
TEST(Parallelize6DTile2D, SingleThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool* const pool = threadpool.get();
	const pthreadpool_task_6d_tile_2d_t task = CheckBounds6DTile2D;
	pthreadpool_parallelize_6d_tile_2d(
		pool,
		task,
		nullptr,
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);
}
7056
/*
 * Parallelizes CheckBounds6DTile2D over the 6D range on a pool created with pthreadpool_create(0);
 * all verification happens inside the task itself. Skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile2D, MultiThreadPoolAllItemsInBounds) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool* const pool = threadpool.get();
	if (pthreadpool_get_threads_count(pool) <= 1) {
		GTEST_SKIP();
	}

	const pthreadpool_task_6d_tile_2d_t task = CheckBounds6DTile2D;
	pthreadpool_parallelize_6d_tile_2d(
		pool,
		task,
		nullptr,
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);
}
7073
/*
 * Task body that checks the shape of one tile along the M and N dimensions. For each of the two it expects:
 * - a non-empty tile that is no larger than the requested tile size;
 * - a start offset that is a multiple of the requested tile size;
 * - a tile size equal to min(requested tile size, items left between the start offset and the end of the range).
 * The indices i, j, k, l and the leading void* argument are not examined.
 */
static void CheckTiling6DTile2D(void*, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t start_n, size_t tile_m, size_t tile_n) {
	/* Tile extent along M */
	EXPECT_GT(tile_m, 0);
	EXPECT_LE(tile_m, kParallelize6DTile2DTileM);
	EXPECT_EQ(start_m % kParallelize6DTile2DTileM, 0);
	EXPECT_EQ(tile_m, std::min<size_t>(kParallelize6DTile2DTileM, kParallelize6DTile2DRangeM - start_m));

	/* Tile extent along N */
	EXPECT_GT(tile_n, 0);
	EXPECT_LE(tile_n, kParallelize6DTile2DTileN);
	EXPECT_EQ(start_n % kParallelize6DTile2DTileN, 0);
	EXPECT_EQ(tile_n, std::min<size_t>(kParallelize6DTile2DTileN, kParallelize6DTile2DRangeN - start_n));
}
7085
/*
 * Parallelizes CheckTiling6DTile2D over the 6D range on a pool created with pthreadpool_create(1);
 * the tile-shape checks run inside the task itself.
 */
TEST(Parallelize6DTile2D, SingleThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool* const pool = threadpool.get();
	const pthreadpool_task_6d_tile_2d_t task = CheckTiling6DTile2D;
	pthreadpool_parallelize_6d_tile_2d(
		pool,
		task,
		nullptr,
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);
}
7098
/*
 * Parallelizes CheckTiling6DTile2D over the 6D range on a pool created with pthreadpool_create(0);
 * the tile-shape checks run inside the task itself. Skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile2D, MultiThreadPoolUniformTiling) {
	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	pthreadpool* const pool = threadpool.get();
	if (pthreadpool_get_threads_count(pool) <= 1) {
		GTEST_SKIP();
	}

	const pthreadpool_task_6d_tile_2d_t task = CheckTiling6DTile2D;
	pthreadpool_parallelize_6d_tile_2d(
		pool,
		task,
		nullptr,
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);
}
7115
SetTrue6DTile2D(std::atomic_bool * processed_indicators,size_t i,size_t j,size_t k,size_t l,size_t start_m,size_t start_n,size_t tile_m,size_t tile_n)7116 static void SetTrue6DTile2D(std::atomic_bool* processed_indicators, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t start_n, size_t tile_m, size_t tile_n) {
7117 for (size_t m = start_m; m < start_m + tile_m; m++) {
7118 for (size_t n = start_n; n < start_n + tile_n; n++) {
7119 const size_t linear_idx = ((((i * kParallelize6DTile2DRangeJ + j) * kParallelize6DTile2DRangeK + k) * kParallelize6DTile2DRangeL + l) * kParallelize6DTile2DRangeM + m) * kParallelize6DTile2DRangeN + n;
7120 processed_indicators[linear_idx].store(true, std::memory_order_relaxed);
7121 }
7122 }
7123 }
7124
/*
 * Runs SetTrue6DTile2D across the whole 6D range on a pool created with pthreadpool_create(1),
 * then checks that the indicator of every element has been set.
 */
TEST(Parallelize6DTile2D, SingleThreadPoolAllItemsProcessed) {
	const size_t total_items =
		kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK *
		kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN;
	std::vector<std::atomic_bool> indicators(total_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_6d_tile_2d_t task =
		reinterpret_cast<pthreadpool_task_6d_tile_2d_t>(SetTrue6DTile2D);
	pthreadpool_parallelize_6d_tile_2d(
		threadpool.get(),
		task,
		static_cast<void*>(indicators.data()),
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);

	/* Walk the indicators in row-major order, decoding each offset into (i, j, k, l, m, n) for the failure message */
	for (size_t linear_idx = 0; linear_idx < indicators.size(); linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile2DRangeN;
		rest /= kParallelize6DTile2DRangeN;
		const size_t m = rest % kParallelize6DTile2DRangeM;
		rest /= kParallelize6DTile2DRangeM;
		const size_t l = rest % kParallelize6DTile2DRangeL;
		rest /= kParallelize6DTile2DRangeL;
		const size_t k = rest % kParallelize6DTile2DRangeK;
		rest /= kParallelize6DTile2DRangeK;
		const size_t j = rest % kParallelize6DTile2DRangeJ;
		const size_t i = rest / kParallelize6DTile2DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") not processed";
	}
}
7155
/*
 * Runs SetTrue6DTile2D across the whole 6D range on a pool created with pthreadpool_create(0),
 * then checks that the indicator of every element has been set.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile2D, MultiThreadPoolAllItemsProcessed) {
	const size_t total_items =
		kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK *
		kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN;
	std::vector<std::atomic_bool> indicators(total_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const pthreadpool_task_6d_tile_2d_t task =
		reinterpret_cast<pthreadpool_task_6d_tile_2d_t>(SetTrue6DTile2D);
	pthreadpool_parallelize_6d_tile_2d(
		threadpool.get(),
		task,
		static_cast<void*>(indicators.data()),
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);

	/* Walk the indicators in row-major order, decoding each offset into (i, j, k, l, m, n) for the failure message */
	for (size_t linear_idx = 0; linear_idx < indicators.size(); linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile2DRangeN;
		rest /= kParallelize6DTile2DRangeN;
		const size_t m = rest % kParallelize6DTile2DRangeM;
		rest /= kParallelize6DTile2DRangeM;
		const size_t l = rest % kParallelize6DTile2DRangeL;
		rest /= kParallelize6DTile2DRangeL;
		const size_t k = rest % kParallelize6DTile2DRangeK;
		rest /= kParallelize6DTile2DRangeK;
		const size_t j = rest % kParallelize6DTile2DRangeJ;
		const size_t i = rest / kParallelize6DTile2DRangeJ;
		EXPECT_TRUE(indicators[linear_idx].load(std::memory_order_relaxed))
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") not processed";
	}
}
7190
Increment6DTile2D(std::atomic_int * processed_counters,size_t i,size_t j,size_t k,size_t l,size_t start_m,size_t start_n,size_t tile_m,size_t tile_n)7191 static void Increment6DTile2D(std::atomic_int* processed_counters, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t start_n, size_t tile_m, size_t tile_n) {
7192 for (size_t m = start_m; m < start_m + tile_m; m++) {
7193 for (size_t n = start_n; n < start_n + tile_n; n++) {
7194 const size_t linear_idx = ((((i * kParallelize6DTile2DRangeJ + j) * kParallelize6DTile2DRangeK + k) * kParallelize6DTile2DRangeL + l) * kParallelize6DTile2DRangeM + m) * kParallelize6DTile2DRangeN + n;
7195 processed_counters[linear_idx].fetch_add(1, std::memory_order_relaxed);
7196 }
7197 }
7198 }
7199
/*
 * Runs Increment6DTile2D across the whole 6D range on a pool created with pthreadpool_create(1),
 * then checks that every per-element counter reads exactly 1.
 */
TEST(Parallelize6DTile2D, SingleThreadPoolEachItemProcessedOnce) {
	const size_t total_items =
		kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK *
		kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN;
	std::vector<std::atomic_int> counters(total_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_6d_tile_2d_t task =
		reinterpret_cast<pthreadpool_task_6d_tile_2d_t>(Increment6DTile2D);
	pthreadpool_parallelize_6d_tile_2d(
		threadpool.get(),
		task,
		static_cast<void*>(counters.data()),
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);

	/* Walk the counters in row-major order, decoding each offset into (i, j, k, l, m, n) for the failure message */
	for (size_t linear_idx = 0; linear_idx < counters.size(); linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile2DRangeN;
		rest /= kParallelize6DTile2DRangeN;
		const size_t m = rest % kParallelize6DTile2DRangeM;
		rest /= kParallelize6DTile2DRangeM;
		const size_t l = rest % kParallelize6DTile2DRangeL;
		rest /= kParallelize6DTile2DRangeL;
		const size_t k = rest % kParallelize6DTile2DRangeK;
		rest /= kParallelize6DTile2DRangeK;
		const size_t j = rest % kParallelize6DTile2DRangeJ;
		const size_t i = rest / kParallelize6DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
7231
/*
 * Runs Increment6DTile2D across the whole 6D range on a pool created with pthreadpool_create(0),
 * then checks that every per-element counter reads exactly 1.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile2D, MultiThreadPoolEachItemProcessedOnce) {
	const size_t total_items =
		kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK *
		kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN;
	std::vector<std::atomic_int> counters(total_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const pthreadpool_task_6d_tile_2d_t task =
		reinterpret_cast<pthreadpool_task_6d_tile_2d_t>(Increment6DTile2D);
	pthreadpool_parallelize_6d_tile_2d(
		threadpool.get(),
		task,
		static_cast<void*>(counters.data()),
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
		kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);

	/* Walk the counters in row-major order, decoding each offset into (i, j, k, l, m, n) for the failure message */
	for (size_t linear_idx = 0; linear_idx < counters.size(); linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile2DRangeN;
		rest /= kParallelize6DTile2DRangeN;
		const size_t m = rest % kParallelize6DTile2DRangeM;
		rest /= kParallelize6DTile2DRangeM;
		const size_t l = rest % kParallelize6DTile2DRangeL;
		rest /= kParallelize6DTile2DRangeL;
		const size_t k = rest % kParallelize6DTile2DRangeK;
		rest /= kParallelize6DTile2DRangeK;
		const size_t j = rest % kParallelize6DTile2DRangeJ;
		const size_t i = rest / kParallelize6DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), 1)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times (expected: 1)";
	}
}
7267
/*
 * Repeats the Increment6DTile2D parallelization kIncrementIterations6D times on a pool created with
 * pthreadpool_create(1) and checks that every counter ends up equal to kIncrementIterations6D.
 */
TEST(Parallelize6DTile2D, SingleThreadPoolEachItemProcessedMultipleTimes) {
	const size_t total_items =
		kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK *
		kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN;
	std::vector<std::atomic_int> counters(total_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(1), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const pthreadpool_task_6d_tile_2d_t task =
		reinterpret_cast<pthreadpool_task_6d_tile_2d_t>(Increment6DTile2D);
	for (size_t pass = 0; pass != kIncrementIterations6D; ++pass) {
		pthreadpool_parallelize_6d_tile_2d(
			threadpool.get(),
			task,
			static_cast<void*>(counters.data()),
			kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
			kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
			kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
			0 /* flags */);
	}

	/* Walk the counters in row-major order, decoding each offset into (i, j, k, l, m, n) for the failure message */
	for (size_t linear_idx = 0; linear_idx < counters.size(); linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile2DRangeN;
		rest /= kParallelize6DTile2DRangeN;
		const size_t m = rest % kParallelize6DTile2DRangeM;
		rest /= kParallelize6DTile2DRangeM;
		const size_t l = rest % kParallelize6DTile2DRangeL;
		rest /= kParallelize6DTile2DRangeL;
		const size_t k = rest % kParallelize6DTile2DRangeK;
		rest /= kParallelize6DTile2DRangeK;
		const size_t j = rest % kParallelize6DTile2DRangeJ;
		const size_t i = rest / kParallelize6DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations6D)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations6D << ")";
	}
}
7302
/*
 * Repeats the Increment6DTile2D parallelization kIncrementIterations6D times on a pool created with
 * pthreadpool_create(0) and checks that every counter ends up equal to kIncrementIterations6D.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile2D, MultiThreadPoolEachItemProcessedMultipleTimes) {
	const size_t total_items =
		kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK *
		kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN;
	std::vector<std::atomic_int> counters(total_items);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	const size_t threads_count = pthreadpool_get_threads_count(threadpool.get());
	if (threads_count <= 1) {
		GTEST_SKIP();
	}

	const pthreadpool_task_6d_tile_2d_t task =
		reinterpret_cast<pthreadpool_task_6d_tile_2d_t>(Increment6DTile2D);
	for (size_t pass = 0; pass != kIncrementIterations6D; ++pass) {
		pthreadpool_parallelize_6d_tile_2d(
			threadpool.get(),
			task,
			static_cast<void*>(counters.data()),
			kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK,
			kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
			kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
			0 /* flags */);
	}

	/* Walk the counters in row-major order, decoding each offset into (i, j, k, l, m, n) for the failure message */
	for (size_t linear_idx = 0; linear_idx < counters.size(); linear_idx++) {
		size_t rest = linear_idx;
		const size_t n = rest % kParallelize6DTile2DRangeN;
		rest /= kParallelize6DTile2DRangeN;
		const size_t m = rest % kParallelize6DTile2DRangeM;
		rest /= kParallelize6DTile2DRangeM;
		const size_t l = rest % kParallelize6DTile2DRangeL;
		rest /= kParallelize6DTile2DRangeL;
		const size_t k = rest % kParallelize6DTile2DRangeK;
		rest /= kParallelize6DTile2DRangeK;
		const size_t j = rest % kParallelize6DTile2DRangeJ;
		const size_t i = rest / kParallelize6DTile2DRangeJ;
		EXPECT_EQ(counters[linear_idx].load(std::memory_order_relaxed), kIncrementIterations6D)
			<< "Element (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ") was processed "
			<< counters[linear_idx].load(std::memory_order_relaxed) << " times "
			<< "(expected: " << kIncrementIterations6D << ")";
	}
}
7341
/*
 * Task body that bumps one shared counter once for every item of the tile
 * [start_m, start_m + tile_m) x [start_n, start_n + tile_n). The coordinates i, j, k, l are accepted
 * but not used; each item is a separate relaxed fetch_add.
 */
static void IncrementSame6DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t start_n, size_t tile_m, size_t tile_n) {
	const size_t end_m = start_m + tile_m;
	const size_t end_n = start_n + tile_n;
	for (size_t m = start_m; m < end_m; ++m) {
		size_t n = start_n;
		while (n < end_n) {
			num_processed_items->fetch_add(1, std::memory_order_relaxed);
			n++;
		}
	}
}
7349
/*
 * Has every task add to a single shared atomic counter (IncrementSame6DTile2D) on a pool created with
 * pthreadpool_create(0), then checks that the final count equals the number of items in the 6D range.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile2D, MultiThreadPoolHighContention) {
	/* Direct initialization: ATOMIC_VAR_INIT is deprecated in C++20 and removed in C++23 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_tile_2d_t>(IncrementSame6DTile2D),
		static_cast<void*>(&num_processed_items),
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK, kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK * kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN);
}
7369
WorkImbalance6DTile2D(std::atomic_int * num_processed_items,size_t i,size_t j,size_t k,size_t l,size_t start_m,size_t start_n,size_t tile_m,size_t tile_n)7370 static void WorkImbalance6DTile2D(std::atomic_int* num_processed_items, size_t i, size_t j, size_t k, size_t l, size_t start_m, size_t start_n, size_t tile_m, size_t tile_n) {
7371 num_processed_items->fetch_add(tile_m * tile_n, std::memory_order_relaxed);
7372 if (i == 0 && j == 0 && k == 0 && l == 0 && start_m == 0 && start_n == 0) {
7373 /* Spin-wait until all items are computed */
7374 while (num_processed_items->load(std::memory_order_relaxed) != kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK * kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN) {
7375 std::atomic_thread_fence(std::memory_order_acquire);
7376 }
7377 }
7378 }
7379
/*
 * Runs WorkImbalance6DTile2D on a pool created with pthreadpool_create(0). That task makes the
 * invocation for the all-zero tile spin until the shared counter reaches the total item count, so the
 * call can only return if the remaining tiles get processed while that invocation is waiting.
 * Afterwards the counter must equal the number of items in the 6D range.
 * The test is skipped when the pool reports at most one thread.
 */
TEST(Parallelize6DTile2D, MultiThreadPoolWorkStealing) {
	/* Direct initialization: ATOMIC_VAR_INIT is deprecated in C++20 and removed in C++23 */
	std::atomic_int num_processed_items(0);

	auto_pthreadpool_t threadpool(pthreadpool_create(0), pthreadpool_destroy);
	ASSERT_TRUE(threadpool.get());

	if (pthreadpool_get_threads_count(threadpool.get()) <= 1) {
		GTEST_SKIP();
	}

	pthreadpool_parallelize_6d_tile_2d(
		threadpool.get(),
		reinterpret_cast<pthreadpool_task_6d_tile_2d_t>(WorkImbalance6DTile2D),
		static_cast<void*>(&num_processed_items),
		kParallelize6DTile2DRangeI, kParallelize6DTile2DRangeJ, kParallelize6DTile2DRangeK, kParallelize6DTile2DRangeL, kParallelize6DTile2DRangeM, kParallelize6DTile2DRangeN,
		kParallelize6DTile2DTileM, kParallelize6DTile2DTileN,
		0 /* flags */);
	EXPECT_EQ(num_processed_items.load(std::memory_order_relaxed), kParallelize6DTile2DRangeI * kParallelize6DTile2DRangeJ * kParallelize6DTile2DRangeK * kParallelize6DTile2DRangeL * kParallelize6DTile2DRangeM * kParallelize6DTile2DRangeN);
}
7399