/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
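// Usage sketch (illustrative only, not part of this header): every Mask
// implementation exposes the same begin()/next()/end() iteration protocol, so
// the set bits can be walked identically regardless of backend. For the hwloc
// mask, end() is the -1 sentinel returned by hwloc_bitmap_next() when no bits
// remain; for the native masks below, end() is the total number of
// representable bits. The helper name here is hypothetical.
//
//   static int __example_count_bits(const KMPAffinity::Mask *m) {
//     int count = 0;
//     for (int i = m->begin(); i != m->end(); i = m->next(i))
//       ++count; // visits each processor bit that is set in the mask
//     return count;
//   }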

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OSes that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change: they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
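
// Usage sketch (illustrative only, not part of this header): the runtime
// drives whichever KMPAffinity implementation is selected through the same
// sequence -- probe capability, allocate a mask, read the current affinity,
// then pin the calling thread. The function name below is hypothetical.
//
//   static void __example_pin_to_proc(KMPAffinity *api, int proc) {
//     api->determine_capable("KMP_AFFINITY"); // may enable/disable affinity
//     if (!KMP_AFFINITY_CAPABLE())
//       return;
//     KMPAffinity::Mask *m = api->allocate_mask();
//     m->get_system_affinity(/*abort_on_error=*/false);
//     if (m->is_set(proc))
//       api->bind_thread(proc); // restrict this thread to one processor
//     api->deallocate_mask(m);
//   }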

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
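// Illustrative note (an assumption about typical use, not normative): an
// Address stores one label per hierarchy level, outermost first. On a machine
// enumerated as package/core/thread, labels = {1, 2, 0} would name hardware
// thread 0 of core 2 on package 1, and isClose(b, 1) would then compare only
// the package and core labels, ignoring the innermost level.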

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't want
      to get specific with nomenclature. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of
      the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy, in
      terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
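  /* Worked example (informal, based on the comment above): for the machine
     with numPerLevel = {2, 4, 4, 1, ...}, init() derives depth = 4 and
     skipPerLevel = {1, 2, 8, 32, ...}, i.e. a node at level i spans
     skipPerLevel[i] hardware threads. Levels beyond the machine depth are the
     oversubscription levels, and each one doubles the previous skip value
     (64, 128, ...). */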

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* The data fields are initialized explicitly here to prevent use of stale
       values left over when the static library is re-initialized multiple
       times (e.g. when a non-OpenMP thread repeatedly launches/joins a thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
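
// Usage sketch (illustrative only, not part of this header): with no address
// table, init() falls back to the default maxLeaves-ary tree, and resize()
// only ever grows the hierarchy. The variable name below is hypothetical.
//
//   hierarchy_info machine_hierarchy;
//   machine_hierarchy.init(NULL, 64); // 64 threads, no topology information
//   // depth, numPerLevel and skipPerLevel now describe the barrier tree
//   machine_hierarchy.resize(128);    // accommodate a larger thread count
//   machine_hierarchy.fini();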
#endif // KMP_AFFINITY_H