#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() { mask = hwloc_bitmap_alloc(); }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    // hwloc_bitmap_next() returns -1 once iteration is exhausted, so -1
    // serves as the end-of-iteration sentinel here.
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
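    // The begin()/end()/next() trio supports the usual iteration idiom,
    //   for (int i = m->begin(); i != m->end(); i = m->next(i)) { ... }
    // which terminates here because hwloc_bitmap_first/next return -1 (the
    // end() value) once no further bits are set.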
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0)
        return 0;
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0)
        return 0;
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1)
        return 1;
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, a long is 32 bits, so each processor group spans two
        // of hwloc's ulong words.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0)
          continue;
        if (group >= 0)
          return -1; // bits set in more than one group
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Affinity is usable only if hwloc can set and get this thread's binding,
    // can discover processing units (pu), and reported no errors so far.
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // Enable affinity; see KMP_AFFINITY_CAPABLE().
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // hwloc did not work; disable affinity.
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
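// A backend mask is driven through the KMPAffinity::Mask interface; the
// typical pattern (mirroring bind_thread() above) is: allocate a mask, clear
// it, set the desired processor bits, then apply it to the calling thread:
//   KMPAffinity::Mask *mask;
//   KMP_CPU_ALLOC_ON_STACK(mask);
//   KMP_CPU_ZERO(mask);
//   KMP_CPU_SET(proc, mask);
//   __kmp_set_system_affinity(mask, TRUE);
//   KMP_CPU_FREE_FROM_STACK(mask);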
#if KMP_OS_LINUX
// sched_{set,get}affinity syscall numbers are architecture specific and
// fixed; define them here when the system headers do not provide them, and
// verify them when they do.
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t; // byte-granular; __kmp_affin_mask_size is in bytes
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
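    // The mask is a plain array of __kmp_affin_mask_size bytes: bit i lives
    // in byte i / 8 at bit position i % 8, so e.g. set(10) sets bit 2 of
    // mask[1].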
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0)
        return 0;
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0)
        return 0;
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */
#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
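    // Bit i refers to logical processor i % BITS_PER_MASK_T within Windows
    // processor group i / BITS_PER_MASK_T; with a 64-bit ULONG_PTR that is
    // 64 processors per group.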
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct and make the
        // system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1)
        return 1;
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1; // bits set in more than one group
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif // KMP_AFFINITY_SUPPORTED
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
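// The comparator orders addresses lexicographically by their labels, e.g.
// {0, 1, 0} sorts before {0, 2, 0} because the scan stops at the first
// differing label (1 < 2).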
/* A structure for holding machine-specific hierarchy info to be computed once
   at init, used by the hierarchical barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of
      the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;
  /** Depth of the machine configuration hierarchy: the number of levels along
      the longest path from root to any leaf. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // takes a value from init_status
  volatile kmp_int8 resizing; // 0 = not resizing, 1 = resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has; skipPerLevel[i] is the number of
      threads in the subtree below one node at level i. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization by another thread
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    // Re-initialize fields in case a previous fini() left stale values.
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels; ++i) {
      // Init numPerLevel[*] and skipPerLevel[*] to 1 item per level.
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID.
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1) // shift size up at next level
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription.
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
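  // Example (default path, adr2os == NULL, num_addrs == 16): numPerLevel
  // becomes {4, 4, 1, ...} and skipPerLevel {1, 4, 16, 32, 64, ...}, i.e. 16
  // threads arranged as 4 groups of 4, with the remaining levels doubling
  // capacity for oversubscription.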
  // Resize the hierarchy if nproc changes to something larger than before.
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels.
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size.
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays.
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays.
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1.
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays.
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy.
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
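  // Example: starting from the 16-thread hierarchy above, resize(40) reuses
  // the existing oversubscription levels, doubling capacity 16 -> 32 -> 64
  // and bumping depth from 3 to 5; arrays are reallocated only when nproc
  // exceeds what the existing maxLevels levels can hold.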
};

#endif // KMP_AFFINITY_H