#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#if KMP_AFFINITY_SUPPORTED

class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() { mask = hwloc_bitmap_alloc(); }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
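    // begin()/next() walk the set bits in ascending order; hwloc_bitmap_first
    // and hwloc_bitmap_next return -1 once no bit remains, which is why end()
    // above is the constant -1. Illustrative loop only (process_proc is a
    // placeholder name, not part of this header):
    //   for (int i = m->begin(); i != m->end(); i = m->next(i))
    //     process_proc(i);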
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error)
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // A long is always 32 bits on Windows, so each processor group
        // occupies two consecutive ulongs of the hwloc bitmap.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue; // no bits set in this group
        }
        if (group >= 0) {
          return -1; // mask spans more than one processor group
        }
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Affinity is enabled only if hwloc reports that this thread's binding can
    // be both queried and set, processing units are discoverable, and no hwloc
    // error has occurred so far.
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // hwloc did not work; disable affinity.
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
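// A minimal usage sketch (an assumption about typical call flow, not lifted
// from this header): a concrete KMPAffinity backend such as KMPHwlocAffinity
// is selected once, then masks are created and applied through the base
// interface. The variable names below are illustrative only.
//
//   KMPAffinity *api = new KMPHwlocAffinity();
//   api->determine_capable("KMP_AFFINITY");
//   KMPAffinity::Mask *m = api->allocate_mask();
//   m->get_system_affinity(/*abort_on_error=*/false); // query current binding
//   m->zero();
//   m->set(0);                                        // pin to processor 0
//   m->set_system_affinity(/*abort_on_error=*/false);
//   api->deallocate_mask(m);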
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */

class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    // Each mask_t chunk holds BITS_PER_MASK_T processor bits; the OS affinity
    // mask is an array of __kmp_affin_mask_size such chunks.
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
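    // Worked example of the indexing above (mask_t is unsigned char, so
    // BITS_PER_MASK_T == 8): processor 19 lives in mask[19 / 8] == mask[2],
    // at bit 19 % 8 == 3, i.e. set(19) ORs mask[2] with 0x08.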
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
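  // Why the cast above matters (an explanatory note, not original source
  // commentary): the array was created as Mask[num], so the element stride is
  // sizeof(Mask). Indexing through a KMPAffinity::Mask * would use the base
  // class stride instead, so the pointer is converted back to the concrete
  // Mask type before doing the pointer arithmetic.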
  api_type get_api_type() const override { return NATIVE_OS; }
};
// Native Windows API affinity interface.
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    // One mask_t (ULONG_PTR) is kept per Windows processor group.
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
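    // Worked example of the group layout (assuming 64-bit Windows, where
    // mask_t is a 64-bit ULONG_PTR so BITS_PER_MASK_T == 64): with two
    // processor groups end() returns 128, and set(70) targets group
    // 70 / 64 == 1 at bit 70 % 64 == 6.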
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct and make the
        // system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1; // bits set in more than one group
        group = i;
      }
      return group;
    }
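    // Example of the intended contract (illustrative, inferred from the logic
    // above): a mask whose bits all fall in group 2 yields 2, an empty mask
    // yields -1, and a mask spanning two groups also yields -1 so callers can
    // reject it before calling SetThreadGroupAffinity.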
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
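  // Reading of isClose (an interpretive note, assuming labels are ordered
  // outermost-first): the last `level` labels are ignored, so two addresses
  // that differ only below that level compare as close. For example, with
  // labels ordered {package, core, thread}, two hardware threads on the same
  // core are close at level 1 but not at level 0.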
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
// Machine hierarchy used to shape the hierarchical barrier.
class hierarchy_info {
public:
  // Good default values for number of leaves and branching factor, used when
  // no affinity information is available.
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  // Number of levels in the hierarchy; levels are added (doubling capacity)
  // when the machine is oversubscribed.
  kmp_uint32 maxLevels;
  // Depth of the machine topology actually in use.
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // initialization state, see init_status
  volatile kmp_int8 resizing; // 1 = resize in progress, 0 = not resizing
  // numPerLevel[i] is the fan-out (children per parent) at level i, with
  // level 0 being the leaves; skipPerLevel[i] is the corresponding stride.
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
  // Derive the fan-out of each hierarchy level from the sorted address table:
  // numPerLevel[level] becomes (max child number seen at that level) + 1.
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }
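  // Worked example (assuming a machine with 4 packages, 4 cores per package
  // and 2 hardware threads per core, addresses sorted by label): deriveLevels
  // walks the labels innermost-first, so numPerLevel becomes {2, 4, 4} and the
  // remaining levels up to maxLevels stay at their initial value of 1.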
  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // someone else is initializing; wait for them
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels; ++i) {
      // Start with a flat hierarchy: one item per level.
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }
    // Sort the table by physical ID and derive the fan-out per level from it;
    // without affinity information fall back to the default leaf/branch shape.
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0; --i) // count non-trivial levels
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    // Squeeze overly wide levels down to the branching factor, pushing the
    // excess up the hierarchy.
    for (kmp_uint32 d = 0; d < depth - 1; ++d) {
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) {
        if (numPerLevel[d] & 1)
          numPerLevel[d]++; // round up to an even count before halving
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1) // no level above yet, add one
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < minBranch)
          branch = minBranch;
      }
    }

    // skipPerLevel[i] is the number of leaves spanned by one node at level i.
    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    for (kmp_uint32 i = depth; i < maxLevels; ++i) // oversubscription levels
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // one writer
  }
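  // Worked example of skipPerLevel (following the recurrence above): with
  // numPerLevel = {2, 4, 4, 1, ...} the depth comes out as 4 and the strides
  // are skipPerLevel = {1, 2, 8, 32, 64, ...}: 1, then 2*1, 4*2, 4*8, and
  // doubling afterwards for the oversubscription levels.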
  // Resize the hierarchy if nproc grows beyond what it was built for.
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is resizing
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with the other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with the other thread's resize

    // First see if the old maxLevels is enough to contain the new size.
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // not enough space: expand the hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize the arrays, copy the old levels and initialize the new ones.
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }
      __kmp_free(old_numPerLevel);
    }

    // Fill in the oversubscription levels of the hierarchy.
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // one writer
  }
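  // Example of the doubling behaviour (following the code above, under the
  // assumption that the top-level stride old_sz starts at 8): if the hierarchy
  // was built for base_num_threads = 8, a call to resize(20) doubles old_sz to
  // 16 and then 32, adding two levels, so the expanded hierarchy can hold up
  // to 32 threads.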
};

#endif // KMP_AFFINITY_H