LLVM OpenMP* Runtime Library
kmp_affinity.h
1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef KMP_AFFINITY_H
15 #define KMP_AFFINITY_H
16 
17 #include "kmp.h"
18 #include "kmp_os.h"
19 
20 #if KMP_AFFINITY_SUPPORTED
21 #if KMP_USE_HWLOC
22 class KMPHwlocAffinity : public KMPAffinity {
23 public:
24  class Mask : public KMPAffinity::Mask {
25  hwloc_cpuset_t mask;
26 
27  public:
28  Mask() {
29  mask = hwloc_bitmap_alloc();
30  this->zero();
31  }
32  ~Mask() { hwloc_bitmap_free(mask); }
33  void set(int i) override { hwloc_bitmap_set(mask, i); }
34  bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
35  void clear(int i) override { hwloc_bitmap_clr(mask, i); }
36  void zero() override { hwloc_bitmap_zero(mask); }
37  void copy(const KMPAffinity::Mask *src) override {
38  const Mask *convert = static_cast<const Mask *>(src);
39  hwloc_bitmap_copy(mask, convert->mask);
40  }
41  void bitwise_and(const KMPAffinity::Mask *rhs) override {
42  const Mask *convert = static_cast<const Mask *>(rhs);
43  hwloc_bitmap_and(mask, mask, convert->mask);
44  }
45  void bitwise_or(const KMPAffinity::Mask *rhs) override {
46  const Mask *convert = static_cast<const Mask *>(rhs);
47  hwloc_bitmap_or(mask, mask, convert->mask);
48  }
49  void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
50  int begin() const override { return hwloc_bitmap_first(mask); }
51  int end() const override { return -1; }
52  int next(int previous) const override {
53  return hwloc_bitmap_next(mask, previous);
54  }
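// Illustrative sketch (not part of the class): a mask is walked with
// begin()/next() until next() returns -1, which is also what end() reports
// for hwloc bitmaps. A hypothetical traversal over a Mask m:
//   for (int cpu = m.begin(); cpu != m.end(); cpu = m.next(cpu))
//     use_cpu(cpu); // use_cpu() is a placeholder, not a runtime API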
55  int get_system_affinity(bool abort_on_error) override {
56  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
57  "Illegal get affinity operation when not capable");
58  int retval =
59  hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
60  if (retval >= 0) {
61  return 0;
62  }
63  int error = errno;
64  if (abort_on_error) {
65  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
66  }
67  return error;
68  }
69  int set_system_affinity(bool abort_on_error) const override {
70  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
71 "Illegal set affinity operation when not capable");
72  int retval =
73  hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
74  if (retval >= 0) {
75  return 0;
76  }
77  int error = errno;
78  if (abort_on_error) {
79  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
80  }
81  return error;
82  }
83  int get_proc_group() const override {
84  int group = -1;
85 #if KMP_OS_WINDOWS
86  if (__kmp_num_proc_groups == 1) {
87  return 1;
88  }
89  for (int i = 0; i < __kmp_num_proc_groups; i++) {
90 // On Windows, unsigned long is 32 bits, so each 64-bit processor-group mask spans two hwloc ulongs
91  unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
92  unsigned long second_32_bits =
93  hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
94  if (first_32_bits == 0 && second_32_bits == 0) {
95  continue;
96  }
97  if (group >= 0) {
98  return -1;
99  }
100  group = i;
101  }
102 #endif /* KMP_OS_WINDOWS */
103  return group;
104  }
105  };
106  void determine_capable(const char *var) override {
107  const hwloc_topology_support *topology_support;
108  if (__kmp_hwloc_topology == NULL) {
109  if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
110  __kmp_hwloc_error = TRUE;
111  if (__kmp_affinity_verbose)
112  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
113  }
114  if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
115  __kmp_hwloc_error = TRUE;
116  if (__kmp_affinity_verbose)
117  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
118  }
119  }
120  topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
121  // Is the system capable of setting/getting this thread's affinity?
122  // Also, is topology discovery possible? (pu indicates ability to discover
123  // processing units). And finally, were there no errors when calling any
124  // hwloc_* API functions?
125  if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
126  topology_support->cpubind->get_thisthread_cpubind &&
127  topology_support->discovery->pu && !__kmp_hwloc_error) {
128  // enables affinity according to KMP_AFFINITY_CAPABLE() macro
129  KMP_AFFINITY_ENABLE(TRUE);
130  } else {
131  // indicate that hwloc didn't work and disable affinity
132  __kmp_hwloc_error = TRUE;
133  KMP_AFFINITY_DISABLE();
134  }
135  }
136  void bind_thread(int which) override {
137  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
138  "Illegal set affinity operation when not capable");
139  KMPAffinity::Mask *mask;
140  KMP_CPU_ALLOC_ON_STACK(mask);
141  KMP_CPU_ZERO(mask);
142  KMP_CPU_SET(which, mask);
143  __kmp_set_system_affinity(mask, TRUE);
144  KMP_CPU_FREE_FROM_STACK(mask);
145  }
146  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
147  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
148  KMPAffinity::Mask *allocate_mask_array(int num) override {
149  return new Mask[num];
150  }
151  void deallocate_mask_array(KMPAffinity::Mask *array) override {
152  Mask *hwloc_array = static_cast<Mask *>(array);
153  delete[] hwloc_array;
154  }
155  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
156  int index) override {
157  Mask *hwloc_array = static_cast<Mask *>(array);
158  return &(hwloc_array[index]);
159  }
160  api_type get_api_type() const override { return HWLOC; }
161 };
162 #endif /* KMP_USE_HWLOC */
163 
164 #if KMP_OS_LINUX
165 /* On some of the older OSes that we build on, these constants aren't present
166  in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
167  all systems of the same arch where they are defined, and they cannot change;
168  they are set in stone forever. */
169 #include <sys/syscall.h>
170 #if KMP_ARCH_X86 || KMP_ARCH_ARM
171 #ifndef __NR_sched_setaffinity
172 #define __NR_sched_setaffinity 241
173 #elif __NR_sched_setaffinity != 241
174 #error Wrong code for setaffinity system call.
175 #endif /* __NR_sched_setaffinity */
176 #ifndef __NR_sched_getaffinity
177 #define __NR_sched_getaffinity 242
178 #elif __NR_sched_getaffinity != 242
179 #error Wrong code for getaffinity system call.
180 #endif /* __NR_sched_getaffinity */
181 #elif KMP_ARCH_AARCH64
182 #ifndef __NR_sched_setaffinity
183 #define __NR_sched_setaffinity 122
184 #elif __NR_sched_setaffinity != 122
185 #error Wrong code for setaffinity system call.
186 #endif /* __NR_sched_setaffinity */
187 #ifndef __NR_sched_getaffinity
188 #define __NR_sched_getaffinity 123
189 #elif __NR_sched_getaffinity != 123
190 #error Wrong code for getaffinity system call.
191 #endif /* __NR_sched_getaffinity */
192 #elif KMP_ARCH_X86_64
193 #ifndef __NR_sched_setaffinity
194 #define __NR_sched_setaffinity 203
195 #elif __NR_sched_setaffinity != 203
196 #error Wrong code for setaffinity system call.
197 #endif /* __NR_sched_setaffinity */
198 #ifndef __NR_sched_getaffinity
199 #define __NR_sched_getaffinity 204
200 #elif __NR_sched_getaffinity != 204
201 #error Wrong code for getaffinity system call.
202 #endif /* __NR_sched_getaffinity */
203 #elif KMP_ARCH_PPC64
204 #ifndef __NR_sched_setaffinity
205 #define __NR_sched_setaffinity 222
206 #elif __NR_sched_setaffinity != 222
207 #error Wrong code for setaffinity system call.
208 #endif /* __NR_sched_setaffinity */
209 #ifndef __NR_sched_getaffinity
210 #define __NR_sched_getaffinity 223
211 #elif __NR_sched_getaffinity != 223
212 #error Wrong code for getaffinity system call.
213 #endif /* __NR_sched_getaffinity */
214 #elif KMP_ARCH_MIPS
215 #ifndef __NR_sched_setaffinity
216 #define __NR_sched_setaffinity 4239
217 #elif __NR_sched_setaffinity != 4239
218 #error Wrong code for setaffinity system call.
219 #endif /* __NR_sched_setaffinity */
220 #ifndef __NR_sched_getaffinity
221 #define __NR_sched_getaffinity 4240
222 #elif __NR_sched_getaffinity != 4240
223 #error Wrong code for getaffinity system call.
224 #endif /* __NR_sched_getaffinity */
225 #elif KMP_ARCH_MIPS64
226 #ifndef __NR_sched_setaffinity
227 #define __NR_sched_setaffinity 5195
228 #elif __NR_sched_setaffinity != 5195
229 #error Wrong code for setaffinity system call.
230 #endif /* __NR_sched_setaffinity */
231 #ifndef __NR_sched_getaffinity
232 #define __NR_sched_getaffinity 5196
233 #elif __NR_sched_getaffinity != 5196
234 #error Wrong code for getaffinity system call.
235 #endif /* __NR_sched_getaffinity */
236 #else
237 #error Unknown or unsupported architecture
238 #endif /* KMP_ARCH_* */
239 class KMPNativeAffinity : public KMPAffinity {
240  class Mask : public KMPAffinity::Mask {
241  typedef unsigned char mask_t;
242  static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
243 
244  public:
245  mask_t *mask;
246  Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
247  ~Mask() {
248  if (mask)
249  __kmp_free(mask);
250  }
251  void set(int i) override {
252  mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
253  }
254  bool is_set(int i) const override {
255  return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
256  }
257  void clear(int i) override {
258  mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
259  }
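// Worked example (illustrative): with mask_t = unsigned char, BITS_PER_MASK_T
// is 8, so clear(10) resets bit 10 % 8 == 2 of byte mask[10 / 8 == 1].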
260  void zero() override {
261  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
262  mask[i] = 0;
263  }
264  void copy(const KMPAffinity::Mask *src) override {
265  const Mask *convert = static_cast<const Mask *>(src);
266  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
267  mask[i] = convert->mask[i];
268  }
269  void bitwise_and(const KMPAffinity::Mask *rhs) override {
270  const Mask *convert = static_cast<const Mask *>(rhs);
271  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
272  mask[i] &= convert->mask[i];
273  }
274  void bitwise_or(const KMPAffinity::Mask *rhs) override {
275  const Mask *convert = static_cast<const Mask *>(rhs);
276  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
277  mask[i] |= convert->mask[i];
278  }
279  void bitwise_not() override {
280  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
281  mask[i] = ~(mask[i]);
282  }
283  int begin() const override {
284  int retval = 0;
285  while (retval < end() && !is_set(retval))
286  ++retval;
287  return retval;
288  }
289  int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
290  int next(int previous) const override {
291  int retval = previous + 1;
292  while (retval < end() && !is_set(retval))
293  ++retval;
294  return retval;
295  }
296  int get_system_affinity(bool abort_on_error) override {
297  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
298  "Illegal get affinity operation when not capable");
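// A pid argument of 0 makes sched_getaffinity/sched_setaffinity act on the
// calling thread.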
299  int retval =
300  syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
301  if (retval >= 0) {
302  return 0;
303  }
304  int error = errno;
305  if (abort_on_error) {
306  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
307  }
308  return error;
309  }
310  int set_system_affinity(bool abort_on_error) const override {
311  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
312 "Illegal set affinity operation when not capable");
313  int retval =
314  syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
315  if (retval >= 0) {
316  return 0;
317  }
318  int error = errno;
319  if (abort_on_error) {
320  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
321  }
322  return error;
323  }
324  };
325  void determine_capable(const char *env_var) override {
326  __kmp_affinity_determine_capable(env_var);
327  }
328  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
329  KMPAffinity::Mask *allocate_mask() override {
330  KMPNativeAffinity::Mask *retval = new Mask();
331  return retval;
332  }
333  void deallocate_mask(KMPAffinity::Mask *m) override {
334  KMPNativeAffinity::Mask *native_mask =
335  static_cast<KMPNativeAffinity::Mask *>(m);
336  delete native_mask;
337  }
338  KMPAffinity::Mask *allocate_mask_array(int num) override {
339  return new Mask[num];
340  }
341  void deallocate_mask_array(KMPAffinity::Mask *array) override {
342  Mask *linux_array = static_cast<Mask *>(array);
343  delete[] linux_array;
344  }
345  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
346  int index) override {
347  Mask *linux_array = static_cast<Mask *>(array);
348  return &(linux_array[index]);
349  }
350  api_type get_api_type() const override { return NATIVE_OS; }
351 };
352 #endif /* KMP_OS_LINUX */
353 
354 #if KMP_OS_WINDOWS
355 class KMPNativeAffinity : public KMPAffinity {
356  class Mask : public KMPAffinity::Mask {
357  typedef ULONG_PTR mask_t;
358  static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
359  mask_t *mask;
360 
361  public:
362  Mask() {
363  mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
364  }
365  ~Mask() {
366  if (mask)
367  __kmp_free(mask);
368  }
369  void set(int i) override {
370  mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
371  }
372  bool is_set(int i) const override {
373  return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
374  }
375  void clear(int i) override {
376  mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
377  }
378  void zero() override {
379  for (int i = 0; i < __kmp_num_proc_groups; ++i)
380  mask[i] = 0;
381  }
382  void copy(const KMPAffinity::Mask *src) override {
383  const Mask *convert = static_cast<const Mask *>(src);
384  for (int i = 0; i < __kmp_num_proc_groups; ++i)
385  mask[i] = convert->mask[i];
386  }
387  void bitwise_and(const KMPAffinity::Mask *rhs) override {
388  const Mask *convert = static_cast<const Mask *>(rhs);
389  for (int i = 0; i < __kmp_num_proc_groups; ++i)
390  mask[i] &= convert->mask[i];
391  }
392  void bitwise_or(const KMPAffinity::Mask *rhs) override {
393  const Mask *convert = static_cast<const Mask *>(rhs);
394  for (int i = 0; i < __kmp_num_proc_groups; ++i)
395  mask[i] |= convert->mask[i];
396  }
397  void bitwise_not() override {
398  for (int i = 0; i < __kmp_num_proc_groups; ++i)
399  mask[i] = ~(mask[i]);
400  }
401  int begin() const override {
402  int retval = 0;
403  while (retval < end() && !is_set(retval))
404  ++retval;
405  return retval;
406  }
407  int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
408  int next(int previous) const override {
409  int retval = previous + 1;
410  while (retval < end() && !is_set(retval))
411  ++retval;
412  return retval;
413  }
414  int set_system_affinity(bool abort_on_error) const override {
415  if (__kmp_num_proc_groups > 1) {
416  // Check for a valid mask.
417  GROUP_AFFINITY ga;
418  int group = get_proc_group();
419  if (group < 0) {
420  if (abort_on_error) {
421  KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
422  }
423  return -1;
424  }
425  // Transform the bit vector into a GROUP_AFFINITY struct
426  // and make the system call to set affinity.
427  ga.Group = group;
428  ga.Mask = mask[group];
429  ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
430 
431  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
432  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
433  DWORD error = GetLastError();
434  if (abort_on_error) {
435  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
436  __kmp_msg_null);
437  }
438  return error;
439  }
440  } else {
441  if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
442  DWORD error = GetLastError();
443  if (abort_on_error) {
444  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
445  __kmp_msg_null);
446  }
447  return error;
448  }
449  }
450  return 0;
451  }
452  int get_system_affinity(bool abort_on_error) override {
453  if (__kmp_num_proc_groups > 1) {
454  this->zero();
455  GROUP_AFFINITY ga;
456  KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
457  if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
458  DWORD error = GetLastError();
459  if (abort_on_error) {
460  __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
461  KMP_ERR(error), __kmp_msg_null);
462  }
463  return error;
464  }
465 if ((ga.Group < 0) || (ga.Group >= __kmp_num_proc_groups) ||
466  (ga.Mask == 0)) {
467  return -1;
468  }
469  mask[ga.Group] = ga.Mask;
470  } else {
471  mask_t newMask, sysMask, retval;
472  if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
473  DWORD error = GetLastError();
474  if (abort_on_error) {
475  __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
476  KMP_ERR(error), __kmp_msg_null);
477  }
478  return error;
479  }
480  retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
481  if (!retval) {
482  DWORD error = GetLastError();
483  if (abort_on_error) {
484  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
485  KMP_ERR(error), __kmp_msg_null);
486  }
487  return error;
488  }
489  newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
490  if (!newMask) {
491  DWORD error = GetLastError();
492  if (abort_on_error) {
493  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
494  KMP_ERR(error), __kmp_msg_null);
495  }
496  }
497  *mask = retval;
498  }
499  return 0;
500  }
501  int get_proc_group() const override {
502  int group = -1;
503  if (__kmp_num_proc_groups == 1) {
504  return 1;
505  }
506  for (int i = 0; i < __kmp_num_proc_groups; i++) {
507  if (mask[i] == 0)
508  continue;
509  if (group >= 0)
510  return -1;
511  group = i;
512  }
513  return group;
514  }
515  };
516  void determine_capable(const char *env_var) override {
517  __kmp_affinity_determine_capable(env_var);
518  }
519  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
520  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
521  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
522  KMPAffinity::Mask *allocate_mask_array(int num) override {
523  return new Mask[num];
524  }
525  void deallocate_mask_array(KMPAffinity::Mask *array) override {
526  Mask *windows_array = static_cast<Mask *>(array);
527  delete[] windows_array;
528  }
529  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
530  int index) override {
531  Mask *windows_array = static_cast<Mask *>(array);
532  return &(windows_array[index]);
533  }
534  api_type get_api_type() const override { return NATIVE_OS; }
535 };
536 #endif /* KMP_OS_WINDOWS */
537 #endif /* KMP_AFFINITY_SUPPORTED */
538 
539 class Address {
540 public:
541  static const unsigned maxDepth = 32;
542  unsigned labels[maxDepth];
543  unsigned childNums[maxDepth];
544  unsigned depth;
545  unsigned leader;
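// Illustrative example (hypothetical machine): with 2 packages x 4 cores x
// 2 hardware threads, depth is 3 and one thread's address might be
// labels = {1, 3, 0}, i.e. package 1, core 3, thread 0; childNums records
// the corresponding 0-based child indices used by deriveLevels() below.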
546  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
547  Address &operator=(const Address &b) {
548  depth = b.depth;
549  for (unsigned i = 0; i < depth; i++) {
550  labels[i] = b.labels[i];
551  childNums[i] = b.childNums[i];
552  }
553  leader = FALSE;
554  return *this;
555  }
556  bool operator==(const Address &b) const {
557  if (depth != b.depth)
558  return false;
559  for (unsigned i = 0; i < depth; i++)
560  if (labels[i] != b.labels[i])
561  return false;
562  return true;
563  }
564  bool isClose(const Address &b, int level) const {
565  if (depth != b.depth)
566  return false;
567  if ((unsigned)level >= depth)
568  return true;
569  for (unsigned i = 0; i < (depth - level); i++)
570  if (labels[i] != b.labels[i])
571  return false;
572  return true;
573  }
574  bool operator!=(const Address &b) const { return !operator==(b); }
575  void print() const {
576  unsigned i;
577  printf("Depth: %u --- ", depth);
578  for (i = 0; i < depth; i++) {
579  printf("%u ", labels[i]);
580  }
581  }
582 };
583 
584 class AddrUnsPair {
585 public:
586  Address first;
587  unsigned second;
588  AddrUnsPair(Address _first, unsigned _second)
589  : first(_first), second(_second) {}
590  AddrUnsPair &operator=(const AddrUnsPair &b) {
591  first = b.first;
592  second = b.second;
593  return *this;
594  }
595  void print() const {
596  printf("first = ");
597  first.print();
598  printf(" --- second = %u", second);
599  }
600  bool operator==(const AddrUnsPair &b) const {
601  if (first != b.first)
602  return false;
603  if (second != b.second)
604  return false;
605  return true;
606  }
607  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
608 };
609 
610 static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
611  const Address *aa = &(((const AddrUnsPair *)a)->first);
612  const Address *bb = &(((const AddrUnsPair *)b)->first);
613  unsigned depth = aa->depth;
614  unsigned i;
615  KMP_DEBUG_ASSERT(depth == bb->depth);
616  for (i = 0; i < depth; i++) {
617  if (aa->labels[i] < bb->labels[i])
618  return -1;
619  if (aa->labels[i] > bb->labels[i])
620  return 1;
621  }
622  return 0;
623 }
624 
625 /* A structure for holding machine-specific hierarchy info to be computed once
626  at init. This structure represents a mapping of threads to the actual machine
627  hierarchy, or to our best guess at what the hierarchy might be, for the
628  purpose of performing an efficient barrier. In the worst case, when there is
629  no machine hierarchy information, it produces a tree suitable for a barrier,
630  similar to the tree used in the hyper barrier. */
631 class hierarchy_info {
632 public:
633  /* Good default values for number of leaves and branching factor, given no
634 affinity information. Behaves a bit like a hyper barrier. */
635  static const kmp_uint32 maxLeaves = 4;
636  static const kmp_uint32 minBranch = 4;
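
// Capacity of the numPerLevel/skipPerLevel arrays; init() starts it at 7 and
// resize() raises it when oversubscription needs a deeper hierarchy.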
642  kmp_uint32 maxLevels;
643 
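// Number of hierarchy levels actually in use, i.e. levels whose branching
// factor is not just a trailing 1; computed in init() and grown in resize().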
648  kmp_uint32 depth;
649  kmp_uint32 base_num_threads;
650  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
651  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
652  // 2=initialization in progress
653  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
654 
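// numPerLevel[i] is the branching factor at level i (level 0 = leaves; unused
// levels are left at 1); skipPerLevel[i] is the number of leaves one node at
// level i spans, i.e. the running product of the levels below it (see the
// loops in init() and resize()).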
659  kmp_uint32 *numPerLevel;
660  kmp_uint32 *skipPerLevel;
661 
662  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
663  int hier_depth = adr2os[0].first.depth;
664  int level = 0;
665  for (int i = hier_depth - 1; i >= 0; --i) {
666  int max = -1;
667  for (int j = 0; j < num_addrs; ++j) {
668  int next = adr2os[j].first.childNums[i];
669  if (next > max)
670  max = next;
671  }
672  numPerLevel[level] = max + 1;
673  ++level;
674  }
675  }
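// Illustrative example: if every address has childNums = {pkg, core, thread}
// (depth 3), the loop above records max(thread)+1 threads per core at level 0,
// max(core)+1 cores per package at level 1, and max(pkg)+1 packages at level 2.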
676 
677  hierarchy_info()
678  : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
679 
680  void fini() {
681  if (!uninitialized && numPerLevel) {
682  __kmp_free(numPerLevel);
683  numPerLevel = NULL;
684  uninitialized = not_initialized;
685  }
686  }
687 
688  void init(AddrUnsPair *adr2os, int num_addrs) {
689  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
690  &uninitialized, not_initialized, initializing);
691  if (bool_result == 0) { // Wait for initialization
692  while (TCR_1(uninitialized) != initialized)
693  KMP_CPU_PAUSE();
694  return;
695  }
696  KMP_DEBUG_ASSERT(bool_result == 1);
697 
698 /* Explicitly initialize the data fields here to prevent use of stale values
699  observed when the static library is re-initialized multiple times (e.g.,
700  when a non-OpenMP thread repeatedly launches/joins a thread that uses
701  OpenMP). */
702  depth = 1;
703  resizing = 0;
704  maxLevels = 7;
705  numPerLevel =
706  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
707  skipPerLevel = &(numPerLevel[maxLevels]);
708  for (kmp_uint32 i = 0; i < maxLevels;
709  ++i) { // init numPerLevel[*] to 1 item per level
710  numPerLevel[i] = 1;
711  skipPerLevel[i] = 1;
712  }
713 
714  // Sort table by physical ID
715  if (adr2os) {
716  qsort(adr2os, num_addrs, sizeof(*adr2os),
717  __kmp_affinity_cmp_Address_labels);
718  deriveLevels(adr2os, num_addrs);
719  } else {
720  numPerLevel[0] = maxLeaves;
721  numPerLevel[1] = num_addrs / maxLeaves;
722  if (num_addrs % maxLeaves)
723  numPerLevel[1]++;
724  }
725 
726  base_num_threads = num_addrs;
727  for (int i = maxLevels - 1; i >= 0;
728  --i) // count non-empty levels to get depth
729  if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
730  depth++;
731 
732  kmp_uint32 branch = minBranch;
733  if (numPerLevel[0] == 1)
734  branch = num_addrs / maxLeaves;
735  if (branch < minBranch)
736  branch = minBranch;
737  for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
738  while (numPerLevel[d] > branch ||
739 (d == 0 && numPerLevel[d] > maxLeaves)) { // level 0 capped at maxLeaves
740  if (numPerLevel[d] & 1)
741  numPerLevel[d]++;
742  numPerLevel[d] = numPerLevel[d] >> 1;
743  if (numPerLevel[d + 1] == 1)
744  depth++;
745  numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
746  }
747  if (numPerLevel[0] == 1) {
748  branch = branch >> 1;
749  if (branch < 4)
750  branch = minBranch;
751  }
752  }
753 
754  for (kmp_uint32 i = 1; i < depth; ++i)
755  skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
756  // Fill in hierarchy in the case of oversubscription
757  for (kmp_uint32 i = depth; i < maxLevels; ++i)
758  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
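// Worked example (illustrative): with numPerLevel = {4, 2, 1, ...}, the loops
// above give skipPerLevel = {1, 4, 8, 16, 32, ...}; entries beyond the real
// depth keep doubling so oversubscribed thread ids still map onto the tree.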
759 
760  uninitialized = initialized; // One writer
761  }
762 
763  // Resize the hierarchy if nproc changes to something larger than before
764  void resize(kmp_uint32 nproc) {
765  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
766  while (bool_result == 0) { // someone else is trying to resize
767  KMP_CPU_PAUSE();
768  if (nproc <= base_num_threads) // happy with other thread's resize
769  return;
770  else // try to resize
771  bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
772  }
773  KMP_DEBUG_ASSERT(bool_result != 0);
774  if (nproc <= base_num_threads)
775  return; // happy with other thread's resize
776 
777  // Calculate new maxLevels
778  kmp_uint32 old_sz = skipPerLevel[depth - 1];
779  kmp_uint32 incs = 0, old_maxLevels = maxLevels;
780  // First see if old maxLevels is enough to contain new size
781  for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
782  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
783  numPerLevel[i - 1] *= 2;
784  old_sz *= 2;
785  depth++;
786  }
787  if (nproc > old_sz) { // Not enough space, need to expand hierarchy
788  while (nproc > old_sz) {
789  old_sz *= 2;
790  incs++;
791  depth++;
792  }
793  maxLevels += incs;
794 
795  // Resize arrays
796  kmp_uint32 *old_numPerLevel = numPerLevel;
797  kmp_uint32 *old_skipPerLevel = skipPerLevel;
798  numPerLevel = skipPerLevel = NULL;
799  numPerLevel =
800  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
801  skipPerLevel = &(numPerLevel[maxLevels]);
802 
803  // Copy old elements from old arrays
804  for (kmp_uint32 i = 0; i < old_maxLevels;
805 ++i) { // copy each old level's values
806  numPerLevel[i] = old_numPerLevel[i];
807  skipPerLevel[i] = old_skipPerLevel[i];
808  }
809 
810  // Init new elements in arrays to 1
811  for (kmp_uint32 i = old_maxLevels; i < maxLevels;
812  ++i) { // init numPerLevel[*] to 1 item per level
813  numPerLevel[i] = 1;
814  skipPerLevel[i] = 1;
815  }
816 
817  // Free old arrays
818  __kmp_free(old_numPerLevel);
819  }
820 
821  // Fill in oversubscription levels of hierarchy
822  for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
823  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
824 
825  base_num_threads = nproc;
826  resizing = 0; // One writer
827  }
828 };
829 #endif // KMP_AFFINITY_H