LLVM OpenMP* Runtime Library
kmp_affinity.h
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
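    // Masks are traversed with begin()/next()/end(). For this hwloc-backed
    // implementation end() is -1 (hwloc_bitmap_next() returns -1 once no set
    // bit remains), so generic callers compare against end() with != rather
    // than <. Illustrative sketch only:
    //   for (int i = m->begin(); i != m->end(); i = m->next(i)) { /* ... */ }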
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int i;
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits, so each 64-bit
        // processor group spans two ulongs of the hwloc bitmap.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change;
   they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
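// The __NR_sched_*affinity numbers above are used by KMPNativeAffinity below
// to call sched_getaffinity/sched_setaffinity directly through syscall(),
// presumably so the mask buffer can be sized at runtime via
// __kmp_affin_mask_size rather than being tied to libc's fixed cpu_set_t.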
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
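    // Illustrative arithmetic for the bit indexing above: with mask_t being
    // unsigned char, BITS_PER_MASK_T is 8, so set(10) updates
    // mask[10 / 8] = mask[1] and sets bit 10 % 8 = 2, i.e. mask[1] |= 0x04.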
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
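    // One mask_t (ULONG_PTR) element is kept per Windows processor group, so
    // logical CPU i maps to element i / BITS_PER_MASK_T and bit
    // i % BITS_PER_MASK_T. When more than one group exists,
    // set_system_affinity() below requires all set bits to fall within a
    // single group (see get_proc_group()).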
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // There is no "get thread affinity mask" call, so query it by
        // temporarily setting the thread's affinity to the process mask: the
        // set call returns the previous (i.e. current) thread mask, which is
        // then restored below.
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
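// Illustrative example (not part of the original source): on a machine with
// 2 packages, 4 cores per package and 2 hardware threads per core, the thread
// on package 1, core 3, thread 0 could be described by an Address of depth 3
// with labels[] = {1, 3, 0}. isClose(b, 1) then ignores the innermost level
// and compares only the two outer labels.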

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
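// qsort() comparator used by hierarchy_info::init() below: it orders
// AddrUnsPair entries lexicographically by their Address labels, so entries
// that share their leading labels end up adjacent in the sorted table.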

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  // Allocated size of the numPerLevel and skipPerLevel arrays; grown on
  // demand by resize().
  kmp_uint32 maxLevels;

  // Number of hierarchy levels actually in use.
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  // numPerLevel[i] is the branching factor at level i (level 0 is closest to
  // the leaves); unused levels hold 1. skipPerLevel[i] is the number of
  // leaves spanned by one subtree rooted at level i.
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }
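  // Illustrative example (not part of the original source): for a machine with
  // 4 packages, 4 cores per package and 2 threads per core, deriveLevels()
  // walks the labels from the innermost level outward and produces
  // numPerLevel = {2, 4, 4, 1, 1, 1, 1} (untouched levels keep their default
  // of 1).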

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when the static library is re-initialized
       multiple times (e.g. when a non-OpenMP thread repeatedly launches/joins
       a thread that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }
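    // At this point each level's branching factor is at most `branch` (and at
    // most maxLeaves on level 0); the halving/doubling above pushes any excess
    // width up into the next level.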

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
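    // Illustrative example (not part of the original source): with
    // numPerLevel = {2, 4, 4, 1, ...} and depth 4 as derived above, this gives
    // skipPerLevel = {1, 2, 8, 32, 64, 128, ...}; entries beyond the computed
    // depth simply double to leave room for oversubscription.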

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H