1/*
2 * kmp_affinity.h -- header for affinity management
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef KMP_AFFINITY_H
14#define KMP_AFFINITY_H
15
16#include "kmp.h"
17#include "kmp_os.h"
18#include <limits>
19
20#if KMP_AFFINITY_SUPPORTED
21#if KMP_USE_HWLOC
22class KMPHwlocAffinity : public KMPAffinity {
23public:
24 class Mask : public KMPAffinity::Mask {
25 hwloc_cpuset_t mask;
26
27 public:
28 Mask() {
29 mask = hwloc_bitmap_alloc();
30 this->zero();
31 }
32 ~Mask() { hwloc_bitmap_free(mask); }
33 void set(int i) override { hwloc_bitmap_set(mask, i); }
34 bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
35 void clear(int i) override { hwloc_bitmap_clr(mask, i); }
36 void zero() override { hwloc_bitmap_zero(mask); }
37 void copy(const KMPAffinity::Mask *src) override {
38 const Mask *convert = static_cast<const Mask *>(src);
39 hwloc_bitmap_copy(mask, convert->mask);
40 }
41 void bitwise_and(const KMPAffinity::Mask *rhs) override {
42 const Mask *convert = static_cast<const Mask *>(rhs);
43 hwloc_bitmap_and(mask, mask, convert->mask);
44 }
45 void bitwise_or(const KMPAffinity::Mask *rhs) override {
46 const Mask *convert = static_cast<const Mask *>(rhs);
47 hwloc_bitmap_or(mask, mask, convert->mask);
48 }
49 void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
50 int begin() const override { return hwloc_bitmap_first(mask); }
51 int end() const override { return -1; }
52 int next(int previous) const override {
53 return hwloc_bitmap_next(mask, previous);
54 }
55 int get_system_affinity(bool abort_on_error) override {
56 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
57 "Illegal get affinity operation when not capable");
58 long retval =
59 hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
60 if (retval >= 0) {
61 return 0;
62 }
63 int error = errno;
64 if (abort_on_error) {
65 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
66 }
67 return error;
68 }
69 int set_system_affinity(bool abort_on_error) const override {
70 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
71 "Illegal set affinity operation when not capable");
72 long retval =
73 hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
74 if (retval >= 0) {
75 return 0;
76 }
77 int error = errno;
78 if (abort_on_error) {
79 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
80 }
81 return error;
82 }
83#if KMP_OS_WINDOWS
84 int set_process_affinity(bool abort_on_error) const override {
85 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
86 "Illegal set process affinity operation when not capable");
87 int error = 0;
88 const hwloc_topology_support *support =
89 hwloc_topology_get_support(__kmp_hwloc_topology);
90 if (support->cpubind->set_proc_cpubind) {
91 int retval;
92 retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
93 HWLOC_CPUBIND_PROCESS);
94 if (retval >= 0)
95 return 0;
96 error = errno;
97 if (abort_on_error)
98 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
99 }
100 return error;
101 }
102#endif
103 int get_proc_group() const override {
104 int group = -1;
105#if KMP_OS_WINDOWS
106 if (__kmp_num_proc_groups == 1) {
107 return 1;
108 }
109 for (int i = 0; i < __kmp_num_proc_groups; i++) {
110        // On Windows, the long type is always 32 bits
111 unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
112 unsigned long second_32_bits =
113 hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
114 if (first_32_bits == 0 && second_32_bits == 0) {
115 continue;
116 }
117 if (group >= 0) {
118 return -1;
119 }
120 group = i;
121 }
122#endif /* KMP_OS_WINDOWS */
123 return group;
124 }
125 };
126 void determine_capable(const char *var) override {
127 const hwloc_topology_support *topology_support;
128 if (__kmp_hwloc_topology == NULL) {
129 if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
130 __kmp_hwloc_error = TRUE;
131 if (__kmp_affinity.flags.verbose) {
132 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
133 }
134 }
135 if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
136 __kmp_hwloc_error = TRUE;
137 if (__kmp_affinity.flags.verbose) {
138 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
139 }
140 }
141 }
142 topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
143 // Is the system capable of setting/getting this thread's affinity?
144 // Also, is topology discovery possible? (pu indicates ability to discover
145 // processing units). And finally, were there no errors when calling any
146 // hwloc_* API functions?
147 if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
148 topology_support->cpubind->get_thisthread_cpubind &&
149 topology_support->discovery->pu && !__kmp_hwloc_error) {
150 // enables affinity according to KMP_AFFINITY_CAPABLE() macro
151 KMP_AFFINITY_ENABLE(TRUE);
152 } else {
153 // indicate that hwloc didn't work and disable affinity
154 __kmp_hwloc_error = TRUE;
155 KMP_AFFINITY_DISABLE();
156 }
157 }
158 void bind_thread(int which) override {
159 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
160 "Illegal set affinity operation when not capable");
161 KMPAffinity::Mask *mask;
162 KMP_CPU_ALLOC_ON_STACK(mask);
163 KMP_CPU_ZERO(mask);
164 KMP_CPU_SET(which, mask);
165 __kmp_set_system_affinity(mask, TRUE);
166 KMP_CPU_FREE_FROM_STACK(mask);
167 }
168 KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
169 void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
170 KMPAffinity::Mask *allocate_mask_array(int num) override {
171 return new Mask[num];
172 }
173 void deallocate_mask_array(KMPAffinity::Mask *array) override {
174 Mask *hwloc_array = static_cast<Mask *>(array);
175 delete[] hwloc_array;
176 }
177 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
178 int index) override {
179 Mask *hwloc_array = static_cast<Mask *>(array);
180 return &(hwloc_array[index]);
181 }
182 api_type get_api_type() const override { return HWLOC; }
183};
184#endif /* KMP_USE_HWLOC */
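// A minimal usage sketch (illustrative, not part of the header): each
// KMPAffinity::Mask implementation in this file exposes an iterator-style
// protocol in which begin() yields the first set bit, next() the following
// one, and end() the past-the-end sentinel, so a mask can be walked with an
// ordinary loop:
//
//   for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) {
//     /* process logical processor i */
//   }
//
// The runtime wraps this pattern in the KMP_CPU_SET_ITERATE macro in kmp.h.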
185
186#if KMP_OS_LINUX || KMP_OS_FREEBSD
187#if KMP_OS_LINUX
188/* On some of the older OSes that we build on, these constants aren't present
189   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
190   all systems of the same arch where they are defined, and they cannot
191   change; they are set in stone forever. */
192#include <sys/syscall.h>
193#if KMP_ARCH_X86 || KMP_ARCH_ARM
194#ifndef __NR_sched_setaffinity
195#define __NR_sched_setaffinity 241
196#elif __NR_sched_setaffinity != 241
197#error Wrong code for setaffinity system call.
198#endif /* __NR_sched_setaffinity */
199#ifndef __NR_sched_getaffinity
200#define __NR_sched_getaffinity 242
201#elif __NR_sched_getaffinity != 242
202#error Wrong code for getaffinity system call.
203#endif /* __NR_sched_getaffinity */
204#elif KMP_ARCH_AARCH64
205#ifndef __NR_sched_setaffinity
206#define __NR_sched_setaffinity 122
207#elif __NR_sched_setaffinity != 122
208#error Wrong code for setaffinity system call.
209#endif /* __NR_sched_setaffinity */
210#ifndef __NR_sched_getaffinity
211#define __NR_sched_getaffinity 123
212#elif __NR_sched_getaffinity != 123
213#error Wrong code for getaffinity system call.
214#endif /* __NR_sched_getaffinity */
215#elif KMP_ARCH_RISCV64
216#ifndef __NR_sched_setaffinity
217#define __NR_sched_setaffinity 122
218#elif __NR_sched_setaffinity != 122
219#error Wrong code for setaffinity system call.
220#endif /* __NR_sched_setaffinity */
221#ifndef __NR_sched_getaffinity
222#define __NR_sched_getaffinity 123
223#elif __NR_sched_getaffinity != 123
224#error Wrong code for getaffinity system call.
225#endif /* __NR_sched_getaffinity */
226#elif KMP_ARCH_X86_64
227#ifndef __NR_sched_setaffinity
228#define __NR_sched_setaffinity 203
229#elif __NR_sched_setaffinity != 203
230#error Wrong code for setaffinity system call.
231#endif /* __NR_sched_setaffinity */
232#ifndef __NR_sched_getaffinity
233#define __NR_sched_getaffinity 204
234#elif __NR_sched_getaffinity != 204
235#error Wrong code for getaffinity system call.
236#endif /* __NR_sched_getaffinity */
237#elif KMP_ARCH_PPC64
238#ifndef __NR_sched_setaffinity
239#define __NR_sched_setaffinity 222
240#elif __NR_sched_setaffinity != 222
241#error Wrong code for setaffinity system call.
242#endif /* __NR_sched_setaffinity */
243#ifndef __NR_sched_getaffinity
244#define __NR_sched_getaffinity 223
245#elif __NR_sched_getaffinity != 223
246#error Wrong code for getaffinity system call.
247#endif /* __NR_sched_getaffinity */
248#elif KMP_ARCH_MIPS
249#ifndef __NR_sched_setaffinity
250#define __NR_sched_setaffinity 4239
251#elif __NR_sched_setaffinity != 4239
252#error Wrong code for setaffinity system call.
253#endif /* __NR_sched_setaffinity */
254#ifndef __NR_sched_getaffinity
255#define __NR_sched_getaffinity 4240
256#elif __NR_sched_getaffinity != 4240
257#error Wrong code for getaffinity system call.
258#endif /* __NR_sched_getaffinity */
259#elif KMP_ARCH_MIPS64
260#ifndef __NR_sched_setaffinity
261#define __NR_sched_setaffinity 5195
262#elif __NR_sched_setaffinity != 5195
263#error Wrong code for setaffinity system call.
264#endif /* __NR_sched_setaffinity */
265#ifndef __NR_sched_getaffinity
266#define __NR_sched_getaffinity 5196
267#elif __NR_sched_getaffinity != 5196
268#error Wrong code for getaffinity system call.
269#endif /* __NR_sched_getaffinity */
270#elif KMP_ARCH_LOONGARCH64
271#ifndef __NR_sched_setaffinity
272#define __NR_sched_setaffinity 122
273#elif __NR_sched_setaffinity != 122
274#error Wrong code for setaffinity system call.
275#endif /* __NR_sched_setaffinity */
276#ifndef __NR_sched_getaffinity
277#define __NR_sched_getaffinity 123
278#elif __NR_sched_getaffinity != 123
279#error Wrong code for getaffinity system call.
280#endif /* __NR_sched_getaffinity */
292#else
293#error Unknown or unsupported architecture
294#endif /* KMP_ARCH_* */
295#elif KMP_OS_FREEBSD
296#include <pthread.h>
297#include <pthread_np.h>
298#endif
299class KMPNativeAffinity : public KMPAffinity {
300 class Mask : public KMPAffinity::Mask {
301 typedef unsigned long mask_t;
302 typedef decltype(__kmp_affin_mask_size) mask_size_type;
303 static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
304 static const mask_t ONE = 1;
305 mask_size_type get_num_mask_types() const {
306 return __kmp_affin_mask_size / sizeof(mask_t);
307 }
308
309 public:
310 mask_t *mask;
311 Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
312 ~Mask() {
313 if (mask)
314 __kmp_free(mask);
315 }
316 void set(int i) override {
317 mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
318 }
319 bool is_set(int i) const override {
320 return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
321 }
322 void clear(int i) override {
323 mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
324 }
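    // Worked example (illustrative): with a 64-bit mask_t, logical CPU 70
    // lives in word 70 / 64 == 1 at bit 70 % 64 == 6, so set(70) performs
    // mask[1] |= (ONE << 6).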
325 void zero() override {
326 mask_size_type e = get_num_mask_types();
327 for (mask_size_type i = 0; i < e; ++i)
328 mask[i] = (mask_t)0;
329 }
330 void copy(const KMPAffinity::Mask *src) override {
331 const Mask *convert = static_cast<const Mask *>(src);
332 mask_size_type e = get_num_mask_types();
333 for (mask_size_type i = 0; i < e; ++i)
334 mask[i] = convert->mask[i];
335 }
336 void bitwise_and(const KMPAffinity::Mask *rhs) override {
337 const Mask *convert = static_cast<const Mask *>(rhs);
338 mask_size_type e = get_num_mask_types();
339 for (mask_size_type i = 0; i < e; ++i)
340 mask[i] &= convert->mask[i];
341 }
342 void bitwise_or(const KMPAffinity::Mask *rhs) override {
343 const Mask *convert = static_cast<const Mask *>(rhs);
344 mask_size_type e = get_num_mask_types();
345 for (mask_size_type i = 0; i < e; ++i)
346 mask[i] |= convert->mask[i];
347 }
348 void bitwise_not() override {
349 mask_size_type e = get_num_mask_types();
350 for (mask_size_type i = 0; i < e; ++i)
351 mask[i] = ~(mask[i]);
352 }
353 int begin() const override {
354 int retval = 0;
355 while (retval < end() && !is_set(retval))
356 ++retval;
357 return retval;
358 }
359 int end() const override {
360 int e;
361 __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
362 return e;
363 }
364 int next(int previous) const override {
365 int retval = previous + 1;
366 while (retval < end() && !is_set(retval))
367 ++retval;
368 return retval;
369 }
370 int get_system_affinity(bool abort_on_error) override {
371 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
372 "Illegal get affinity operation when not capable");
373#if KMP_OS_LINUX
374 long retval =
375 syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
376#elif KMP_OS_FREEBSD
377 int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
378 reinterpret_cast<cpuset_t *>(mask));
379 int retval = (r == 0 ? 0 : -1);
380#endif
381 if (retval >= 0) {
382 return 0;
383 }
384 int error = errno;
385 if (abort_on_error) {
386 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
387 }
388 return error;
389 }
390 int set_system_affinity(bool abort_on_error) const override {
391 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
392 "Illegal set affinity operation when not capable");
393#if KMP_OS_LINUX
394 long retval =
395 syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
396#elif KMP_OS_FREEBSD
397 int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
398 reinterpret_cast<cpuset_t *>(mask));
399 int retval = (r == 0 ? 0 : -1);
400#endif
401 if (retval >= 0) {
402 return 0;
403 }
404 int error = errno;
405 if (abort_on_error) {
406 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
407 }
408 return error;
409 }
410 };
411 void determine_capable(const char *env_var) override {
412 __kmp_affinity_determine_capable(env_var);
413 }
414 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
415 KMPAffinity::Mask *allocate_mask() override {
416 KMPNativeAffinity::Mask *retval = new Mask();
417 return retval;
418 }
419 void deallocate_mask(KMPAffinity::Mask *m) override {
420 KMPNativeAffinity::Mask *native_mask =
421 static_cast<KMPNativeAffinity::Mask *>(m);
422 delete native_mask;
423 }
424 KMPAffinity::Mask *allocate_mask_array(int num) override {
425 return new Mask[num];
426 }
427 void deallocate_mask_array(KMPAffinity::Mask *array) override {
428 Mask *linux_array = static_cast<Mask *>(array);
429 delete[] linux_array;
430 }
431 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
432 int index) override {
433 Mask *linux_array = static_cast<Mask *>(array);
434 return &(linux_array[index]);
435 }
436 api_type get_api_type() const override { return NATIVE_OS; }
437};
438#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
439
440#if KMP_OS_WINDOWS
441class KMPNativeAffinity : public KMPAffinity {
442 class Mask : public KMPAffinity::Mask {
443 typedef ULONG_PTR mask_t;
444 static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
445 mask_t *mask;
446
447 public:
448 Mask() {
449 mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
450 }
451 ~Mask() {
452 if (mask)
453 __kmp_free(mask);
454 }
455 void set(int i) override {
456 mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
457 }
458 bool is_set(int i) const override {
459 return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
460 }
461 void clear(int i) override {
462 mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
463 }
464 void zero() override {
465 for (int i = 0; i < __kmp_num_proc_groups; ++i)
466 mask[i] = 0;
467 }
468 void copy(const KMPAffinity::Mask *src) override {
469 const Mask *convert = static_cast<const Mask *>(src);
470 for (int i = 0; i < __kmp_num_proc_groups; ++i)
471 mask[i] = convert->mask[i];
472 }
473 void bitwise_and(const KMPAffinity::Mask *rhs) override {
474 const Mask *convert = static_cast<const Mask *>(rhs);
475 for (int i = 0; i < __kmp_num_proc_groups; ++i)
476 mask[i] &= convert->mask[i];
477 }
478 void bitwise_or(const KMPAffinity::Mask *rhs) override {
479 const Mask *convert = static_cast<const Mask *>(rhs);
480 for (int i = 0; i < __kmp_num_proc_groups; ++i)
481 mask[i] |= convert->mask[i];
482 }
483 void bitwise_not() override {
484 for (int i = 0; i < __kmp_num_proc_groups; ++i)
485 mask[i] = ~(mask[i]);
486 }
487 int begin() const override {
488 int retval = 0;
489 while (retval < end() && !is_set(retval))
490 ++retval;
491 return retval;
492 }
493 int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
494 int next(int previous) const override {
495 int retval = previous + 1;
496 while (retval < end() && !is_set(retval))
497 ++retval;
498 return retval;
499 }
500 int set_process_affinity(bool abort_on_error) const override {
501 if (__kmp_num_proc_groups <= 1) {
502 if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
503 DWORD error = GetLastError();
504 if (abort_on_error) {
505 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
506 __kmp_msg_null);
507 }
508 return error;
509 }
510 }
511 return 0;
512 }
513 int set_system_affinity(bool abort_on_error) const override {
514 if (__kmp_num_proc_groups > 1) {
515 // Check for a valid mask.
516 GROUP_AFFINITY ga;
517 int group = get_proc_group();
518 if (group < 0) {
519 if (abort_on_error) {
520 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
521 }
522 return -1;
523 }
524 // Transform the bit vector into a GROUP_AFFINITY struct
525 // and make the system call to set affinity.
526 ga.Group = group;
527 ga.Mask = mask[group];
528 ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
529
530 KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
531 if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
532 DWORD error = GetLastError();
533 if (abort_on_error) {
534 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
535 __kmp_msg_null);
536 }
537 return error;
538 }
539 } else {
540 if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
541 DWORD error = GetLastError();
542 if (abort_on_error) {
543 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
544 __kmp_msg_null);
545 }
546 return error;
547 }
548 }
549 return 0;
550 }
551 int get_system_affinity(bool abort_on_error) override {
552 if (__kmp_num_proc_groups > 1) {
553 this->zero();
554 GROUP_AFFINITY ga;
555 KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
556 if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
557 DWORD error = GetLastError();
558 if (abort_on_error) {
559 __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
560 KMP_ERR(error), __kmp_msg_null);
561 }
562 return error;
563 }
564 if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
565 (ga.Mask == 0)) {
566 return -1;
567 }
568 mask[ga.Group] = ga.Mask;
569 } else {
570 mask_t newMask, sysMask, retval;
571 if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
572 DWORD error = GetLastError();
573 if (abort_on_error) {
574 __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
575 KMP_ERR(error), __kmp_msg_null);
576 }
577 return error;
578 }
579 retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
580 if (!retval) {
581 DWORD error = GetLastError();
582 if (abort_on_error) {
583 __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
584 KMP_ERR(error), __kmp_msg_null);
585 }
586 return error;
587 }
588 newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
589 if (!newMask) {
590 DWORD error = GetLastError();
591 if (abort_on_error) {
592 __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
593 KMP_ERR(error), __kmp_msg_null);
594 }
595 }
596 *mask = retval;
597 }
598 return 0;
599 }
600 int get_proc_group() const override {
601 int group = -1;
602 if (__kmp_num_proc_groups == 1) {
603 return 1;
604 }
605 for (int i = 0; i < __kmp_num_proc_groups; i++) {
606 if (mask[i] == 0)
607 continue;
608 if (group >= 0)
609 return -1;
610 group = i;
611 }
612 return group;
613 }
614 };
615 void determine_capable(const char *env_var) override {
616 __kmp_affinity_determine_capable(env_var);
617 }
618 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
619 KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
620 void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
621 KMPAffinity::Mask *allocate_mask_array(int num) override {
622 return new Mask[num];
623 }
624 void deallocate_mask_array(KMPAffinity::Mask *array) override {
625 Mask *windows_array = static_cast<Mask *>(array);
626 delete[] windows_array;
627 }
628 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
629 int index) override {
630 Mask *windows_array = static_cast<Mask *>(array);
631 return &(windows_array[index]);
632 }
633 api_type get_api_type() const override { return NATIVE_OS; }
634};
635#endif /* KMP_OS_WINDOWS */
636#endif /* KMP_AFFINITY_SUPPORTED */
637
638// Describe an attribute for a level in the machine topology
639struct kmp_hw_attr_t {
640 int core_type : 8;
641 int core_eff : 8;
642 unsigned valid : 1;
643 unsigned reserved : 15;
644
645 static const int UNKNOWN_CORE_EFF = -1;
646
647 kmp_hw_attr_t()
648 : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
649 valid(0), reserved(0) {}
650 void set_core_type(kmp_hw_core_type_t type) {
651 valid = 1;
652 core_type = type;
653 }
654 void set_core_eff(int eff) {
655 valid = 1;
656 core_eff = eff;
657 }
658 kmp_hw_core_type_t get_core_type() const {
659 return (kmp_hw_core_type_t)core_type;
660 }
661 int get_core_eff() const { return core_eff; }
662 bool is_core_type_valid() const {
663 return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
664 }
665 bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
666 operator bool() const { return valid; }
667 void clear() {
668 core_type = KMP_HW_CORE_TYPE_UNKNOWN;
669 core_eff = UNKNOWN_CORE_EFF;
670 valid = 0;
671 }
672 bool contains(const kmp_hw_attr_t &other) const {
673 if (!valid && !other.valid)
674 return true;
675 if (valid && other.valid) {
676 if (other.is_core_type_valid()) {
677 if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
678 return false;
679 }
680 if (other.is_core_eff_valid()) {
681 if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
682 return false;
683 }
684 return true;
685 }
686 return false;
687 }
688 bool operator==(const kmp_hw_attr_t &rhs) const {
689 return (rhs.valid == valid && rhs.core_eff == core_eff &&
690 rhs.core_type == core_type);
691 }
692 bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
693};
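// Illustrative example (not part of the runtime; assumes the hybrid x86
// core-type enumerators from kmp.h): contains() answers "does this attribute
// satisfy every field the other attribute specifies?"
//
//   kmp_hw_attr_t core, request;
//   core.set_core_type(KMP_HW_CORE_TYPE_CORE);
//   core.set_core_eff(1);
//   request.set_core_type(KMP_HW_CORE_TYPE_CORE);
//   // core.contains(request) == true  : 'request' only constrains core_type
//   // request.contains(core) == false : 'core' also constrains core_eff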
694
695#if KMP_AFFINITY_SUPPORTED
696KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
697#endif
698
699class kmp_hw_thread_t {
700public:
701 static const int UNKNOWN_ID = -1;
702 static const int MULTIPLE_ID = -2;
703 static int compare_ids(const void *a, const void *b);
704 static int compare_compact(const void *a, const void *b);
705 int ids[KMP_HW_LAST];
706 int sub_ids[KMP_HW_LAST];
707 bool leader;
708 int os_id;
709 kmp_hw_attr_t attrs;
710
711 void print() const;
712 void clear() {
713 for (int i = 0; i < (int)KMP_HW_LAST; ++i)
714 ids[i] = UNKNOWN_ID;
715 leader = false;
716 attrs.clear();
717 }
718};
719
720class kmp_topology_t {
721
722 struct flags_t {
723 int uniform : 1;
724 int reserved : 31;
725 };
726
727 int depth;
728
729  // The following arrays are all 'depth' long. They are allocated to hold
730  // up to KMP_HW_LAST objects so that layers can be added without
731  // reallocating any array.
732
733  // Ordered array of the types in the topology
734 kmp_hw_t *types;
735
736  // Quick topology ratios; for non-uniform topologies, each entry holds
737  // the max number of itemA's per itemB,
738  // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
739 int *ratio;
740
741 // Storage containing the absolute number of each topology layer
742 int *count;
743
744 // The number of core efficiencies. This is only useful for hybrid
745 // topologies. Core efficiencies will range from 0 to num efficiencies - 1
746 int num_core_efficiencies;
747 int num_core_types;
748 kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
749
750 // The hardware threads array
751 // hw_threads is num_hw_threads long
752 // Each hw_thread's ids and sub_ids are depth deep
753 int num_hw_threads;
754 kmp_hw_thread_t *hw_threads;
755
756 // Equivalence hash where the key is the hardware topology item
757 // and the value is the equivalent hardware topology type in the
758 // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
759 // known equivalence for the topology type
760 kmp_hw_t equivalent[KMP_HW_LAST];
761
762 // Flags describing the topology
763 flags_t flags;
764
765 // Compact value used during sort_compact()
766 int compact;
767
768 // Insert a new topology layer after allocation
769 void _insert_layer(kmp_hw_t type, const int *ids);
770
771#if KMP_GROUP_AFFINITY
772 // Insert topology information about Windows Processor groups
773 void _insert_windows_proc_groups();
774#endif
775
776 // Count each item & get the num x's per y
777 // e.g., get the number of cores and the number of threads per core
778 // for each (x, y) in (KMP_HW_* , KMP_HW_*)
779 void _gather_enumeration_information();
780
781 // Remove layers that don't add information to the topology.
782 // This is done by having the layer take on the id = UNKNOWN_ID (-1)
783 void _remove_radix1_layers();
784
785 // Find out if the topology is uniform
786 void _discover_uniformity();
787
788 // Set all the sub_ids for each hardware thread
789 void _set_sub_ids();
790
791 // Set global affinity variables describing the number of threads per
792 // core, the number of packages, the number of cores per package, and
793 // the number of cores.
794 void _set_globals();
795
796 // Set the last level cache equivalent type
797 void _set_last_level_cache();
798
799  // Return the number of cores with a particular attribute, 'attr'.
800  // If 'find_all' is true, then count all such cores on the machine;
801  // otherwise count them per instance of the layer 'above'.
802 int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
803 bool find_all = false) const;
804
805public:
806 // Force use of allocate()/deallocate()
807 kmp_topology_t() = delete;
808 kmp_topology_t(const kmp_topology_t &t) = delete;
809 kmp_topology_t(kmp_topology_t &&t) = delete;
810 kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
811 kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
812
813 static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
814 static void deallocate(kmp_topology_t *);
815
816 // Functions used in create_map() routines
817 kmp_hw_thread_t &at(int index) {
818 KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
819 return hw_threads[index];
820 }
821 const kmp_hw_thread_t &at(int index) const {
822 KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
823 return hw_threads[index];
824 }
825 int get_num_hw_threads() const { return num_hw_threads; }
826 void sort_ids() {
827 qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
828 kmp_hw_thread_t::compare_ids);
829 }
830  // Check if the hardware ids are unique; return true
831  // if they are, false otherwise
832 bool check_ids() const;
833
834 // Function to call after the create_map() routine
835 void canonicalize();
836 void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
837
838// Functions used after canonicalize() called
839
840#if KMP_AFFINITY_SUPPORTED
841 // Set the granularity for affinity settings
842 void set_granularity(kmp_affinity_t &stgs) const;
843#endif
844 bool filter_hw_subset();
845 bool is_close(int hwt1, int hwt2, int level) const;
846 bool is_uniform() const { return flags.uniform; }
847  // Return the equivalent topology type for 'type' (also used to tell
848  // whether a type is valid); returns KMP_HW_UNKNOWN when there is no
849  // equivalent type
849 kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
850 // Set type1 = type2
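  // e.g., set_equivalent_type(KMP_HW_TILE, KMP_HW_CORE) makes subsequent
  // KMP_HW_TILE queries resolve to whatever KMP_HW_CORE currently resolves to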
851 void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
852 KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
853 KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
854 kmp_hw_t real_type2 = equivalent[type2];
855 if (real_type2 == KMP_HW_UNKNOWN)
856 real_type2 = type2;
857 equivalent[type1] = real_type2;
858 // This loop is required since any of the types may have been set to
859 // be equivalent to type1. They all must be checked and reset to type2.
860 KMP_FOREACH_HW_TYPE(type) {
861 if (equivalent[type] == type1) {
862 equivalent[type] = real_type2;
863 }
864 }
865 }
866 // Calculate number of types corresponding to level1
867 // per types corresponding to level2 (e.g., number of threads per core)
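  // Worked example (illustrative): with types [package, core, thread] and
  // ratio[] = {4, 6, 2}, calculate_ratio(thread_level /*2*/, pkg_level /*0*/)
  // returns 2 * 6 == 12 hardware threads per package; level1 is expected to
  // be the deeper (larger-index) level.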
868 int calculate_ratio(int level1, int level2) const {
869 KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
870 KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
871 int r = 1;
872 for (int level = level1; level > level2; --level)
873 r *= ratio[level];
874 return r;
875 }
876 int get_ratio(int level) const {
877 KMP_DEBUG_ASSERT(level >= 0 && level < depth);
878 return ratio[level];
879 }
880 int get_depth() const { return depth; };
881 kmp_hw_t get_type(int level) const {
882 KMP_DEBUG_ASSERT(level >= 0 && level < depth);
883 return types[level];
884 }
885 int get_level(kmp_hw_t type) const {
886 KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
887 int eq_type = equivalent[type];
888 if (eq_type == KMP_HW_UNKNOWN)
889 return -1;
890 for (int i = 0; i < depth; ++i)
891 if (types[i] == eq_type)
892 return i;
893 return -1;
894 }
895 int get_count(int level) const {
896 KMP_DEBUG_ASSERT(level >= 0 && level < depth);
897 return count[level];
898 }
899 // Return the total number of cores with attribute 'attr'
900 int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
901 return _get_ncores_with_attr(attr, -1, true);
902 }
903 // Return the number of cores with attribute
904 // 'attr' per topology level 'above'
905 int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
906 return _get_ncores_with_attr(attr, above, false);
907 }
908
909#if KMP_AFFINITY_SUPPORTED
910 friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
911 void sort_compact(kmp_affinity_t &affinity) {
912 compact = affinity.compact;
913 qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
914 kmp_hw_thread_t::compare_compact);
915 }
916#endif
917 void print(const char *env_var = "KMP_AFFINITY") const;
918 void dump() const;
919};
920extern kmp_topology_t *__kmp_topology;
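// A minimal usage sketch (illustrative only), assuming __kmp_topology has
// already been built and canonicalized by the runtime's topology detection:
//
//   int core_level = __kmp_topology->get_level(KMP_HW_CORE);
//   if (core_level >= 0) {
//     int ncores = __kmp_topology->get_count(core_level); // total cores
//     int thr_level = __kmp_topology->get_level(KMP_HW_THREAD);
//     int thr_per_core =
//         __kmp_topology->calculate_ratio(thr_level, core_level);
//   }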
921
922class kmp_hw_subset_t {
923 const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
924
925public:
926 // Describe a machine topology item in KMP_HW_SUBSET
927 struct item_t {
928 kmp_hw_t type;
929 int num_attrs;
930 int num[MAX_ATTRS];
931 int offset[MAX_ATTRS];
932 kmp_hw_attr_t attr[MAX_ATTRS];
933 };
934  // Put parentheses around max to avoid accidental use of Windows max macro.
935 const static int USE_ALL = (std::numeric_limits<int>::max)();
936
937private:
938 int depth;
939 int capacity;
940 item_t *items;
941 kmp_uint64 set;
942 bool absolute;
943 // The set must be able to handle up to KMP_HW_LAST number of layers
944 KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
945  // Comparator used to sort the KMP_HW_SUBSET items into topology order.
946  // All unknown topology types sort to the beginning of the subset.
947 static int hw_subset_compare(const void *i1, const void *i2) {
948 kmp_hw_t type1 = ((const item_t *)i1)->type;
949 kmp_hw_t type2 = ((const item_t *)i2)->type;
950 int level1 = __kmp_topology->get_level(type1);
951 int level2 = __kmp_topology->get_level(type2);
952 return level1 - level2;
953 }
954
955public:
956 // Force use of allocate()/deallocate()
957 kmp_hw_subset_t() = delete;
958 kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
959 kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
960 kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
961 kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
962
963 static kmp_hw_subset_t *allocate() {
964 int initial_capacity = 5;
965 kmp_hw_subset_t *retval =
966 (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
967 retval->depth = 0;
968 retval->capacity = initial_capacity;
969 retval->set = 0ull;
970 retval->absolute = false;
971 retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
972 return retval;
973 }
974 static void deallocate(kmp_hw_subset_t *subset) {
975 __kmp_free(subset->items);
976 __kmp_free(subset);
977 }
978 void set_absolute() { absolute = true; }
979 bool is_absolute() const { return absolute; }
980 void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
981 for (int i = 0; i < depth; ++i) {
982 // Found an existing item for this layer type
983 // Add the num, offset, and attr to this item
984 if (items[i].type == type) {
985 int idx = items[i].num_attrs++;
986 if ((size_t)idx >= MAX_ATTRS)
987 return;
988 items[i].num[idx] = num;
989 items[i].offset[idx] = offset;
990 items[i].attr[idx] = attr;
991 return;
992 }
993 }
994 if (depth == capacity - 1) {
995 capacity *= 2;
996 item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
997 for (int i = 0; i < depth; ++i)
998 new_items[i] = items[i];
999 __kmp_free(items);
1000 items = new_items;
1001 }
1002 items[depth].num_attrs = 1;
1003 items[depth].type = type;
1004 items[depth].num[0] = num;
1005 items[depth].offset[0] = offset;
1006 items[depth].attr[0] = attr;
1007 depth++;
1008 set |= (1ull << type);
1009 }
1010 int get_depth() const { return depth; }
1011 const item_t &at(int index) const {
1012 KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1013 return items[index];
1014 }
1015 item_t &at(int index) {
1016 KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1017 return items[index];
1018 }
1019 void remove(int index) {
1020 KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1021 set &= ~(1ull << items[index].type);
1022 for (int j = index + 1; j < depth; ++j) {
1023 items[j - 1] = items[j];
1024 }
1025 depth--;
1026 }
1027 void sort() {
1028 KMP_DEBUG_ASSERT(__kmp_topology);
1029 qsort(items, depth, sizeof(item_t), hw_subset_compare);
1030 }
1031 bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
1032 void dump() const {
1033 printf("**********************\n");
1034 printf("*** kmp_hw_subset: ***\n");
1035 printf("* depth: %d\n", depth);
1036 printf("* items:\n");
1037 for (int i = 0; i < depth; ++i) {
1038 printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
1039 for (int j = 0; j < items[i].num_attrs; ++j) {
1040 printf(" num: %d, offset: %d, attr: ", items[i].num[j],
1041 items[i].offset[j]);
1042 if (!items[i].attr[j]) {
1043 printf(" (none)\n");
1044 } else {
1045 printf(
1046 " core_type = %s, core_eff = %d\n",
1047 __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
1048 items[i].attr[j].get_core_eff());
1049 }
1050 }
1051 }
1052 printf("* set: 0x%llx\n", set);
1053 printf("* absolute: %d\n", absolute);
1054 printf("**********************\n");
1055 }
1056};
1057extern kmp_hw_subset_t *__kmp_hw_subset;
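// A minimal sketch (illustrative only) of how a KMP_HW_SUBSET specification
// such as "2s,4c,2t" is represented; the actual parsing of the environment
// variable lives elsewhere in the runtime:
//
//   kmp_hw_attr_t no_attr; // default-constructed == "no constraint"
//   kmp_hw_subset_t *s = kmp_hw_subset_t::allocate();
//   s->push_back(2, KMP_HW_SOCKET, 0, no_attr);
//   s->push_back(4, KMP_HW_CORE, 0, no_attr);
//   s->push_back(2, KMP_HW_THREAD, 0, no_attr);
//   s->sort(); // requires __kmp_topology to be initialized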
1058
1059/* A structure for holding machine-specific hierarchy info to be computed once
1060 at init. This structure represents a mapping of threads to the actual machine
1061 hierarchy, or to our best guess at what the hierarchy might be, for the
1062 purpose of performing an efficient barrier. In the worst case, when there is
1063 no machine hierarchy information, it produces a tree suitable for a barrier,
1064 similar to the tree used in the hyper barrier. */
1065class hierarchy_info {
1066public:
1067 /* Good default values for number of leaves and branching factor, given no
1068 affinity information. Behaves a bit like hyper barrier. */
1069 static const kmp_uint32 maxLeaves = 4;
1070 static const kmp_uint32 minBranch = 4;
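  // Number of hierarchy levels allocated in numPerLevel/skipPerLevel; grown
  // by resize() when the hierarchy must cover more threads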
1076 kmp_uint32 maxLevels;
1077
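  // Number of hierarchy levels currently in use (<= maxLevels), and the
  // number of threads the hierarchy was last built or resized for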
1082 kmp_uint32 depth;
1083 kmp_uint32 base_num_threads;
1084 enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
1085 volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
1086 // 2=initialization in progress
1087 volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
1088
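  // numPerLevel[i] is the fan-out (children per node) at level i, with level
  // 0 closest to the leaves; skipPerLevel[i] is the number of leaf threads
  // spanned by one node at level i (the running product of numPerLevel below)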
1093 kmp_uint32 *numPerLevel;
1094 kmp_uint32 *skipPerLevel;
1095
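  // Fill numPerLevel from the detected machine topology, walking from the
  // deepest topology layer to the outermost so that numPerLevel[0] holds the
  // leaf fan-out (e.g., threads per core)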
1096 void deriveLevels() {
1097 int hier_depth = __kmp_topology->get_depth();
1098 for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1099 numPerLevel[level] = __kmp_topology->get_ratio(i);
1100 }
1101 }
1102
1103 hierarchy_info()
1104 : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
1105
1106 void fini() {
1107 if (!uninitialized && numPerLevel) {
1108 __kmp_free(numPerLevel);
1109 numPerLevel = NULL;
1110 uninitialized = not_initialized;
1111 }
1112 }
1113
1114 void init(int num_addrs) {
1115 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
1116 &uninitialized, not_initialized, initializing);
1117 if (bool_result == 0) { // Wait for initialization
1118 while (TCR_1(uninitialized) != initialized)
1119 KMP_CPU_PAUSE();
1120 return;
1121 }
1122 KMP_DEBUG_ASSERT(bool_result == 1);
1123
1124    /* Explicitly initialize the data fields here to prevent use of dirty
1125       values observed when the static library is re-initialized multiple
1126       times (e.g., when a non-OpenMP thread repeatedly launches/joins a
1127       thread that uses OpenMP). */
1128 depth = 1;
1129 resizing = 0;
1130 maxLevels = 7;
1131 numPerLevel =
1132 (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1133 skipPerLevel = &(numPerLevel[maxLevels]);
1134 for (kmp_uint32 i = 0; i < maxLevels;
1135 ++i) { // init numPerLevel[*] to 1 item per level
1136 numPerLevel[i] = 1;
1137 skipPerLevel[i] = 1;
1138 }
1139
1140    // Derive levels from the machine topology if available; otherwise use defaults
1141 if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1142 deriveLevels();
1143 } else {
1144 numPerLevel[0] = maxLeaves;
1145 numPerLevel[1] = num_addrs / maxLeaves;
1146 if (num_addrs % maxLeaves)
1147 numPerLevel[1]++;
1148 }
1149
1150 base_num_threads = num_addrs;
1151 for (int i = maxLevels - 1; i >= 0;
1152 --i) // count non-empty levels to get depth
1153 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
1154 depth++;
1155
1156 kmp_uint32 branch = minBranch;
1157 if (numPerLevel[0] == 1)
1158 branch = num_addrs / maxLeaves;
1159 if (branch < minBranch)
1160 branch = minBranch;
1161 for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
1162 while (numPerLevel[d] > branch ||
1163 (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
1164 if (numPerLevel[d] & 1)
1165 numPerLevel[d]++;
1166 numPerLevel[d] = numPerLevel[d] >> 1;
1167 if (numPerLevel[d + 1] == 1)
1168 depth++;
1169 numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
1170 }
1171 if (numPerLevel[0] == 1) {
1172 branch = branch >> 1;
1173 if (branch < 4)
1174 branch = minBranch;
1175 }
1176 }
1177
1178 for (kmp_uint32 i = 1; i < depth; ++i)
1179 skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
1180 // Fill in hierarchy in the case of oversubscription
1181 for (kmp_uint32 i = depth; i < maxLevels; ++i)
1182 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1183
1184 uninitialized = initialized; // One writer
1185 }
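  // Worked example (illustrative): with no topology information and
  // num_addrs == 16, init() produces numPerLevel = {4, 4, 1, ...} and
  // skipPerLevel = {1, 4, 16, 32, ...} with depth == 3, i.e. a tree with
  // 4 leaves per node and 4 such nodes under the root.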
1186
1187 // Resize the hierarchy if nproc changes to something larger than before
1188 void resize(kmp_uint32 nproc) {
1189 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1190 while (bool_result == 0) { // someone else is trying to resize
1191 KMP_CPU_PAUSE();
1192 if (nproc <= base_num_threads) // happy with other thread's resize
1193 return;
1194 else // try to resize
1195 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1196 }
1197 KMP_DEBUG_ASSERT(bool_result != 0);
1198 if (nproc <= base_num_threads)
1199 return; // happy with other thread's resize
1200
1201 // Calculate new maxLevels
1202 kmp_uint32 old_sz = skipPerLevel[depth - 1];
1203 kmp_uint32 incs = 0, old_maxLevels = maxLevels;
1204 // First see if old maxLevels is enough to contain new size
1205 for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
1206 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1207 numPerLevel[i - 1] *= 2;
1208 old_sz *= 2;
1209 depth++;
1210 }
1211 if (nproc > old_sz) { // Not enough space, need to expand hierarchy
1212 while (nproc > old_sz) {
1213 old_sz *= 2;
1214 incs++;
1215 depth++;
1216 }
1217 maxLevels += incs;
1218
1219 // Resize arrays
1220 kmp_uint32 *old_numPerLevel = numPerLevel;
1221 kmp_uint32 *old_skipPerLevel = skipPerLevel;
1222 numPerLevel = skipPerLevel = NULL;
1223 numPerLevel =
1224 (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1225 skipPerLevel = &(numPerLevel[maxLevels]);
1226
1227 // Copy old elements from old arrays
1228 for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1229 // init numPerLevel[*] to 1 item per level
1230 numPerLevel[i] = old_numPerLevel[i];
1231 skipPerLevel[i] = old_skipPerLevel[i];
1232 }
1233
1234 // Init new elements in arrays to 1
1235 for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1236 // init numPerLevel[*] to 1 item per level
1237 numPerLevel[i] = 1;
1238 skipPerLevel[i] = 1;
1239 }
1240
1241 // Free old arrays
1242 __kmp_free(old_numPerLevel);
1243 }
1244
1245 // Fill in oversubscription levels of hierarchy
1246 for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
1247 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1248
1249 base_num_threads = nproc;
1250 resizing = 0; // One writer
1251 }
1252};
1253#endif // KMP_AFFINITY_H