#include "kmp_affinity.h"
#include "kmp_wrapper_getpid.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

// The machine topology map, real or imagined, used by hierarchical barriers.
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // Lazily initialize the hierarchy on first use.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case the thread count grew past the original size.
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
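// Editorial sketch (illustrative numbers, not from any particular machine):
// with a hypothetical numPerLevel of {4, 2} - four threads per leaf group,
// two groups - a thread gets depth = 2 and base_leaf_kids = 3, i.e. each
// leaf parent gathers up to three children at the barrier; skipPerLevel
// supplies the gtid stride between siblings at each level.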
#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;

void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }

void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use Hwloc if affinity isn't explicitly disabled and
  // the user requests the Hwloc topology method.
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}

void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}
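// Note on the class-specific operator new/delete overloads above: they route
// every KMPAffinity allocation through __kmp_allocate/__kmp_free, so a plain
// "new KMPNativeAffinity()" is serviced by the runtime's own heap rather than
// the global allocator. A minimal usage sketch, using only names visible in
// this file:
//
//   KMPAffinity::pick_api();                 // installs __kmp_affinity_dispatch
//   __kmp_affinity_dispatch->bind_thread(0); // dispatches to hwloc or native
//   KMPAffinity::destroy_api();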
#define KMP_ADVANCE_SCAN(scan)                                                 \
  while (*scan != '\0') {                                                      \
    scan++;                                                                    \
  }

// Print the affinity mask to the character array in a pretty format.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(buf_len >= 40);
  KMP_ASSERT(mask);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Check for an empty set.
  if (mask->begin() == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    KMP_ADVANCE_SCAN(scan);
    KMP_ASSERT(scan <= end);
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (true) {
    // Find the next range: [start, previous] is an inclusive range of
    // contiguous bits in the mask.
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // The first range needs no comma printed before it; the rest do.
    if (!first_range) {
      KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
      KMP_ADVANCE_SCAN(scan);
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the mask
    if (previous - start > 1) {
      KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start),
                   static_cast<int>(previous));
    } else {
      // Range with one or two contiguous bits in the mask
      KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start));
      KMP_ADVANCE_SCAN(scan);
      if (previous - start > 0) {
        KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous));
      }
    }
    KMP_ADVANCE_SCAN(scan);
    // Start over with a new start point.
    start = finish;
    if (start == mask->end())
      break;
  }

  // Check for overflow.
  KMP_ASSERT(scan <= end);
  return buf;
}
#undef KMP_ADVANCE_SCAN
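// Example (illustrative): for a mask with bits {0, 1, 2, 4, 6, 7} set, the
// routine above emits "0-2,4,6,7" -- three or more contiguous bits collapse
// to "lo-hi", exactly two contiguous bits print as "lo,hi", and singletons
// print alone. An empty mask prints as "{<empty>}".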
// Print the affinity mask to the string buffer object in the same pretty
// format as __kmp_affinity_print_mask().
kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
                                           kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(mask);

  __kmp_str_buf_clear(buf);

  // Check for an empty set.
  if (mask->begin() == mask->end()) {
    __kmp_str_buf_print(buf, "%s", "{<empty>}");
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (true) {
    // Find the next range: [start, previous] is an inclusive range of
    // contiguous bits in the mask.
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // The first range needs no comma printed before it; the rest do.
    if (!first_range) {
      __kmp_str_buf_print(buf, "%s", ",");
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the mask
    if (previous - start > 1) {
      __kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start),
                          static_cast<int>(previous));
    } else {
      // Range with one or two contiguous bits in the mask
      __kmp_str_buf_print(buf, "%d", static_cast<int>(start));
      if (previous - start > 0) {
        __kmp_str_buf_print(buf, ",%d", static_cast<int>(previous));
      }
    }
    // Start over with a new start point.
    start = finish;
    if (start == mask->end())
      break;
  }
  return buf;
}
void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    for (proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}
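// Worked example (illustrative): on 64-bit Windows, CHAR_BIT * sizeof(DWORD_PTR)
// is 64, so processor 3 of group 1 maps to global bit 1 * 64 + 3 = 67. Each
// processor group thus occupies its own aligned 64-bit slice of the mask.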
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels into the childNums vector of each address,
// since the raw labels used by the hardware need not be dense per level.
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        counts[labCt]++;
        // Reset the counters for all deeper levels.
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(lastLabel);
  __kmp_free(counts);
}
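// Illustrative renumbering, assuming two packages whose raw core labels are
// {10, 12} and {7, 9}: the per-level counters above assign childNums 0, 1
// within each package regardless of the raw values, which is what keeps
// label-order sorting stable across different nodes of the hierarchy.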
// The full mask of OS procs that the process may run on.
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

// The machine topology is uniform when the product of the counts at each
// modeled level equals the number of available procs.
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
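// Arithmetic check (illustrative): 2 packages x 4 cores x 2 HW threads gives
// 16 procs; if only 14 are available (say two are masked out), the product
// 2 * 4 * 2 = 16 != 14 and the topology is reported as non-uniform.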
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}
#if KMP_USE_HWLOC

static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
                                          int depth, int *levels) {
  int proc;
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
                        addrP[proc].first.labels[0]);
    if (depth > 1) {
      int level = 1; // iterate over levels
      int label = 1; // iterate over labels
      if (__kmp_numa_detected)
        // node level follows package
        if (levels[level++] > 0)
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
                              addrP[proc].first.labels[label++]);
      if (__kmp_tile_depth > 0)
        // tile level follows node if any, or package
        if (levels[level++] > 0)
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
                              addrP[proc].first.labels[label++]);
      if (levels[level++] > 0)
        // core level follows
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
                            addrP[proc].first.labels[label++]);
      if (levels[level++] > 0)
        // thread level is the latest
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
                            addrP[proc].first.labels[label++]);
      KMP_DEBUG_ASSERT(label == depth);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
    __kmp_str_buf_clear(&buf);
  }
  __kmp_str_buf_free(&buf);
}
static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile;

// This function removes topology levels that are radix 1 and so offer no
// further information about the topology. The most common example is one
// thread context per core: the thread level then offers no unique labels and
// is removed. Returns the new depth of address2os.
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
                                                  int depth, int *levels) {
  int level;
  int i;
  int radix1_detected;
  int new_depth = depth;
  for (level = depth - 1; level > 0; --level) {
    // Detect if this level is radix 1.
    radix1_detected = 1;
    for (i = 1; i < nTh; ++i) {
      if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {
        // There are differing label values for this level, so it stays.
        radix1_detected = 0;
        break;
      }
    }
    if (!radix1_detected)
      continue;
    // Radix 1 was detected.
    --new_depth;
    levels[level] = -1; // mark the level as not present in address2os
    if (level == new_depth) {
      // "Turn off" the deepest level: decrementing the depth removes it
      // from the address2os array.
      for (i = 0; i < nTh; ++i) {
        addrP[i].first.depth--;
      }
    } else {
      // For other levels, move the deeper labels over and reduce the depth.
      int j;
      for (j = level; j < new_depth; ++j) {
        for (i = 0; i < nTh; ++i) {
          addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];
          addrP[i].first.depth--;
        }
      }
    }
  }
  return new_depth;
}
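// Example (illustrative): depth-3 labels {pkg, core, thread} where every
// address carries thread label 0 collapse to depth 2 -- the thread level is
// radix 1, levels[2] is marked -1, and each address's depth drops, so later
// granularity math operates on {pkg, core} only.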
// Returns the number of objects of type 'type' below 'obj' within the
// topology tree structure.
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL &&
       hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
           obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}

// Count the children of 'o' at exactly 'depth', reporting the first one
// found through *f.
static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
                                               hwloc_obj_t o,
                                               kmp_hwloc_depth_t depth,
                                               hwloc_obj_t *f) {
  if (o->depth == depth) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}

// Count the children of 'o' of hwloc type 'type', reporting the first one
// found through *f.
static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
                                              hwloc_obj_type_t type,
                                              hwloc_obj_t *f) {
  if (!hwloc_compare_types(o->type, type)) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}
// Walk the cores below 'obj' (a package, node, or tile) and, for each active
// PU, append an Address with the given prefix labels plus {core, pu} to the
// address2os array being built.
static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,
                                           int &nActiveThreads,
                                           int &num_active_cores,
                                           hwloc_obj_t obj, int depth,
                                           int *labels) {
  hwloc_obj_t core = NULL;
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);
  for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {
    hwloc_obj_t pu = NULL;
    KMP_DEBUG_ASSERT(core != NULL);
    int num_active_threads = 0;
    int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);
    for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
      KMP_DEBUG_ASSERT(pu != NULL);
      if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
        continue; // skip inactive (inaccessible) unit
      Address addr(depth + 2);
      KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                    obj->os_index, obj->logical_index, core->os_index,
                    core->logical_index, pu->os_index, pu->logical_index));
      for (int i = 0; i < depth; ++i)
        addr.labels[i] = labels[i]; // package, etc.
      addr.labels[depth] = core_id; // core
      addr.labels[depth + 1] = pu_id; // pu
      addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
      __kmp_pu_os_idx[nActiveThreads] = pu->os_index;
      nActiveThreads++; // count total active threads
      ++num_active_threads; // count active threads per core
    }
    if (num_active_threads) { // were there any active threads on the core?
      ++__kmp_ncores; // count total active cores
      ++num_active_cores; // count active cores under obj
      if (num_active_threads > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = num_active_threads; // calc maximum
    }
  }
  return 0;
}
// Check if a NUMA node is detected below the package, and whether a tile
// object (modeled via the unified-L2 cache level) exists; record its depth.
static int __kmp_hwloc_check_numa() {
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
  int depth, l2cache_depth, package_depth;

  // Get some PU.
  hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
  if (hT == NULL) // something has gone wrong
    return 1;

  // Check for a NUMA node below PACKAGE.
  hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
  hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
  KMP_DEBUG_ASSERT(hS != NULL);
  if (hN != NULL && hN->depth > hS->depth) {
    __kmp_numa_detected = TRUE; // socket includes node(s)
    if (__kmp_affinity_gran == affinity_gran_node) {
      __kmp_affinity_gran = affinity_gran_numa;
    }
  }

  package_depth = hwloc_get_type_depth(tp, HWLOC_OBJ_PACKAGE);
  l2cache_depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
  // Check for a tile; get the object by depth because of multiple caches.
  depth = (l2cache_depth < package_depth) ? package_depth : l2cache_depth;
  hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
  hC = NULL;
  if (hL != NULL &&
      __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
    __kmp_tile_depth = depth; // tile consists of multiple cores
  return 0;
}
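// Design note: a "tile" is inferred rather than read directly -- the deeper
// of the package depth and the unified-L2 depth is taken, and only if that
// object spans more than one core is it treated as a tile level. On parts
// with a private L2 per core the child count is 1 and no tile is recorded.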
static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
  hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a verbose name
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);
  __kmp_hwloc_check_numa();

  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from hwloc on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);
    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  int depth = 3;
  int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread
  int labels[3] = {0}; // package [,node] [,tile] - head of the labels array
  if (__kmp_numa_detected)
    ++depth;
  if (__kmp_tile_depth)
    ++depth;

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  hwloc_obj_t socket, node, tile;
  int nActiveThreads = 0;
  int socket_id = 0;
  // Re-calculate the globals to count only accessible resources.
  __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
  nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
  for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL;
       socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
      ++socket_id) {
    labels[0] = socket_id;
    if (__kmp_numa_detected) {
      int NN;
      int n_active_nodes = 0;
      node = NULL;
      NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
                                              &node);
      for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
        labels[1] = node_id;
        if (__kmp_tile_depth) {
          // NUMA + tiles
          int NT;
          int n_active_tiles = 0;
          tile = NULL;
          NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
                                                   &tile);
          for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
            labels[2] = tl_id;
            int n_active_cores = 0;
            __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                            n_active_cores, tile, 3, labels);
            if (n_active_cores) { // any active cores on the tile?
              ++n_active_tiles; // count active tiles per node
              if (n_active_cores > nCorePerTile)
                nCorePerTile = n_active_cores; // calc maximum
            }
          }
          if (n_active_tiles) { // any active tiles on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_tiles > nTilePerNode)
              nTilePerNode = n_active_tiles; // calc maximum
          }
        } else {
          // NUMA, no tiles
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, node, 2, labels);
          if (n_active_cores) { // any active cores on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_cores > nCorePerNode)
              nCorePerNode = n_active_cores; // calc maximum
          }
        }
      }
      if (n_active_nodes) { // any active nodes on the socket?
        ++nPackages; // count total active packages
        if (n_active_nodes > nNodePerPkg)
          nNodePerPkg = n_active_nodes; // calc maximum
      }
    } else {
      if (__kmp_tile_depth) {
        // no NUMA, tiles
        int NT;
        int n_active_tiles = 0;
        tile = NULL;
        NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,
                                                 &tile);
        for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
          labels[1] = tl_id;
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, tile, 2, labels);
          if (n_active_cores) { // any active cores on the tile?
            ++n_active_tiles; // count active tiles per package
            if (n_active_cores > nCorePerTile)
              nCorePerTile = n_active_cores; // calc maximum
          }
        }
        if (n_active_tiles) { // any active tiles on the socket?
          ++nPackages; // count total active packages
          if (n_active_tiles > nTilePerPkg)
            nTilePerPkg = n_active_tiles; // calc maximum
        }
      } else {
        // no NUMA, no tiles
        int n_active_cores = 0;
        __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,
                                        socket, 1, labels);
        if (n_active_cores) { // any active cores on the socket?
          ++nPackages; // count total active packages
          if (n_active_cores > nCoresPerPkg)
            nCoresPerPkg = n_active_cores; // calc maximum
        }
      }
    }
  }

  // Check the unusual situation when the affinity mask does not contain any
  // PUs, and handle the case of a single active thread.
  KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
  KMP_ASSERT(nActiveThreads > 0);
  if (nActiveThreads == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[0];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);

  // Check if the machine topology is uniform.
  int nPUs = nPackages * __kmp_nThreadsPerCore;
  if (__kmp_numa_detected) {
    if (__kmp_tile_depth) { // NUMA + tiles
      nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);
    } else { // NUMA, no tiles
      nPUs *= (nNodePerPkg * nCorePerNode);
    }
  } else {
    if (__kmp_tile_depth) { // no NUMA, tiles
      nPUs *= (nTilePerPkg * nCorePerTile);
    } else { // no NUMA, no tiles
      nPUs *= nCoresPerPkg;
    }
  }
  unsigned uniform = (nPUs == nActiveThreads);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    if (__kmp_numa_detected) {
      if (__kmp_tile_depth) { // NUMA + tiles
        KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
                   __kmp_ncores);
      } else { // NUMA, no tiles
        KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
        nPUs *= (nNodePerPkg * nCorePerNode);
      }
    } else {
      if (__kmp_tile_depth) { // no NUMA, tiles
        KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,
                   nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
      } else { // no NUMA, no tiles
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        __kmp_str_buf_print(&buf, "%d", nPackages);
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
                   __kmp_nThreadsPerCore, __kmp_ncores);
        __kmp_str_buf_free(&buf);
      }
    }
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  int depth_full = depth; // number of levels before compressing
  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
                                                 levels);
  KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0; // lowest level (e.g. fine)
    if (__kmp_affinity_gran > affinity_gran_thread) {
      for (int i = 1; i <= depth_full; ++i) {
        if (__kmp_affinity_gran <= i) // only count deeper levels
          break;
        if (levels[depth_full - i] > 0)
          __kmp_affinity_gran_levels++;
      }
    }
    if (__kmp_affinity_gran > affinity_gran_package)
      __kmp_affinity_gran_levels++; // e.g. granularity = group
  }

  if (__kmp_affinity_verbose)
    __kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);

  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
#endif // KMP_USE_HWLOC
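// Uniformity check above, worked through (illustrative): on a 2-package part
// with NUMA detected and no tiles, nNodePerPkg = 2, nCorePerNode = 8, and
// 2 HW threads per core, nPUs = (2 * 2) * (2 * 8) = 64; if nActiveThreads is
// also 64, the map is uniform. Any PU masked out of the process affinity
// drops nActiveThreads below nPUs and the topology prints as non-uniform.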
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Even if __kmp_affinity_type == affinity_none, this routine might still
  // be called to set __kmp_ncores, as well as
  // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    __kmp_ncores = nPackages = __kmp_xproc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, &
  // nPackages. Make sure all these vars are set correctly, and return now
  // if affinity is not enabled.
  __kmp_ncores = nPackages = __kmp_avail_proc;
  __kmp_nThreadsPerCore = nCoresPerPkg = 1;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);

    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    KMP_INFORM(Uniform, "KMP_AFFINITY");
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  if (__kmp_affinity_type == affinity_none) {
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
        continue;
      __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
    }
    return 0;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(1);
    addr.labels[0] = i;
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
  }
  if (__kmp_affinity_verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Only the package level is modeled in the machine topology map,
    // so the #levels of granularity is either 0 or 1.
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels = 1;
    } else {
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 1;
}
#if KMP_GROUP_AFFINITY

// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1. This lets the threads float among all procs in a group, if
// granularity=group (the default when there are multiple groups).
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // If we aren't affinity capable, then return now.
  // The flat mapping will be used.
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(2);
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                 addr.labels[1]);
    }
  }

  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }

      // Warning: can't use the granularity above with the group topology
      // method; falling back to "thread".
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 2;
}
#endif /* KMP_GROUP_AFFINITY */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int __kmp_cpuid_mask_width(int count) {
  int r = 0;

  while ((1 << r) < count)
    ++r;
  return r;
}

class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; // ""
  unsigned maxThreadsPerPkg; // ""
  unsigned pkgId; // inferred from the above values
  unsigned coreId; // ""
  unsigned threadId; // ""
};

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->pkgId < bb->pkgId)
    return -1;
  if (aa->pkgId > bb->pkgId)
    return 1;
  if (aa->coreId < bb->coreId)
    return -1;
  if (aa->coreId > bb->coreId)
    return 1;
  if (aa->threadId < bb->threadId)
    return -1;
  if (aa->threadId > bb->threadId)
    return 1;
  return 0;
}
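// __kmp_cpuid_mask_width(count) returns the smallest w with (1 << w) >= count,
// i.e. the number of APIC-id bits reserved for that field. For example
// (illustrative), count = 6 gives w = 3, since 1 << 2 = 4 < 6 <= 8 = 1 << 3.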
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use an
// algorithm which cycles through the available os threads, setting the
// current thread's affinity mask to that thread, and then retrieving the
// Apic Id for each thread context using the cpuid instruction.
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check if cpuid leaf 4 support is available.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return -1;
  }

  // Even if __kmp_affinity_type == affinity_none, this routine might still
  // be called to set __kmp_ncores, as well as
  // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The num cores per pkg comes from cpuid(4); 1 must be added to the
    // encoded value. First check whether cpuid(4) is supported on this chip:
    // issue cpuid(0) and check that eax is 4 or greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    // There is no way to reliably tell if HT is enabled without issuing the
    // cpuid instruction from every thread and correlating the results, so
    // if the machine is not affinity capable, we assume that HT is off.
    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  KMP_ASSERT(oldMask != NULL);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  unsigned i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    if (((buf.edx >> 9) & 1) == 0) {
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return -1;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4), when supported.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained
    // locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // This could only happen if the cpuid info on a chip were badly
      // broken. Restore the affinity mask before bailing out.
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }
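  // Decomposition recap (illustrative numbers): with maxThreadsPerPkg = 8
  // and maxCoresPerPkg = 4, widthCT = 3 and widthC = 2, so widthT = 1. An
  // apicId of 0b1101 (13) then splits as pkgId = 13 >> 3 = 1,
  // coreId = (13 >> 1) & 3 = 2, and threadId = 13 & 1 = 1.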
  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0].pkgId;
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);

  // Count the packages, cores per package, and threads per core. The counts
  // must be consistent: each package must report the same maxCoresPerPkg and
  // maxThreadsPerPkg, and the apic ids must be unique.
  nPackages = 1;
  nCoresPerPkg = 1;
  __kmp_nThreadsPerCore = 1;
  unsigned nCores = 1;

  unsigned pkgCt = 1; // to determine radii
  unsigned lastPkgId = threadInfo[0].pkgId;
  unsigned coreCt = 1;
  unsigned lastCoreId = threadInfo[0].coreId;
  unsigned threadCt = 1;
  unsigned lastThreadId = threadInfo[0].threadId;

  // intra-pkg consistency check
  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      pkgCt++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency-check vars.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return -1;
    }

    // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
    // fields agree between all the threads bound to a given package.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }
  }
  nPackages = pkgCt;
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  __kmp_ncores = nCores;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (__kmp_affinity_uniform_topology()) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < nApics; ++i) {
    __kmp_pu_os_idx[i] = threadInfo[i].osId;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Now that we've determined the number of packages, the number of cores
  // per package, and the number of threads per core, we can construct the
  // data structure that is to be returned.
  int pkgLevel = 0;
  int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
  int threadLevel =
      (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

  KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

  for (i = 0; i < nApics; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i].osId;
    int d = 0;

    if (pkgLevel >= 0) {
      addr.labels[d++] = threadInfo[i].pkgId;
    }
    if (coreLevel >= 0) {
      addr.labels[d++] = threadInfo[i].coreId;
    }
    if (threadLevel >= 0) {
      addr.labels[d++] = threadInfo[i].threadId;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled in the
    // machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(threadInfo);
  KMP_CPU_FREE(oldMask);
  return depth;
}
// Intel(R) microarchitecture code name Nehalem, Dunnington, and later
// architectures support a newer interface for specifying x2APIC Ids, based
// on cpuid leaf 11.
static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
                                              kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check to see if cpuid leaf 11 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 11) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }
  __kmp_x86_cpuid(11, 0, &buf);
  if (buf.ebx == 0) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }

  // Find the number of levels in the machine topology. While we're at it,
  // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg, in case
  // we return early.
  int level;
  int threadLevel = -1;
  int coreLevel = -1;
  int pkgLevel = -1;
  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

  for (level = 0;; level++) {
    if (level > 31) {
      // There could be 32 valid levels in the machine topology, but so far
      // the only machines seen that do not exit this loop before iteration
      // 32 have broken x2APIC settings, so reject the case on trip count.
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }
    __kmp_x86_cpuid(11, level, &buf);
    if (buf.ebx == 0) {
      if (pkgLevel < 0) {
        // Will infer nPackages from __kmp_xproc.
        pkgLevel = level;
        level++;
      }
      break;
    }
    int kind = (buf.ecx >> 8) & 0xff;
    if (kind == 1) {
      // SMT level
      threadLevel = level;
      coreLevel = -1;
      pkgLevel = -1;
      __kmp_nThreadsPerCore = buf.ebx & 0xffff;
      if (__kmp_nThreadsPerCore == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else if (kind == 2) {
      // core level
      coreLevel = level;
      pkgLevel = -1;
      nCoresPerPkg = buf.ebx & 0xffff;
      if (nCoresPerPkg == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else {
      if (level <= 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
      if (pkgLevel >= 0) {
        continue;
      }
      pkgLevel = level;
      nPackages = buf.ebx & 0xffff;
      if (nPackages == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    }
  }
  int depth = level;

  // In the loop above, "level" ran from the finest level (usually thread) to
  // the coarsest. The caller expects the labels in
  // (*address2os)[].first.labels[] in the reverse order, so invert the vars
  // that say which level means what.
  if (threadLevel >= 0) {
    threadLevel = depth - threadLevel - 1;
  }
  if (coreLevel >= 0) {
    coreLevel = depth - coreLevel - 1;
  }
  KMP_DEBUG_ASSERT(pkgLevel >= 0);
  pkgLevel = depth - pkgLevel - 1;
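  // Example of the inversion (illustrative): with depth = 3 and cpuid levels
  // thread = 0, core = 1, package = 2, the label indices become thread = 2,
  // core = 1, package = 0, so labels[] reads coarsest-to-finest as the rest
  // of the file expects.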
  // The algorithm below starts by binding to each available thread context,
  // so if we are not capable of calling __kmp_get_system_affinity() and
  // __kmp_set_system_affinity(), use the defaults computed above instead.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  unsigned int proc;
  int nApics = 0;
  KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(proc);

    // Extract the labels for each level in the machine topology map from
    // the Apic Id.
    Address addr(depth);
    int prev_shift = 0;

    for (level = 0; level < depth; level++) {
      __kmp_x86_cpuid(11, level, &buf);
      unsigned apicId = buf.edx;
      if (buf.ebx == 0) {
        if (level != depth - 1) {
          KMP_CPU_FREE(oldMask);
          *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
          return -1;
        }
        addr.labels[depth - level - 1] = apicId >> prev_shift;
        level++;
        break;
      }
      int shift = buf.eax & 0x1f;
      int mask = (1 << shift) - 1;
      addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
      prev_shift = shift;
    }
    if (level != depth) {
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }

    retval[nApics] = AddrUnsPair(addr, proc);
    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, return now.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

  // Find the radix at each of the levels.
  unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  for (level = 0; level < depth; level++) {
    totals[level] = 1;
    counts[level] = 1;
    maxCt[level] = 1;
    last[level] = retval[0].first.labels[level];
  }

  // From here on, "level" runs from the finest level to the coarsest, i.e.
  // we iterate forward through (*address2os)[].first.labels[] - in the
  // reverse order of the cpuid loop above.
  for (proc = 1; (int)proc < nApics; proc++) {
    for (level = 0; level < depth; level++) {
      if (retval[proc].first.labels[level] != last[level]) {
        int j;
        for (j = level + 1; j < depth; j++) {
          totals[j]++;
          counts[j] = 1;
          // Do not reset maxCt[j] here: the max seen so far must be kept so
          // that sparse label widths still report correctly.
          last[j] = retval[proc].first.labels[j];
        }
        totals[level]++;
        counts[level]++;
        if (counts[level] > maxCt[level]) {
          maxCt[level] = counts[level];
        }
        last[level] = retval[proc].first.labels[level];
        break;
      } else if (level == depth - 1) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
        return -1;
      }
    }
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  if (threadLevel >= 0) {
    __kmp_nThreadsPerCore = maxCt[threadLevel];
  } else {
    __kmp_nThreadsPerCore = 1;
  }
  nPackages = totals[pkgLevel];

  if (coreLevel >= 0) {
    __kmp_ncores = totals[coreLevel];
    nCoresPerPkg = maxCt[coreLevel];
  } else {
    __kmp_ncores = nPackages;
    nCoresPerPkg = 1;
  }

  // Check to see if the machine topology is uniform.
  unsigned prod = maxCt[0];
  for (level = 1; level < depth; level++) {
    prod *= maxCt[level];
  }
  bool uniform = (prod == totals[level - 1]);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[0]);
    for (level = 1; level <= pkgLevel; level++) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (proc = 0; (int)proc < nApics; ++proc) {
    __kmp_pu_os_idx[proc] = retval[proc].second;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  int new_depth = 0;
  for (level = 0; level < depth; level++) {
    if ((maxCt[level] == 1) && (level != pkgLevel)) {
      continue;
    }
    new_depth++;
  }

  // If we are removing any levels, allocate a new vector to return,
  // and copy the relevant information to it.
  if (new_depth != depth) {
    AddrUnsPair *new_retval =
        (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
    for (proc = 0; (int)proc < nApics; proc++) {
      Address addr(new_depth);
      new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
    }
    int new_level = 0;
    int newPkgLevel = -1;
    int newCoreLevel = -1;
    int newThreadLevel = -1;
    for (level = 0; level < depth; level++) {
      if ((maxCt[level] == 1) && (level != pkgLevel)) {
        // Remove this level. Never remove the package level.
        continue;
      }
      if (level == pkgLevel) {
        newPkgLevel = new_level;
      }
      if (level == coreLevel) {
        newCoreLevel = new_level;
      }
      if (level == threadLevel) {
        newThreadLevel = new_level;
      }
      for (proc = 0; (int)proc < nApics; proc++) {
        new_retval[proc].first.labels[new_level] =
            retval[proc].first.labels[level];
      }
      new_level++;
    }

    __kmp_free(retval);
    retval = new_retval;
    depth = new_depth;
    pkgLevel = newPkgLevel;
    coreLevel = newCoreLevel;
    threadLevel = newThreadLevel;
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
                                  threadLevel);
  }

  __kmp_free(last);
  __kmp_free(maxCt);
  __kmp_free(counts);
  __kmp_free(totals);
  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(unsigned *const *)a;
  const unsigned *bb = *(unsigned *const *)b;
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
#if KMP_USE_HIER_SCHED
// Set the array sizes for the hierarchy layers.
static void __kmp_dispatch_set_hierarchy_values() {
  // Set the maximum number of L1's to the number of cores.
  // Set the maximum number of L2's to either the number of cores / 2 for
  // Intel(R) Xeon Phi(TM) coprocessors formerly codenamed Knights Landing,
  // or the number of cores for other Intel(R) Xeon(R) processors.
  // Set the maximum number of NUMA nodes and L3's to the number of packages.
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) &&   \
    KMP_MIC_SUPPORTED
  if (__kmp_mic_type >= mic3)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
  // Set the number of hardware threads per unit for L1/L2/L3/NUMA/loop.
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
      __kmp_nThreadsPerCore;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) &&   \
    KMP_MIC_SUPPORTED
  if (__kmp_mic_type >= mic3)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        2 * __kmp_nThreadsPerCore;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
}

// Return the index into the hierarchy for this tid and layer type (L1, L2,
// etc), e.g. this thread's L1, or this thread's L2, etc.
int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
  int index = type + 1;
  int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
  KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
  if (type == kmp_hier_layer_e::LAYER_THREAD)
    return tid;
  else if (type == kmp_hier_layer_e::LAYER_LOOP)
    return 0;
  KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
  if (tid >= num_hw_threads)
    tid = tid % num_hw_threads;
  return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
}

// Return the number of t1's per t2.
int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
  int i1 = t1 + 1;
  int i2 = t2 + 1;
  KMP_DEBUG_ASSERT(i1 <= i2);
  KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
  // (nthreads / t2) / (nthreads / t1) = t1's per t2
  return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
}
#endif // KMP_USE_HIER_SCHED
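// Worked example (illustrative): on a machine with 2 HW threads per core and
// 8 cores, __kmp_dispatch_get_index(5, LAYER_L1) yields (5 / 2) % 8 = 2, so
// thread 5 belongs to core 2; and with 8 cores per package,
// __kmp_dispatch_get_t1_per_t2(LAYER_L1, LAYER_L3) returns (8 * 2) / 2 = 8
// cores per L3.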
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain
// the affinity map.
static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
                                             int *line,
                                             kmp_i18n_id_t *const msg_id,
                                             FILE *f) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Scan the file once, counting the number of "processor" (osId) fields and
  // finding the highest level of node_<n> id fields.
  char buf[256];
  unsigned num_records = 0;
  while (!feof(f)) {
    buf[sizeof(buf) - 1] = 1;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read errors, presumably because of EOF.
      break;
    }

    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      num_records++;
      continue;
    }

    unsigned level;
    if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
      if (nodeIdIndex + level >= maxIndex) {
        maxIndex = nodeIdIndex + level;
      }
      continue;
    }
  }

  // Check for an empty file / no valid processor records, or too many. The
  // number of records can't exceed the number of valid bits in the mask.
  if (num_records == 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_NoProcRecords;
    return -1;
  }
  if (num_records > (unsigned)__kmp_xproc) {
    *line = 0;
    *msg_id = kmp_i18n_str_TooManyProcRecords;
    return -1;
  }

  // Set the file pointer back to the beginning so that we can scan the file
  // again, this time performing a full parse of the data.
  if (fseek(f, 0, SEEK_SET) != 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_CantRewindCpuinfo;
    return -1;
  }

  // Allocate the array of records to store the proc info in. The dummy
  // element at the end makes the logic of filling them out easier to code.
  unsigned **threadInfo =
      (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
  unsigned i;
  for (i = 0; i <= num_records; i++) {
    threadInfo[i] =
        (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  }

#define CLEANUP_THREAD_INFO                                                    \
  for (i = 0; i <= num_records; i++) {                                         \
    __kmp_free(threadInfo[i]);                                                 \
  }                                                                            \
  __kmp_free(threadInfo);

  // A value of UINT_MAX means that we didn't find the field.
  unsigned __index;

#define INIT_PROC_INFO(p)                                                      \
  for (__index = 0; __index <= maxIndex; __index++) {                          \
    (p)[__index] = UINT_MAX;                                                   \
  }

  for (i = 0; i <= num_records; i++) {
    INIT_PROC_INFO(threadInfo[i]);
  }

  unsigned num_avail = 0;
  *line = 0;
  while (!feof(f)) {
    // Create an inner scoping level so that all the goto targets at the end
    // of the loop appear in an outer scoping level.
    {
      buf[sizeof(buf) - 1] = 1;
      bool long_line = false;
      if (!fgets(buf, sizeof(buf), f)) {
        // Read errors, presumably because of EOF. If there is valid data in
        // threadInfo[num_avail], fake a blank line so the last record still
        // gets parsed.
        bool valid = false;
        for (i = 0; i <= maxIndex; i++) {
          if (threadInfo[num_avail][i] != UINT_MAX) {
            valid = true;
          }
        }
        if (!valid) {
          break;
        }
        buf[0] = 0;
      } else if (!buf[sizeof(buf) - 1]) {
        // The line is longer than the buffer. Set a flag and don't emit an
        // error if we were going to ignore the line anyway.
        long_line = true;

#define CHECK_LINE                                                             \
  if (long_line) {                                                             \
    CLEANUP_THREAD_INFO;                                                       \
    *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
    return -1;                                                                 \
  }
      }
      (*line)++;

      char s1[] = "processor";
      if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s1) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
#if KMP_ARCH_AARCH64
          // Handle the old AArch64 /proc/cpuinfo layout differently; it
          // lists all of the 'processor' entries in a single 'Processor'
          // section, so the normal duplicate check would always fail.
          num_avail++;
#else
          goto dup_field;
#endif
        threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
        char path[256];
        KMP_SNPRINTF(
            path, sizeof(path),
            "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
            threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

        KMP_SNPRINTF(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
                     threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
        continue;
#else
      }
      char s2[] = "physical id";
      if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s2) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][pkgIdIndex] = val;
        continue;
      }
      char s3[] = "core id";
      if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s3) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][coreIdIndex] = val;
        continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
      }
      char s4[] = "thread id";
      if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][threadIdIndex] = val;
        continue;
      }
      unsigned level;
      if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        KMP_ASSERT(nodeIdIndex + level <= maxIndex);
        if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][nodeIdIndex + level] = val;
        continue;
      }

      // We didn't recognize the leading token on the line. There are lots of
      // leading tokens that we don't recognize - if the line isn't empty, go
      // on to the next line.
      if ((*buf != 0) && (*buf != '\n')) {
        // If the line is longer than the buffer, read characters
        // until we find a newline.
        if (long_line) {
          int ch;
          while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
            ;
        }
        continue;
      }

      // A newline has signalled the end of the processor record.
      // Check that there aren't too many procs specified.
      if ((int)num_avail == __kmp_xproc) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_TooManyEntries;
        return -1;
      }

      // Check for missing fields. The osId field must be there; we also
      // currently require that the physical id field is specified.
      if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingProcField;
        return -1;
      }
      if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
        return -1;
      }

      // Skip this proc if it is not included in the machine model.
      if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
                         __kmp_affin_fullMask)) {
        INIT_PROC_INFO(threadInfo[num_avail]);
        continue;
      }

      // We have a successful parse of this proc's info.
      // Increment the counter, and prepare for the next proc.
      num_avail++;
      KMP_ASSERT(num_avail <= num_records);
      INIT_PROC_INFO(threadInfo[num_avail]);
    }
    continue;

  no_val:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_MissingValCpuinfo;
    return -1;

  dup_field:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
    return -1;
  }
  *line = 0;

#if KMP_MIC && REDUCE_TEAM_SIZE
  unsigned teamSize = 0;
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  KMP_ASSERT(num_avail > 0);
  KMP_ASSERT(num_avail <= num_records);
  if (num_avail == 1) {
    __kmp_ncores = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
    if (__kmp_affinity_verbose) {
      if (!KMP_AFFINITY_CAPABLE()) {
        KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
          KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
          KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      }
      int index;
      kmp_str_buf_t buf;
      __kmp_str_buf_init(&buf);
      __kmp_str_buf_print(&buf, "1");
      for (index = maxIndex - 1; index > pkgIdIndex; index--) {
        __kmp_str_buf_print(&buf, " x 1");
      }
      KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
      __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
      CLEANUP_THREAD_INFO;
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0][pkgIdIndex];
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    CLEANUP_THREAD_INFO;
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really
  // don't know the radix of any of the fields: pkgId's may be sparsely
  // assigned, and although coreId's and threadId's are usually dense, we
  // don't know what range of values they can take. Examine the sorted
  // table, assigning thread and core ids based on duplicates.
  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
  unsigned threadIdCt;
  unsigned index;

restart_radix_check:
  threadIdCt = 0;

  // Initialize the counter arrays with data from threadInfo[0].
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
  for (index = 0; index <= maxIndex; index++) {
    counts[index] = 1;
    maxCt[index] = 1;
    totals[index] = 1;
    lastId[index] = threadInfo[0][index];
  }

  // Run through the rest of the records in the table, comparing each
  // thread's data to the data for the thread in the previous record.
  for (i = 1; i < num_avail; i++) {
    // Find the most significant index whose id differs from the id for the
    // previous record.
    for (index = maxIndex; index >= threadIdIndex; index--) {
      if (assign_thread_ids && (index == threadIdIndex)) {
        // Auto-assign the thread id field if it wasn't specified.
        if (threadInfo[i][threadIdIndex] == UINT_MAX) {
          threadInfo[i][threadIdIndex] = threadIdCt++;
        }
        // Apparently the thread id field was specified for some records and
        // not others. Start the thread id counter off at the next higher
        // thread id.
        else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
          threadIdCt = threadInfo[i][threadIdIndex] + 1;
        }
      }
      if (threadInfo[i][index] != lastId[index]) {
        // Run through all less significant indices, resetting their counts
        // to 1; at all levels up to and including index, increment the
        // totals and record the last id.
        unsigned index2;
        for (index2 = threadIdIndex; index2 < index; index2++) {
          totals[index2]++;
          if (counts[index2] > maxCt[index2]) {
            maxCt[index2] = counts[index2];
          }
          counts[index2] = 1;
          lastId[index2] = threadInfo[i][index2];
        }
        counts[index]++;
        totals[index]++;
        lastId[index] = threadInfo[i][index];

        if (assign_thread_ids && (index > threadIdIndex)) {

#if KMP_MIC && REDUCE_TEAM_SIZE
          // The default team size is the total #threads in the machine
          // minus 1 thread for every core that has 3 or more threads.
          teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

          // Restart the thread counter, as we are on a new core.
          threadIdCt = 0;

          // Auto-assign the thread id field if it wasn't specified.
          if (threadInfo[i][threadIdIndex] == UINT_MAX) {
            threadInfo[i][threadIdIndex] = threadIdCt++;
          }

          // Apparently the thread id field was specified for some records
          // and not others. Start the thread id counter off at the next
          // higher thread id.
          else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
            threadIdCt = threadInfo[i][threadIdIndex] + 1;
          }
        }
        break;
      }
    }
    if (index < threadIdIndex) {
      // If thread ids were specified, it is an error if they are not unique.
      // Also check that we haven't already restarted the loop (to be safe -
      // we shouldn't need to).
      if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
        return -1;
      }

      // If the thread ids were not specified and we see entries that are
      // duplicates, start the loop over and assign the thread ids manually.
      assign_thread_ids = true;
      goto restart_radix_check;
    }
  }

#if KMP_MIC && REDUCE_TEAM_SIZE
  // The default team size is the total #threads in the machine
  // minus 1 thread for every core that has 3 or more threads.
  teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (counts[index] > maxCt[index]) {
      maxCt[index] = counts[index];
    }
  }

  __kmp_nThreadsPerCore = maxCt[threadIdIndex];
  nCoresPerPkg = maxCt[coreIdIndex];
  nPackages = totals[pkgIdIndex];

  // Check to see if the machine topology is uniform.
  unsigned prod = totals[maxIndex];
  for (index = threadIdIndex; index < maxIndex; index++) {
    prod *= maxCt[index];
  }
  bool uniform = (prod == totals[threadIdIndex]);
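  // Uniformity example (illustrative): with totals[maxIndex] = 2 packages,
  // maxCt[coreIdIndex] = 4 cores, and maxCt[threadIdIndex] = 2 threads, prod
  // is 2 * 4 * 2 = 16; the map is uniform only if exactly 16 thread records
  // (totals[threadIdIndex]) were parsed from the file.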
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  __kmp_ncores = totals[coreIdIndex];

  if (__kmp_affinity_verbose) {
    if (!KMP_AFFINITY_CAPABLE()) {
      KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    } else {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                __kmp_affin_fullMask);
      KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    }
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
    for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
               maxCt[threadIdIndex], __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }

#if KMP_MIC && REDUCE_TEAM_SIZE
  // Set the default team size.
  if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
    __kmp_dflt_team_nth = teamSize;
    KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
                  "__kmp_dflt_team_nth = %d\n",
                  __kmp_dflt_team_nth));
  }
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < num_avail; ++i) { // fill the os indices
    __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return 0;
  }

  // Count the number of levels which have more nodes at that level than at
  // the parent's level (with there being an implicit root node of the top
  // level). This is equivalent to saying that there is at least one node at
  // this level which has a sibling. These levels are in the map, and the
  // package level is always in the map.
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;

  int depth = 0;
  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (inMap[index]) {
      depth++;
    }
  }
  KMP_ASSERT(depth > 0);

  // Construct the data structure that is to be returned.
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;

  for (i = 0; i < num_avail; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    int dst_index = 0;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      addr.labels[dst_index] = threadInfo[i][src_index];
      if (src_index == pkgIdIndex) {
        pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
      }
      dst_index++;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    int src_index;
    __kmp_affinity_gran_levels = 0;
    for (src_index = threadIdIndex; src_index <= (int)maxIndex; src_index++) {
      if (!inMap[src_index]) {
        continue;
      }
      switch (src_index) {
      case threadIdIndex:
        if (__kmp_affinity_gran > affinity_gran_thread) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case coreIdIndex:
        if (__kmp_affinity_gran > affinity_gran_core) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case pkgIdIndex:
        if (__kmp_affinity_gran > affinity_gran_package) {
          __kmp_affinity_gran_levels++;
        }
        break;
      }
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(inMap);
  __kmp_free(lastId);
  __kmp_free(totals);
  __kmp_free(maxCt);
  __kmp_free(counts);
  CLEANUP_THREAD_INFO;
  return depth;
}
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
  // First form a table of affinity masks in order of OS thread id.
  unsigned depth;
  unsigned maxOsId;
  unsigned i;

  KMP_ASSERT(numAddrs > 0);
  depth = address2os[0].first.depth;

  maxOsId = 0;
  for (i = numAddrs - 1;; --i) {
    unsigned osId = address2os[i].second;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
    if (i == 0)
      break;
  }
  kmp_affin_mask_t *osId2Mask;
  KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));

  // Sort the address2os table according to physical order. Doing so will put
  // all threads on the same core/package/node in consecutive locations.
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);

  KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
  if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  }
  if (__kmp_affinity_gran_levels >= (int)depth) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffThreadsMayMigrate);
    }
  }

  // Run through the table, forming the masks for all threads on each core.
  // Threads on the same core will have identical Address objects, not
  // considering the last level, which must be the thread id. All threads on
  // a core will appear consecutively.
  unsigned unique = 0;
  unsigned j = 0; // index of 1st thread on core
  unsigned leader = 0;
  Address *leaderAddr = &(address2os[0].first);
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);
  KMP_CPU_SET(address2os[0].second, sum);
  for (i = 1; i < numAddrs; i++) {
    // If this thread is sufficiently close to the leader (within the
    // granularity setting), then set the bit for this os thread in the
    // affinity mask for this group, and go on to the next thread.
    if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
      KMP_CPU_SET(address2os[i].second, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the osId2Mask table. Mark the first address as a leader.
    for (; j < i; j++) {
      unsigned osId = address2os[j].second;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
      KMP_CPU_COPY(mask, sum);
      address2os[j].first.leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    leaderAddr = &(address2os[i].first);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[i].second, sum);
  }

  // For every thread in the last group, copy the mask to the thread's
  // entry in the osId2Mask table.
  for (; j < i; j++) {
    unsigned osId = address2os[j].second;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
    KMP_CPU_COPY(mask, sum);
    address2os[j].first.leader = (j == leader);
  }
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}
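// Grouping example (illustrative): with granularity=core
// (__kmp_affinity_gran_levels = 1) on a core whose HW threads are OS procs 4
// and 5, both entries fall within the leader's granularity, so osId2Mask[4]
// and osId2Mask[5] each hold the mask {4,5}, and numUnique counts one group
// per core rather than one per thread.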
// Stuff for the affinity proclist parsers. It's easier to declare these vars
// as file-static than to try and pass them through the calling sequence of
// the recursive-descent OMP_PLACES parser.
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int i;                                                                   \
      numNewMasks *= 2;                                                        \
      kmp_affin_mask_t *temp;                                                  \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                         \
      for (i = 0; i < numNewMasks / 2; i++) {                                  \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);                    \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);                       \
        KMP_CPU_COPY(dest, src);                                               \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = temp;                                                         \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }

#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > _maxOsId) ||                                                \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                            \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    }                                                                          \
  }
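// ADD_MASK grows the newMasks vector geometrically: when nextNewMask reaches
// numNewMasks, capacity is doubled and the existing masks are copied over, so
// a sequence of n ADD_MASK calls performs O(n) total copies. ADD_MASK_OSID is
// the checked variant - it drops (with an optional warning) os ids that are
// outside the detected topology or not present in osId2Mask.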
// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
                                            unsigned int *out_numMasks,
                                            const char *proclist,
                                            kmp_affin_mask_t *osId2Mask,
                                            int maxOsId) {
  int i;
  const char *scan = proclist;
  const char *next = proclist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;
  kmp_affin_mask_t *sumMask;
  KMP_CPU_ALLOC(sumMask);
  int setSize = 0;

  for (;;) {
    int start, end, stride;

    SKIP_WS(scan);
    next = scan;
    if (*next == '\0') {
      break;
    }

    if (*next == '{') {
      int num;
      setSize = 0; // only count new processors in a { } mask
      next++; // skip '{'
      SKIP_WS(next);
      scan = next;

      // Read the first integer in the set.
      KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
      SKIP_DIGITS(next);
      num = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(num >= 0, "bad explicit proc list");

      // Copy the mask for that osId to the sum (union) mask.
      if ((num > maxOsId) ||
          (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, num);
        }
        KMP_CPU_ZERO(sumMask);
      } else {
        KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
        setSize = 1;
      }

      for (;;) {
        // Check for end of set.
        SKIP_WS(next);
        if (*next == '}') {
          next++; // skip '}'
          break;
        }

        // Skip optional comma.
        KMP_ASSERT2(*next == ',', "bad explicit proc list");
        next++;

        // Read the next integer in the set.
        SKIP_WS(next);
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                    "bad explicit proc list");
        SKIP_DIGITS(next);
        num = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(num >= 0, "bad explicit proc list");

        // Add the mask for that osId to the sum mask.
        if ((num > maxOsId) ||
            (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, num);
          }
        } else {
          KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
          setSize++;
        }
      }
      if (setSize > 0) {
        ADD_MASK(sumMask);
      }

      SKIP_WS(next);
      if (*next == ',') {
        next++; // skip ','
      }
      scan = next;
      continue;
    }

    // Read the first integer.
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(start >= 0, "bad explicit proc list");
    SKIP_WS(next);

    // If this isn't a range, then add a mask to the list and go on.
    if (*next != '-') {
      ADD_MASK_OSID(start, osId2Mask, maxOsId);
      if (*next == ',') {
        next++; // skip optional comma
      }
      scan = next;
      continue;
    }

    // This is a range. Skip over the '-' and read in the 2nd int.
    next++; // skip '-'
    SKIP_WS(next);
    scan = next;
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    end = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(end >= 0, "bad explicit proc list");

    // Check for a stride parameter.
    stride = 1;
    SKIP_WS(next);
    if (*next == ':') {
      // A stride is specified. Skip over the ':' and read the 3rd int.
      int sign = +1;
      next++; // skip ':'
      SKIP_WS(next);
      scan = next;
      if (*next == '-') {
        sign = -1;
        next++;
        SKIP_WS(next);
        scan = next;
      }
      KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                  "bad explicit proc list");
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(stride >= 0, "bad explicit proc list");
      stride *= sign;
    }

    // Do some range checks.
    KMP_ASSERT2(stride != 0, "bad explicit proc list");
    if (stride > 0) {
      KMP_ASSERT2(start <= end, "bad explicit proc list");
    } else {
      KMP_ASSERT2(start >= end, "bad explicit proc list");
    }
    KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

    // Add the mask for each OS proc # to the list.
    if (stride > 0) {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start <= end);
    } else {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start >= end);
    }

    // Skip optional comma.
    SKIP_WS(next);
    if (*next == ',') {
      next++;
    }
    scan = next;
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
  KMP_CPU_FREE(sumMask);
}
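// Worked example (illustration only, assuming all listed procs are valid):
// a proclist of "0,2-6:2,{8,9}" is parsed above into five masks:
//   {0}          - a single proc
//   {2} {4} {6}  - the range 2-6 with stride 2, one mask per proc
//   {8,9}        - the braced set, OR'ed into sumMask and added as one mask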
static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  const char *next;

  for (;;) {
    int start, count, stride, i;

    // Read in the starting proc id.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
                "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
        (*setSize)++;
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read count parameter.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
                "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start++;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read stride parameter.
    int sign = +1;
    for (;;) {
      SKIP_WS(*scan);
      if (**scan == '+') {
        (*scan)++; // skip '+'
        continue;
      }
      if (**scan == '-') {
        sign *= -1;
        (*scan)++; // skip '-'
        continue;
      }
      break;
    }
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
                "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    *scan = next;
    stride *= sign;

    // valid follow sets are ',' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start += stride;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
}
static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
  const char *next;

  // valid follow sets are '{' '!' and num
  SKIP_WS(*scan);
  if (**scan == '{') {
    (*scan)++; // skip '{'
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
    (*scan)++; // skip '}'
  } else if (**scan == '!') {
    (*scan)++; // skip '!'
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    next = *scan;
    SKIP_DIGITS(next);
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
      (*setSize)++;
    }
    *scan = next; // skip num
  } else {
    KMP_ASSERT2(0, "bad explicit places list");
  }
}
void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                      unsigned int *out_numMasks,
                                      const char *placelist,
                                      kmp_affin_mask_t *osId2Mask,
                                      int maxOsId) {
  int i, j, count, stride, sign;
  const char *scan = placelist;
  const char *next = placelist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial place to form the
  // current place; previousMask contains the previous place.
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  int setSize = 0;

  for (;;) {
    __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read count parameter.
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
                "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      stride = +1;
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read stride parameter.
      sign = +1;
      for (;;) {
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
                  "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }

    // Add places determined by initial_place : count : stride
    for (i = 0; i < count; i++) {
      if (setSize == 0) {
        break;
      }
      // Add the current place, then build the next place (tempMask) from that.
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          if ((__kmp_affinity_verbose ||
               (__kmp_affinity_warnings &&
                (__kmp_affinity_type != affinity_none))) &&
              i < count - 1) {
            KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // valid follow sets are ',' and EOL
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
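// Worked example (illustration only): a placelist of "{0,1}:4:2" builds the
// initial place {0,1} via __kmp_process_place, then the count/stride loop
// above shifts the previous mask by the stride three more times, yielding the
// four places {0,1} {2,3} {4,5} {6,7}, each appended with ADD_MASK.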
#undef ADD_MASK
#undef ADD_MASK_OSID
#if KMP_USE_HWLOC
// Remove the PUs that descend from object o from the full mask; return the
// number of PUs that were cleared.
static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
  int skipped = 0;
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
      KMP_CPU_CLR(idx, __kmp_affin_fullMask);
      KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
      ++skipped;
    }
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return skipped; // count number of skipped units
}

// Check if object o has any PUs still present in the full mask.
static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
      return 1; // found at least one available PU
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return 0; // no available PUs under this object
}
#endif // KMP_USE_HWLOC
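// These two helpers keep __kmp_affin_fullMask consistent while KMP_HW_SUBSET
// prunes the machine: the skip variant clears every PU under an object and
// reports how many it cleared (so the caller can advance its n_old cursor),
// while the has_PUs variant is the cheap "is anything left here?" probe used
// before descending into an object.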
static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
  AddrUnsPair *newAddr;
  if (__kmp_hws_requested == 0)
    goto _exit; // no topology limiting actions requested, exit
#if KMP_USE_HWLOC
  if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
    // Number of subobjects is calculated dynamically; this works fine for
    // any nesting of objects in the machine topology.
    hwloc_topology_t tp = __kmp_hwloc_topology;
    int nS = 0, nN = 0, nL = 0, nC = 0,
        nT = 0; // logical index including skipped
    int nCr = 0, nTr = 0; // number of requested units
    int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0; // counters
    hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
    int L2depth, idx;

    // check support of extensive hardware subsetting
    int numa_support = 0, tile_support = 0;
    if (__kmp_pu_os_idx)
      hT = hwloc_get_pu_obj_by_os_index(tp,
                                        __kmp_pu_os_idx[__kmp_avail_proc - 1]);
    else
      hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
    if (hT == NULL) { // something has gone wrong
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check NUMA node
    hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
    hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
    if (hN != NULL && hN->depth > hS->depth) {
      numa_support = 1; // 1 in case socket includes node(s)
    } else if (__kmp_hws_node.num > 0) {
      // don't support sockets inside NUMA node
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check L2 cache; get object by depth because of multiple caches possible
    L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
    hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
    if (hL != NULL &&
        __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
      tile_support = 1; // no sense to count L2 if it includes single core
    } else if (__kmp_hws_tile.num > 0) {
      if (__kmp_hws_core.num == 0) {
        __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
        __kmp_hws_tile.num = 0;
      } else {
        // L2 and core are both requested, but represent same object
        KMP_WARNING(AffHWSubsetInvalid);
        goto _exit;
      }
    }
    // end of check of extensive hardware subsetting

    // fill in defaults and validate requested offsets/numbers
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_socket.offset >= nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (numa_support) {
      int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
                                                  &hN); // num nodes in socket
      if (__kmp_hws_node.num == 0)
        __kmp_hws_node.num = NN; // use all available NUMA nodes
      if (__kmp_hws_node.offset >= NN) {
        KMP_WARNING(AffHWSubsetManyNodes);
        goto _exit;
      }
      if (tile_support) {
        // get num tiles in node
        int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
        if (__kmp_hws_tile.num == 0) {
          __kmp_hws_tile.num = NL + 1;
        } // use all available tiles, some node may have more tiles, thus +1
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in node
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC;
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    } else { // numa_support
      if (tile_support) {
        // get num tiles in socket
        int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
        if (__kmp_hws_tile.num == 0)
          __kmp_hws_tile.num = NL;
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC;
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in socket
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC;
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    }
    if (__kmp_hws_proc.num == 0)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
    if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
      KMP_WARNING(AffHWSubsetManyProcs);
      goto _exit;
    }
    // end of validation

    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
                                              __kmp_avail_proc); // max size
    // main loop to form HW subset
    hS = NULL;
    int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
    for (int s = 0; s < NP; ++s) {
      // Check Socket ------------------------------------------------------
      hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
      if (!__kmp_hwloc_obj_has_PUs(tp, hS))
        continue; // skip socket if all PUs are out of fullMask
      ++nS; // only count objects that have PUs in affinity mask
      if (nS <= __kmp_hws_socket.offset ||
          nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
        n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
        continue; // move to next socket
      }
      nCr = 0; // count number of cores per socket
      // socket requested; check NUMA node, then L2 (tile), then core, then PU
      if (numa_support) {
        nN = 0;
        hN = NULL;
        int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
                                                    &hN);
        for (int n = 0; n < NN; ++n) {
          // Check NUMA Node -----------------------------------------------
          if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // skip node if all PUs are out of fullMask
          }
          ++nN;
          if (nN <= __kmp_hws_node.offset ||
              nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
            // skip node as not requested
            n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // move to next node
          }
          // node requested, go down the topology tree
          if (tile_support) {
            nL = 0;
            hL = NULL;
            int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
            for (int l = 0; l < NL; ++l) {
              // Check L2 (tile) -------------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // skip tile if all PUs are out of fullMask
              }
              ++nL;
              if (nL <= __kmp_hws_tile.offset ||
                  nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
                // skip tile as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // move to next tile
              }
              // tile requested, go down the topology tree
              nC = 0;
              hC = NULL;
              int NC = __kmp_hwloc_count_children_by_type(tp, hL,
                                                          HWLOC_OBJ_CORE, &hC);
              for (int c = 0; c < NC; ++c) {
                // Check Core ----------------------------------------------
                if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // skip core if all PUs are out of fullMask
                }
                ++nC;
                if (nC <= __kmp_hws_core.offset ||
                    nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                  // skip core as not requested
                  n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // move to next core
                }
                // core requested, go down to PUs
                nT = 0;
                nTr = 0;
                hT = NULL;
                int NT = __kmp_hwloc_count_children_by_type(tp, hC,
                                                            HWLOC_OBJ_PU, &hT);
                for (int t = 0; t < NT; ++t) {
                  // Check PU ----------------------------------------------
                  idx = hT->os_index;
                  if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // skip PU if not in fullMask
                  }
                  ++nT;
                  if (nT <= __kmp_hws_proc.offset ||
                      nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                    // skip PU
                    KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                    ++n_old;
                    KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // move to next PU
                  }
                  ++nTr;
                  if (pAddr) // collect requested thread's data
                    newAddr[n_new] = (*pAddr)[n_old];
                  ++n_new;
                  ++n_old;
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                } // threads loop
                if (nTr > 0) {
                  ++nCr; // num cores per socket
                  ++nCo; // total num cores
                  if (nTr > nTpC)
                    nTpC = nTr; // calc max threads per core
                }
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              } // cores loop
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
            } // tiles loop
          } else { // tile_support
            // no tiles, check cores
            nC = 0;
            hC = NULL;
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ------------------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ------------------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
          } // tile_support
          hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
        } // nodes loop
      } else { // numa_support
        if (tile_support) {
          nL = 0;
          hL = NULL;
          // num tiles in current socket
          int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
          for (int l = 0; l < NL; ++l) {
            // Check L2 (tile) ---------------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // skip tile if all PUs are out of fullMask
            }
            ++nL;
            if (nL <= __kmp_hws_tile.offset ||
                nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
              // skip tile as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // move to next tile
            }
            // tile requested, go down the topology tree
            nC = 0;
            hC = NULL;
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ------------------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ------------------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
            hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
          } // tiles loop
        } else { // tile_support
          // no tiles, check cores
          nC = 0;
          hC = NULL;
          int NC =
              __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
          for (int c = 0; c < NC; ++c) {
            // Check Core --------------------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // skip core if all PUs are out of fullMask
            }
            ++nC;
            if (nC <= __kmp_hws_core.offset ||
                nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
              // skip core as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // move to next core
            }
            // core requested, go down to PUs
            nT = 0;
            nTr = 0;
            hT = NULL;
            int NT =
                __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
            for (int t = 0; t < NT; ++t) {
              // Check PU --------------------------------------------------
              idx = hT->os_index;
              if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // skip PU if not in fullMask
              }
              ++nT;
              if (nT <= __kmp_hws_proc.offset ||
                  nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                // skip PU
                KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                ++n_old;
                KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // move to next PU
              }
              ++nTr;
              if (pAddr) // collect requested thread's data
                newAddr[n_new] = (*pAddr)[n_old];
              ++n_new;
              ++n_old;
              hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
            } // threads loop
            if (nTr > 0) {
              ++nCr; // num cores per socket
              ++nCo; // total num cores
              if (nTr > nTpC)
                nTpC = nTr; // calc max threads per core
            }
            hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
          } // cores loop
        } // tile_support
      } // numa_support
      if (nCr > 0) { // found cores?
        ++nPkg; // num requested sockets
        if (nCr > nCpP)
          nCpP = nCr; // calc max cores per socket
      }
    } // sockets loop

    // check the subset is valid
    KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
    KMP_DEBUG_ASSERT(nPkg > 0);
    KMP_DEBUG_ASSERT(nCpP > 0);
    KMP_DEBUG_ASSERT(nTpC > 0);
    KMP_DEBUG_ASSERT(nCo > 0);
    KMP_DEBUG_ASSERT(nPkg <= nPackages);
    KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
    KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);

    nPackages = nPkg; // correct num sockets
    nCoresPerPkg = nCpP; // correct num cores per socket
    __kmp_nThreadsPerCore = nTpC; // correct num threads per core
    __kmp_avail_proc = n_new; // correct num procs
    __kmp_ncores = nCo; // correct num cores
    // hwloc topology method end
    goto _exit;
  }
#endif // KMP_USE_HWLOC
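  // Invariant of the traversal above (both the kept and the skipped paths
  // advance n_old, either per PU or via __kmp_hwloc_skip_PUs_obj): every PU
  // that was in __kmp_affin_fullMask is visited exactly once, which is what
  // the KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc) check verifies.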
  int n_old = 0, n_new = 0, proc_num = 0;
  if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
    KMP_WARNING(AffHWSubsetNoHWLOC);
    goto _exit;
  }
  if (__kmp_hws_socket.num == 0)
    __kmp_hws_socket.num = nPackages; // use all available sockets
  if (__kmp_hws_core.num == 0)
    __kmp_hws_core.num = nCoresPerPkg; // use all available cores
  if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
    __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
  if (!__kmp_affinity_uniform_topology()) {
    KMP_WARNING(AffHWSubsetNonUniform);
    goto _exit; // don't support non-uniform topology
  }
  if (depth > 3) {
    KMP_WARNING(AffHWSubsetNonThreeLevel);
    goto _exit; // don't support not-3-level topology
  }
  if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
    KMP_WARNING(AffHWSubsetManySockets);
    goto _exit;
  }
  if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
    KMP_WARNING(AffHWSubsetManyCores);
    goto _exit;
  }
  // Form the requested subset.
  if (pAddr) // pAddr is NULL in case of affinity_none
    newAddr = (AddrUnsPair *)__kmp_allocate(
        sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
        __kmp_hws_proc.num);
  for (int i = 0; i < nPackages; ++i) {
    if (i < __kmp_hws_socket.offset ||
        i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
      // skip not-requested socket
      n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
      if (__kmp_pu_os_idx != NULL) {
        // walk through the skipped socket
        for (int j = 0; j < nCoresPerPkg; ++j) {
          for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
            KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
            ++proc_num;
          }
        }
      }
    } else {
      // walk through the requested socket
      for (int j = 0; j < nCoresPerPkg; ++j) {
        if (j < __kmp_hws_core.offset ||
            j >= __kmp_hws_core.offset + __kmp_hws_core.num) {
          // skip not-requested core
          n_old += __kmp_nThreadsPerCore;
          if (__kmp_pu_os_idx != NULL) {
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              ++proc_num;
            }
          }
        } else {
          // walk through the requested core
          for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
            if (k < __kmp_hws_proc.num) {
              if (pAddr) // collect requested thread's data
                newAddr[n_new] = (*pAddr)[n_old];
              n_new++;
            } else {
              if (__kmp_pu_os_idx != NULL)
                KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
            }
            n_old++;
            ++proc_num;
          }
        }
      }
    }
  }
  KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
  KMP_DEBUG_ASSERT(n_new ==
                   __kmp_hws_socket.num * __kmp_hws_core.num *
                       __kmp_hws_proc.num);
  nPackages = __kmp_hws_socket.num; // correct nPackages
  nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
  __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
  __kmp_avail_proc = n_new; // correct avail_proc
  __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores

_exit:
  if (pAddr) {
    __kmp_free(*pAddr);
    *pAddr = newAddr; // replace old topology with new one
  }
  if (__kmp_affinity_verbose) {
    char m[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
    }
    KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf, "%d", nPackages);
    KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
    __kmp_str_buf_free(&buf);
  }
  if (__kmp_pu_os_idx != NULL) {
    __kmp_free(__kmp_pu_os_idx);
    __kmp_pu_os_idx = NULL;
  }
}
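// Worked example (illustration only): on a uniform 4-socket, 8-core, 2-thread
// machine, a request equivalent to KMP_HW_SUBSET=2s,4c,2t passes the checks
// above and keeps n_new = 2 * 4 * 2 = 16 procs, after which nPackages,
// nCoresPerPkg, __kmp_nThreadsPerCore and __kmp_avail_proc are rewritten to
// describe the pruned machine (matching the n_new debug assert).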
// This function figures out the deepest level at which there is at least one
// cluster/core with more than one processing unit bound to it.
static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
                                          int nprocs, int bottom_level) {
  int core_level = 0;

  for (int i = 0; i < nprocs; i++) {
    for (int j = bottom_level; j > 0; j--) {
      if (address2os[i].first.labels[j] > 0) {
        if (core_level < (j - 1)) {
          core_level = j - 1;
        }
      }
    }
  }
  return core_level;
}

// This function counts the number of clusters/cores at the given level.
static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
                                         int nprocs, int bottom_level,
                                         int core_level) {
  int ncores = 0;
  int i, j;

  j = bottom_level;
  for (i = 0; i < nprocs; i++) {
    for (j = bottom_level; j > core_level; j--) {
      if ((i + 1) < nprocs) {
        if (address2os[i + 1].first.labels[j] > 0) {
          break;
        }
      }
    }
    if (j == core_level) {
      ncores++;
    }
  }
  if (j > core_level) {
    // In case of (nprocs < __kmp_avail_proc) we may end too deep and miss one
    // core. May occur when called from __kmp_affinity_find_core().
    ncores++;
  }
  return ncores;
}

// This function finds to which cluster/core the given processing unit is
// bound.
static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
                                    int bottom_level, int core_level) {
  return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
                                       core_level) -
         1;
}

// This function finds the maximal number of processing units bound to a
// cluster/core at the given level.
static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
                                            int nprocs, int bottom_level,
                                            int core_level) {
  int maxprocpercore = 0;

  if (core_level < bottom_level) {
    for (int i = 0; i < nprocs; i++) {
      int percore = address2os[i].first.labels[core_level + 1] + 1;
      if (percore > maxprocpercore) {
        maxprocpercore = percore;
      }
    }
  } else {
    maxprocpercore = 1;
  }
  return maxprocpercore;
}
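// Example of the label arithmetic (illustration only): with Address labels
// {package, core, thread} and bottom_level == 2, any proc whose thread label
// is nonzero proves that level 1 (core) is the deepest level with multiple
// PUs per object, so __kmp_affinity_find_core_level returns 1; ncores and
// max-procs-per-core are then computed relative to that level.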
static AddrUnsPair *address2os = NULL;
static int *procarr = NULL;
static int __kmp_aff_depth = 0;

#if KMP_USE_HIER_SCHED
#define KMP_EXIT_AFF_NONE                                                      \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                           \
  KMP_ASSERT(address2os == NULL);                                             \
  __kmp_apply_thread_places(NULL, 0);                                         \
  __kmp_create_affinity_none_places();                                        \
  __kmp_dispatch_set_hierarchy_values();                                      \
  return;
#else
#define KMP_EXIT_AFF_NONE                                                      \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                           \
  KMP_ASSERT(address2os == NULL);                                             \
  __kmp_apply_thread_places(NULL, 0);                                         \
  __kmp_create_affinity_none_places();                                        \
  return;
#endif
// Create a one-element mask array (set of places) which only contains the
// initial process's affinity mask.
static void __kmp_create_affinity_none_places() {
  KMP_ASSERT(__kmp_affin_fullMask != NULL);
  KMP_ASSERT(__kmp_affinity_type == affinity_none);
  __kmp_affinity_num_masks = 1;
  KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
  kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
  KMP_CPU_COPY(dest, __kmp_affin_fullMask);
}
static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
  KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
  for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
    int j = depth - i - 1;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  for (; i < depth; i++) {
    int j = i - __kmp_affinity_compact;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  return 0;
}
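// Ordering sketch (illustration only): with depth == 3 ({package, core,
// thread} child numbers), KMP_AFFINITY=scatter becomes __kmp_affinity_compact
// == 2 (depth - 1 - 0), so the comparator sorts on the thread level first and
// the package level last: consecutive sorted entries change package fastest,
// which is exactly the scatter distribution. compact == 0 is the pure
// outer-to-inner sort used for compact placement.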
static void __kmp_aux_affinity_initialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_ASSERT(__kmp_affin_fullMask != NULL);
    return;
  }

  // Create the "full" mask - this defines all of the processors that we
  // consider to be in the machine model. If respect is set, then it is the
  // initialization thread's affinity mask. Otherwise, it is all processors
  // that we know about on the machine.
  if (__kmp_affin_fullMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_fullMask);
  }
  if (KMP_AFFINITY_CAPABLE()) {
    if (__kmp_affinity_respect_mask) {
      __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);

      // Count the number of available processors.
      unsigned i;
      __kmp_avail_proc = 0;
      KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
          continue;
        }
        __kmp_avail_proc++;
      }
      if (__kmp_avail_proc > __kmp_xproc) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
      }
    } else {
      __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
      __kmp_avail_proc = __kmp_xproc;
    }
  }

  if (__kmp_affinity_gran == affinity_gran_tile &&
      // check if user's request is valid
      __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
    KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
    __kmp_affinity_gran = affinity_gran_package;
  }

  int depth = -1;
  kmp_i18n_id_t msg_id = kmp_i18n_null;

  // For backward compatibility, setting KMP_CPUINFO_FILE =>
  // KMP_TOPOLOGY_METHOD=cpuinfo
  if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
    __kmp_affinity_top_method = affinity_top_method_cpuinfo;
  }

  if (__kmp_affinity_top_method == affinity_top_method_all) {
    // In the default code path, errors are not fatal - we just try using
    // another method. We only emit a warning message if affinity is on, or
    // the verbose flag is set, and the nowarnings flag was not set.
    const char *file_name = NULL;
    int line = 0;
#if KMP_USE_HWLOC
    if (depth < 0 &&
        __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      }
      if (!__kmp_hwloc_error) {
        depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        } else if (depth < 0 && __kmp_affinity_verbose) {
          KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
        }
      } else if (__kmp_affinity_verbose) {
        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
      }
    }
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
      }
      file_name = NULL;
      depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }
      if (depth < 0) {
        if (__kmp_affinity_verbose) {
          if (msg_id != kmp_i18n_null) {
            KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
                       __kmp_i18n_catgets(msg_id),
                       KMP_I18N_STR(DecodingLegacyAPIC));
          } else {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
                       KMP_I18N_STR(DecodingLegacyAPIC));
          }
        }
        file_name = NULL;
        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_LINUX
    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        if (msg_id != kmp_i18n_null) {
          KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                     __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
        } else {
          KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
        }
      }

      FILE *f = fopen("/proc/cpuinfo", "r");
      if (f == NULL) {
        msg_id = kmp_i18n_str_CantOpenCpuinfo;
      } else {
        file_name = "/proc/cpuinfo";
        depth =
            __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }
#endif /* KMP_OS_LINUX */

#if KMP_GROUP_AFFINITY
    if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
      }
      depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
      KMP_ASSERT(depth != 0);
    }
#endif /* KMP_GROUP_AFFINITY */

    if (depth < 0) {
      if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
        if (file_name == NULL) {
          KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
        } else if (line == 0) {
          KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
        } else {
          KMP_INFORM(UsingFlatOSFileLine, file_name, line,
                     __kmp_i18n_catgets(msg_id));
        }
      }

      file_name = "";
      depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }
      KMP_ASSERT(depth > 0);
      KMP_ASSERT(address2os != NULL);
    }
  }
#if KMP_USE_HWLOC
  else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
    KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
    }
    depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
  }
#endif // KMP_USE_HWLOC

// If the user has specified that a particular topology discovery method is to
// be used, then we abort if that method fails. The exception is group
// affinity, which might have been implicitly set.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
    }
    depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
    }
    depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
    const char *filename;
    if (__kmp_cpuinfo_file != NULL) {
      filename = __kmp_cpuinfo_file;
    } else {
      filename = "/proc/cpuinfo";
    }

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
    }

    FILE *f = fopen(filename, "r");
    if (f == NULL) {
      int code = errno;
      if (__kmp_cpuinfo_file != NULL) {
        __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
                    KMP_HNT(NameComesFrom_CPUINFO_FILE), __kmp_msg_null);
      } else {
        __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
                    __kmp_msg_null);
      }
    }
    int line = 0;
    depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
    fclose(f);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      if (line > 0) {
        KMP_FATAL(FileLineMsgExiting, filename, line,
                  __kmp_i18n_catgets(msg_id));
      } else {
        KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
      }
    }
    if (__kmp_affinity_type == affinity_none) {
      KMP_ASSERT(depth == 0);
      KMP_EXIT_AFF_NONE;
    }
  }

#if KMP_GROUP_AFFINITY
  else if (__kmp_affinity_top_method == affinity_top_method_group) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
    }
    depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
    KMP_ASSERT(depth != 0);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }
#endif /* KMP_GROUP_AFFINITY */

  else if (__kmp_affinity_top_method == affinity_top_method_flat) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
    }
    depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    // should not fail
    KMP_ASSERT(depth > 0);
    KMP_ASSERT(address2os != NULL);
  }

#if KMP_USE_HIER_SCHED
  __kmp_dispatch_set_hierarchy_values();
#endif

  if (address2os == NULL) {
    if (KMP_AFFINITY_CAPABLE() &&
        (__kmp_affinity_verbose ||
         (__kmp_affinity_warnings &&
          (__kmp_affinity_type != affinity_none)))) {
      KMP_WARNING(ErrorInitializeAffinity);
    }
    __kmp_affinity_type = affinity_none;
    __kmp_create_affinity_none_places();
    KMP_AFFINITY_DISABLE();
    return;
  }

  if (__kmp_affinity_gran == affinity_gran_tile
#if KMP_USE_HWLOC
      && __kmp_tile_depth == 0
#endif
      ) {
    // tiles requested but not detected, warn user on this
    KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY");
  }

  __kmp_apply_thread_places(&address2os, depth);

  // Create the table of masks, indexed by thread Id.
  unsigned maxIndex;
  unsigned numUnique;
  kmp_affin_mask_t *osId2Mask =
      __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
  if (__kmp_affinity_gran_levels == 0) {
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
  }

  // Set the childNums vector in all Address objects. This must be done before
  // we can sort using __kmp_affinity_cmp_Address_child_num(), which takes
  // into account the setting of __kmp_affinity_compact.
  __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);

  switch (__kmp_affinity_type) {

  case affinity_explicit:
    KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
      __kmp_affinity_process_proclist(
          &__kmp_affinity_masks, &__kmp_affinity_num_masks,
          __kmp_affinity_proclist, osId2Mask, maxIndex);
    } else {
      __kmp_affinity_process_placelist(
          &__kmp_affinity_masks, &__kmp_affinity_num_masks,
          __kmp_affinity_proclist, osId2Mask, maxIndex);
    }
    if (__kmp_affinity_num_masks == 0) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings &&
           (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffNoValidProcID);
      }
      __kmp_affinity_type = affinity_none;
      __kmp_create_affinity_none_places();
      return;
    }
    break;

  // The other affinity types rely on sorting the Addresses according to some
  // permutation of the machine topology tree. Set __kmp_affinity_compact and
  // __kmp_affinity_offset appropriately, then jump to a common code fragment
  // to do the sort and create the array of affinity masks.

  case affinity_logical:
    __kmp_affinity_compact = 0;
    if (__kmp_affinity_offset) {
      __kmp_affinity_offset =
          __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
    }
    goto sortAddresses;

  case affinity_physical:
    if (__kmp_nThreadsPerCore > 1) {
      __kmp_affinity_compact = 1;
      if (__kmp_affinity_compact >= depth) {
        __kmp_affinity_compact = 0;
      }
    } else {
      __kmp_affinity_compact = 0;
    }
    if (__kmp_affinity_offset) {
      __kmp_affinity_offset =
          __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
    }
    goto sortAddresses;

  case affinity_scatter:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = 0;
    } else {
      __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
    }
    goto sortAddresses;

  case affinity_compact:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = depth - 1;
    }
    goto sortAddresses;

  case affinity_balanced:
    if (depth <= 1) {
      if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
        KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
      }
      __kmp_affinity_type = affinity_none;
      __kmp_create_affinity_none_places();
      return;
    } else if (!__kmp_affinity_uniform_topology()) {
      // Save the depth for further usage.
      __kmp_aff_depth = depth;

      int core_level = __kmp_affinity_find_core_level(
          address2os, __kmp_avail_proc, depth - 1);
      int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                                 depth - 1, core_level);
      int maxprocpercore = __kmp_affinity_max_proc_per_core(
          address2os, __kmp_avail_proc, depth - 1, core_level);

      int nproc = ncores * maxprocpercore;
      if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
        if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
          KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
        }
        __kmp_affinity_type = affinity_none;
        return;
      }

      procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        procarr[i] = -1;
      }

      int lastcore = -1;
      int inlastcore = 0;
      for (int i = 0; i < __kmp_avail_proc; i++) {
        int proc = address2os[i].second;
        int core =
            __kmp_affinity_find_core(address2os, i, depth - 1, core_level);

        if (core == lastcore) {
          inlastcore++;
        } else {
          inlastcore = 0;
        }
        lastcore = core;

        procarr[core * maxprocpercore + inlastcore] = proc;
      }
    }
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = depth - 1;
    }

  sortAddresses:
    // Allocate the gtid->affinity mask table.
    if (__kmp_affinity_dups) {
      __kmp_affinity_num_masks = __kmp_avail_proc;
    } else {
      __kmp_affinity_num_masks = numUnique;
    }

    if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
        (__kmp_affinity_num_places > 0) &&
        ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
      __kmp_affinity_num_masks = __kmp_affinity_num_places;
    }

    KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);

    // Sort the address2os table according to the current setting of
    // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
    qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
    {
      int i;
      unsigned j;
      for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
        if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
          continue;
        }
        unsigned osId = address2os[i].second;
        kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
        KMP_ASSERT(KMP_CPU_ISSET(osId, src));
        KMP_CPU_COPY(dest, src);
        if (++j >= __kmp_affinity_num_masks) {
          break;
        }
      }
      KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
    }
    break;

  default:
    KMP_ASSERT2(0, "Unexpected affinity setting");
  }

  KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
  machine_hierarchy.init(address2os, __kmp_avail_proc);
}
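// End-to-end flow of __kmp_aux_affinity_initialize, for orientation: build
// the full mask, pick a topology discovery method (hwloc / x2apic / legacy
// apic / cpuinfo / proc groups / flat), apply any KMP_HW_SUBSET pruning, fold
// os ids into per-place masks with __kmp_create_masks, then either honor an
// explicit proclist/placelist or sort addresses by __kmp_affinity_compact and
// materialize __kmp_affinity_masks for round-robin assignment.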
#undef KMP_EXIT_AFF_NONE

void __kmp_affinity_initialize(void) {
  // If the machine is not affinity capable, __kmp_affinity_type must be
  // affinity_disabled; slam it to affinity_none for the duration of the real
  // initialization routine, then restore it.
  int disabled = (__kmp_affinity_type == affinity_disabled);
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(disabled);
  }
  if (disabled) {
    __kmp_affinity_type = affinity_none;
  }
  __kmp_aux_affinity_initialize();
  if (disabled) {
    __kmp_affinity_type = affinity_disabled;
  }
}
void __kmp_affinity_uninitialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
    __kmp_affinity_masks = NULL;
  }
  if (__kmp_affin_fullMask != NULL) {
    KMP_CPU_FREE(__kmp_affin_fullMask);
    __kmp_affin_fullMask = NULL;
  }
  __kmp_affinity_num_masks = 0;
  __kmp_affinity_type = affinity_default;
  __kmp_affinity_num_places = 0;
  if (__kmp_affinity_proclist != NULL) {
    __kmp_free(__kmp_affinity_proclist);
    __kmp_affinity_proclist = NULL;
  }
  if (address2os != NULL) {
    __kmp_free(address2os);
    address2os = NULL;
  }
  if (procarr != NULL) {
    __kmp_free(procarr);
    procarr = NULL;
  }
#if KMP_USE_HWLOC
  if (__kmp_hwloc_topology != NULL) {
    hwloc_topology_destroy(__kmp_hwloc_topology);
    __kmp_hwloc_topology = NULL;
  }
#endif
  KMPAffinity::destroy_api();
}
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
  if (th->th.th_affin_mask == NULL) {
    KMP_CPU_ALLOC(th->th.th_affin_mask);
  } else {
    KMP_CPU_ZERO(th->th.th_affin_mask);
  }

  // Copy the thread mask to the kmp_info_t structure. If
  // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
  // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
  // is set, then the full mask is the same as the mask of the initialization
  // thread.
  kmp_affin_mask_t *mask;
  int i;

  if (KMP_AFFINITY_NON_PROC_BIND) {
    if ((__kmp_affinity_type == affinity_none) ||
        (__kmp_affinity_type == affinity_balanced)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  } else {
    if ((!isa_root) ||
        (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      // int i = some hash function or just iterate through the
      // spaces in a round robin fashion.
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }

  th->th.th_current_place = i;
  if (isa_root) {
    th->th.th_new_place = i;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
  } else if (KMP_AFFINITY_NON_PROC_BIND) {
    // When using a Non-OMP_PROC_BIND affinity method,
    // set all threads' place-partition-var to the entire place list.
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
  }

  if (i == KMP_PLACE_ALL) {
    KA_TRACE(100,
             ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
              gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
                   gtid, i));
  }

  KMP_CPU_COPY(th->th.th_affin_mask, mask);

  if (__kmp_affinity_verbose
      /* to avoid duplicate printing (will be correctly printed on barrier) */
      && (__kmp_affinity_type == affinity_none ||
          (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }

#if KMP_OS_WINDOWS
  // On Windows* OS, the process affinity mask might have changed. If the user
  // didn't request affinity and this call fails, just continue silently.
  if (__kmp_affinity_type == affinity_none) {
    __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
  } else
#endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
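// Place assignment sketch (illustration only): with no offset and
// __kmp_affinity_num_masks == 4, the i = (gtid + offset) % num_masks line
// above binds gtid 0->place 0, 1->1, 2->2, 3->3, 4->0, ... i.e. a simple
// round-robin over the place list; KMP_PLACE_ALL marks threads left on the
// full mask instead.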
void __kmp_affinity_set_place(int gtid) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

  KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
                 "place = %d)\n",
                 gtid, th->th.th_new_place, th->th.th_current_place));

  // Check that the new place is within this thread's partition.
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  KMP_ASSERT(th->th.th_new_place >= 0);
  KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
  if (th->th.th_first_place <= th->th.th_last_place) {
    KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
               (th->th.th_new_place <= th->th.th_last_place));
  } else {
    KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
               (th->th.th_new_place >= th->th.th_last_place));
  }

  // Copy the thread mask to the kmp_info_t structure,
  // and set this thread's affinity.
  kmp_affin_mask_t *mask =
      KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
  KMP_CPU_COPY(th->th.th_affin_mask, mask);
  th->th.th_current_place = th->th.th_new_place;

  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }
  __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
int __kmp_aux_set_affinity(void **mask) {
  int gtid;
  kmp_info_t *th;
  int retval;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf(
        "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
        buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
    } else {
      unsigned proc;
      int num_procs = 0;

      KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
        if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
          KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
          continue;
        }
        num_procs++;
      }
      if (num_procs == 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }

#if KMP_GROUP_AFFINITY
      if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }
#endif /* KMP_GROUP_AFFINITY */
    }
  }

  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  if (retval == 0) {
    KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
  }

  th->th.th_current_place = KMP_PLACE_UNDEFINED;
  th->th.th_new_place = KMP_PLACE_UNDEFINED;
  th->th.th_first_place = 0;
  th->th.th_last_place = __kmp_affinity_num_masks - 1;

  // Turn off 4.0 affinity for the current thread at this parallel level.
  th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;

  return retval;
}
int __kmp_aux_get_affinity(void **mask) {
  int gtid;
  int retval;
  kmp_info_t *th;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
                 gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
    }
  }

#if !KMP_OS_WINDOWS

  retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
                 gtid, buf);
  });
  return retval;

#else

  KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
  return 0;

#endif /* KMP_OS_WINDOWS */
}
int __kmp_aux_get_affinity_max_proc() {
  if (!KMP_AFFINITY_CAPABLE()) {
    return 0;
  }
#if KMP_GROUP_AFFINITY
  if (__kmp_num_proc_groups > 1) {
    return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
  }
#endif
  return __kmp_xproc;
}
int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, (""); {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}
int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, (""); {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}
int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, (""); {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return 0;
  }

  return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
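// The three *_affinity_mask_proc helpers share the same guard pattern: a proc
// outside [0, __kmp_aux_get_affinity_max_proc()) and a proc missing from
// __kmp_affin_fullMask are rejected with distinct return codes before the
// single-bit KMP_CPU_SET / KMP_CPU_CLR / KMP_CPU_ISSET operation is applied
// to the user-supplied mask.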
// Dynamic affinity settings - Affinity balanced
void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
  KMP_DEBUG_ASSERT(th);
  bool fine_gran = true;
  int tid = th->th.th_info.ds.ds_tid;

  switch (__kmp_affinity_gran) {
  case affinity_gran_fine:
  case affinity_gran_thread:
    break;
  case affinity_gran_core:
    if (__kmp_nThreadsPerCore > 1) {
      fine_gran = false;
    }
    break;
  case affinity_gran_package:
    if (nCoresPerPkg > 1) {
      fine_gran = false;
    }
    break;
  default:
    fine_gran = false;
  }

  if (__kmp_affinity_uniform_topology()) {
    int coreID;
    int threadID;
    // Number of hyper threads per core in HT machine
    int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
    // Number of cores
    int ncores = __kmp_ncores;
    if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
      __kmp_nth_per_core = __kmp_avail_proc / nPackages;
      ncores = nPackages;
    }
    // How many threads will be bound to each core
    int chunk = nthreads / ncores;
    // How many cores will have an additional thread bound to it - "big cores"
    int big_cores = nthreads % ncores;
    // Number of threads on the big cores
    int big_nth = (chunk + 1) * big_cores;
    if (tid < big_nth) {
      coreID = tid / (chunk + 1);
      threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
    } else { // tid >= big_nth
      coreID = (tid - big_cores) / chunk;
      threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
    }

    KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
                      "Illegal set affinity operation when not capable");

    kmp_affin_mask_t *mask = th->th.th_affin_mask;
    KMP_CPU_ZERO(mask);

    if (fine_gran) {
      int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
      KMP_CPU_SET(osID, mask);
    } else {
      for (int i = 0; i < __kmp_nth_per_core; i++) {
        int osID;
        osID = address2os[coreID * __kmp_nth_per_core + i].second;
        KMP_CPU_SET(osID, mask);
      }
    }
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
  } else { // Non-uniform topology

    kmp_affin_mask_t *mask = th->th.th_affin_mask;
    KMP_CPU_ZERO(mask);

    int core_level = __kmp_affinity_find_core_level(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
    int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                               __kmp_aff_depth - 1, core_level);
    int nth_per_core = __kmp_affinity_max_proc_per_core(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);

    // For performance, consider the special case
    // nthreads == __kmp_avail_proc.
    if (nthreads == __kmp_avail_proc) {
      if (fine_gran) {
        int osID = address2os[tid].second;
        KMP_CPU_SET(osID, mask);
      } else {
        int core = __kmp_affinity_find_core(address2os, tid,
                                            __kmp_aff_depth - 1, core_level);
        for (int i = 0; i < __kmp_avail_proc; i++) {
          int osID = address2os[i].second;
          if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
                                       core_level) == core) {
            KMP_CPU_SET(osID, mask);
          }
        }
      }
    } else if (nthreads <= ncores) {
      int core = 0;
      for (int i = 0; i < ncores; i++) {
        // Check if this core from procarr[] is in the mask.
        int in_mask = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            in_mask = 1;
            break;
          }
        }
        if (in_mask) {
          if (tid == core) {
            for (int j = 0; j < nth_per_core; j++) {
              int osID = procarr[i * nth_per_core + j];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
                // For fine granularity it is enough to set the first
                // available osID for this core.
                if (fine_gran) {
                  break;
                }
              }
            }
            break;
          } else {
            core++;
          }
        }
      }
    } else { // nthreads > ncores
      // Array to save the number of processors at each core.
      int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
      // Array to save the number of cores with "x" available processors.
      int *ncores_with_x_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      // Array to save the number of cores with # procs from x to nth_per_core.
      int *ncores_with_x_to_max_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));

      for (int i = 0; i <= nth_per_core; i++) {
        ncores_with_x_procs[i] = 0;
        ncores_with_x_to_max_procs[i] = 0;
      }

      for (int i = 0; i < ncores; i++) {
        int cnt = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            cnt++;
          }
        }
        nproc_at_core[i] = cnt;
        ncores_with_x_procs[cnt]++;
      }

      for (int i = 0; i <= nth_per_core; i++) {
        for (int j = i; j <= nth_per_core; j++) {
          ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
        }
      }

      // Max number of processors
      int nproc = nth_per_core * ncores;
      // An array to keep number of threads per each context
      int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        newarr[i] = 0;
      }

      int nth = nthreads;
      int flag = 0;
      while (nth > 0) {
        for (int j = 1; j <= nth_per_core; j++) {
          int cnt = ncores_with_x_to_max_procs[j];
          for (int i = 0; i < ncores; i++) {
            // Skip the core with 0 processors.
            if (nproc_at_core[i] == 0) {
              continue;
            }
            for (int k = 0; k < nth_per_core; k++) {
              if (procarr[i * nth_per_core + k] != -1) {
                if (newarr[i * nth_per_core + k] == 0) {
                  newarr[i * nth_per_core + k] = 1;
                  cnt--;
                  nth--;
                  break;
                } else {
                  if (flag != 0) {
                    newarr[i * nth_per_core + k]++;
                    cnt--;
                    nth--;
                    break;
                  }
                }
              }
            }
            if (cnt == 0 || nth == 0) {
              break;
            }
          }
          if (nth == 0) {
            break;
          }
        }
        flag = 1;
      }
      int sum = 0;
      for (int i = 0; i < nproc; i++) {
        sum += newarr[i];
        if (sum > tid) {
          if (fine_gran) {
            int osID = procarr[i];
            KMP_CPU_SET(osID, mask);
          } else {
            int coreID = i / nth_per_core;
            for (int ii = 0; ii < nth_per_core; ii++) {
              int osID = procarr[coreID * nth_per_core + ii];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
              }
            }
          }
          break;
        }
      }
      __kmp_free(newarr);
    }

    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
  }
}
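// Chunking example for the uniform branch (illustration only): nthreads == 10
// on ncores == 4 gives chunk = 2, big_cores = 2 and big_nth = 6, so tids 0-5
// land on the two "big" cores (3 threads each: coreID = tid / 3) and tids 6-9
// on the remaining cores (2 each: coreID = (tid - 2) / 2).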
#if KMP_OS_LINUX || KMP_OS_FREEBSD
// We don't need this entry for Windows because
// there is GetProcessAffinityMask() api.
#ifdef __cplusplus
extern "C"
#endif
    int
    kmp_set_thread_affinity_mask_initial()
// the function returns 0 on success,
//   -1 if we cannot bind the thread
{
  int gtid = __kmp_get_gtid();
  if (gtid < 0) {
    // Do not touch non-omp threads.
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
    return -1;
  }
  if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
    return -1;
  }
  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
                gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
  return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif

#endif // KMP_AFFINITY_SUPPORTED