#include "kmp_affinity.h"
#include "kmp_wrapper_getpid.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;
void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // The test below is true if affinity is available, but set to "none". Need
  // to init on first use of hierarchical barrier.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case num threads exceeds original
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
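// Illustrative note (not from the original source): with numPerLevel[0] == 4,
// a leaf parent waits on base_leaf_kids == 3 children, and skipPerLevel[i] is
// the stride (in thread ids) spanned by one subtree at level i, letting the
// hierarchical barrier compute parent/child ids arithmetically instead of
// re-walking the topology.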
#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;

void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use Hwloc if affinity isn't explicitly disabled and
  // user requests the Hwloc topology method
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}

void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}
#define KMP_ADVANCE_SCAN(scan)                                                 \
  while (*scan != '\0') {                                                      \
    scan++;                                                                    \
  }

// Print the affinity mask to the character array in a pretty format.
// The format is a comma separated list of non-negative integers or integer
// ranges: e.g., 1,2,3-5,7,9-15
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(buf_len >= 40);
  KMP_ASSERT(mask);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Check for empty set.
  if (mask->begin() == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    KMP_ADVANCE_SCAN(scan);
    KMP_ASSERT(scan <= end);
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (1) {
    // Find next range
    // [start, previous] is inclusive range of contiguous bits in mask
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // The first range does not need a comma printed before it, but the rest
    // of the ranges do need a comma beforehand
    if (!first_range) {
      KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
      KMP_ADVANCE_SCAN(scan);
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the affinity mask
    if (previous - start > 1) {
      KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start),
                   static_cast<int>(previous));
    } else {
      // Range with one or two contiguous bits in the affinity mask
      KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start));
      KMP_ADVANCE_SCAN(scan);
      if (previous - start > 0) {
        KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous));
      }
    }
    KMP_ADVANCE_SCAN(scan);
    // Start over with new start point
    start = finish;
    if (start == mask->end())
      break;
    // Check for overflow
    if (end - scan < 2)
      break;
  }

  // Check for a buffer overflow
  KMP_ASSERT(scan <= end);
  return buf;
}
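// Example (illustrative): a mask with bits {0,1,2,3,8,10} set is printed as
// "0-3,8,10" -- runs of three or more contiguous ids collapse into "lo-hi",
// while runs of one or two ids are printed individually.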
#undef KMP_ADVANCE_SCAN

// Print the affinity mask to the string buffer object in a pretty format
// The format is a comma separated list of non-negative integers or integer
// ranges: e.g., 1,2,3-5,7,9-15
kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
                                           kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(mask);

  __kmp_str_buf_clear(buf);

  // Check for empty set.
  if (mask->begin() == mask->end()) {
    __kmp_str_buf_print(buf, "%s", "{<empty>}");
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (1) {
    // Find next range
    // [start, previous] is inclusive range of contiguous bits in mask
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // The first range does not need a comma printed before it, but the rest
    // of the ranges do need a comma beforehand
    if (!first_range) {
      __kmp_str_buf_print(buf, "%s", ",");
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the affinity mask
    if (previous - start > 1) {
      __kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start),
                          static_cast<int>(previous));
    } else {
      // Range with one or two contiguous bits in the affinity mask
      __kmp_str_buf_print(buf, "%d", static_cast<int>(start));
      if (previous - start > 0) {
        __kmp_str_buf_print(buf, ",%d", static_cast<int>(previous));
      }
    }
    // Start over with new start point
    start = finish;
    if (start == mask->end())
      break;
  }
  return buf;
}
void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    for (proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the childNums
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level.
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        counts[labCt]++;
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(lastLabel);
  __kmp_free(counts);
}
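// Example (illustrative): two packages with raw OS labels 5 and 9 get
// childNums 0 and 1 -- each label is renumbered relative to its siblings
// under the same parent, independent of the raw values the OS reported.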
// All of the __kmp_affinity_* routines use this mask of the full machine.
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

// __kmp_affinity_uniform_topology() doesn't work when called from places which
// support arbitrarily many levels in the machine topology map, i.e. the
// non-default cases in __kmp_affinity_create_cpuinfo_map() and
// __kmp_affinity_create_x2apicid_map().
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
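// Example: a machine modeled as 2 packages x 8 cores x 2 threads is uniform
// iff all 32 hardware threads are available (__kmp_avail_proc == 32); masking
// out even one proc makes this test report non-uniform.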
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}
#if KMP_USE_HWLOC
// Print out the detailed machine topology map detected by hwloc, including
// the optional NUMA node and tile levels.
static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
                                          int depth, int *levels) {
  int proc;
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level = 1; // iterate over levels
    int label = 1; // iterate over labels
    __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
                        addrP[proc].first.labels[0]);
    if (__kmp_numa_detected)
      // node level follows package
      if (levels[level++] > 0)
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
                            addrP[proc].first.labels[label++]);
    if (__kmp_tile_depth > 0)
      // tile level follows node if any, or package
      if (levels[level++] > 0)
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
                            addrP[proc].first.labels[label++]);
    if (levels[level++] > 0)
      // core level follows tile if any, or package
      __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
                          addrP[proc].first.labels[label++]);
    if (levels[level++] > 0)
      // thread level is the latest
      __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
                          addrP[proc].first.labels[label++]);
    KMP_DEBUG_ASSERT(label == depth);
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
    __kmp_str_buf_clear(&buf);
  }
  __kmp_str_buf_free(&buf);
}

static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile;
// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology. The most common example is when you
// have one thread context per core: we don't want the extra thread context
// level if it offers no unique labels. Returns the new depth of address2os.
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
                                                  int depth, int *levels) {
  int level;
  int i;
  int radix1_detected;
  int new_depth = depth;
  for (level = depth - 1; level > 0; --level) {
    // Detect if this level is radix 1
    radix1_detected = 1;
    for (i = 1; i < nTh; ++i) {
      if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {
        // There are differing label values for this level so it stays
        radix1_detected = 0;
        break;
      }
    }
    if (!radix1_detected)
      continue;
    // Radix 1 was detected
    --new_depth;
    levels[level] = -1; // mark level as not present in address2os array
    if (level == new_depth) {
      // "turn off" deepest level, just decrement the depth that removes
      // the level from address2os array
      for (i = 0; i < nTh; ++i) {
        addrP[i].first.depth--;
      }
    } else {
      // For other levels, we move labels over and also reduce the depth
      int j;
      for (j = level; j < new_depth; ++j) {
        for (i = 0; i < nTh; ++i) {
          addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];
          addrP[i].first.depth--;
        }
      }
    }
  }
  return new_depth;
}
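// Example (illustrative): with one thread context per core, the thread level
// is radix 1 (every address carries the same label there), so it is removed
// and the returned depth shrinks, e.g. from package/core/thread to
// package/core.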
// Returns the number of objects of type 'type' below 'obj' within the
// topology tree structure.
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL &&
       hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
           obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}
// Counts the descendants of 'o' that sit at hwloc depth 'depth', and reports
// the first such descendant found through '*f'.
static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
                                               hwloc_obj_t o, unsigned depth,
                                               hwloc_obj_t *f) {
  if (o->depth == depth) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
  return sum; // will be 0 if no one found (as PU arity is 0)
}
// Counts the descendants of 'o' of hwloc type 'type', and reports the first
// such descendant found through '*f'.
static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
                                              hwloc_obj_type_t type,
                                              hwloc_obj_t *f) {
  if (!hwloc_compare_types(o->type, type)) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
  return sum; // will be 0 if no one found (as PU arity is 0)
}
// Walks the cores and PUs beneath 'obj', appending an address for each active
// PU to addrPair and updating the active-core/active-thread counters.
static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,
                                           int &nActiveThreads,
                                           int &num_active_cores,
                                           hwloc_obj_t obj, int depth,
                                           int *labels) {
  hwloc_obj_t core = NULL;
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);
  for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {
    hwloc_obj_t pu = NULL;
    KMP_DEBUG_ASSERT(core != NULL);
    int num_active_threads = 0;
    int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);
    for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
      KMP_DEBUG_ASSERT(pu != NULL);
      if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
        continue; // skip inactive (inaccessible) unit
      Address addr(depth + 2);
      KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                    obj->os_index, obj->logical_index, core->os_index,
                    core->logical_index, pu->os_index, pu->logical_index));
      for (int i = 0; i < depth; ++i)
        addr.labels[i] = labels[i]; // package, [node,] [tile,]
      addr.labels[depth] = core_id; // core
      addr.labels[depth + 1] = pu_id; // pu
      addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
      __kmp_pu_os_idx[nActiveThreads] = pu->os_index;
      nActiveThreads++; // count total active threads
      ++num_active_threads; // count active threads per core
    }
    if (num_active_threads) { // were there any active threads on the core?
      ++__kmp_ncores; // count total active cores
      ++num_active_cores; // count active cores per socket
      if (num_active_threads > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = num_active_threads; // calc maximum
    }
  }
  return 0;
}
// Check if NUMA node is detected below the package, and if a tile object is
// detected, record its depth.
static int __kmp_hwloc_check_numa() {
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
  int depth;

  // Check if NUMA node is below PACKAGE
  hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
  if (hT == NULL) // something has gone wrong
    return 1;
  hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
  hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
  KMP_DEBUG_ASSERT(hS != NULL);
  if (hN != NULL && hN->depth > hS->depth) {
    __kmp_numa_detected = TRUE; // socket includes node(s)
    if (__kmp_affinity_gran == affinity_gran_node) {
      __kmp_affinity_gran = affinity_gran_numa;
    }
  }

  // Check tile; get object by depth because of multiple caches possible
  depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
  hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
  hC = NULL;
  if (hL != NULL &&
      __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
    __kmp_tile_depth = depth; // tile consists of multiple cores
  return 0;
}
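// Note: "tile" is inferred from the unified L2 cache domain above -- if one
// L2 is shared by more than one core, its depth is recorded in
// __kmp_tile_depth and a tile level is inserted between package/node and core
// when building the map.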
static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
  hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a long name
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);
  __kmp_hwloc_check_numa();

  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from hwloc on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);
    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  int depth = 3;
  int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread
  int labels[3] = {0}; // package [,node] [,tile] - head of labels array
  if (__kmp_numa_detected)
    ++depth;
  if (__kmp_tile_depth)
    ++depth;

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly.
  hwloc_obj_t socket, node, tile;
  int nActiveThreads = 0;
  int socket_id = 0;
  // re-calculate globals to count only accessible resources
  __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
  nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
  for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL;
       socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
      socket_id++) {
    labels[0] = socket_id;
    if (__kmp_numa_detected) {
      int NN;
      int n_active_nodes = 0;
      node = NULL;
      NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
                                              &node);
      for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
        labels[1] = node_id;
        if (__kmp_tile_depth) {
          // NUMA + tiles
          int NT;
          int n_active_tiles = 0;
          tile = NULL;
          NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
                                                   &tile);
          for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
            labels[2] = tl_id;
            int n_active_cores = 0;
            __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                            n_active_cores, tile, 3, labels);
            if (n_active_cores) { // were there any active cores on the tile?
              ++n_active_tiles; // count active tiles per node
              if (n_active_cores > nCorePerTile)
                nCorePerTile = n_active_cores; // calc maximum
            }
          }
          if (n_active_tiles) { // were there any active tiles on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_tiles > nTilePerNode)
              nTilePerNode = n_active_tiles; // calc maximum
          }
        } else {
          // no tiles, NUMA only
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, node, 2, labels);
          if (n_active_cores) { // were there any active cores on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_cores > nCorePerNode)
              nCorePerNode = n_active_cores; // calc maximum
          }
        }
      }
      if (n_active_nodes) { // were there any active nodes on the socket?
        ++nPackages; // count total active packages
        if (n_active_nodes > nNodePerPkg)
          nNodePerPkg = n_active_nodes; // calc maximum
      }
    } else {
      if (__kmp_tile_depth) {
        // no NUMA, tiles are present
        int NT;
        int n_active_tiles = 0;
        tile = NULL;
        NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,
                                                 &tile);
        for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
          labels[1] = tl_id;
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, tile, 2, labels);
          if (n_active_cores) { // were there any active cores on the tile?
            ++n_active_tiles; // count active tiles per package
            if (n_active_cores > nCorePerTile)
              nCorePerTile = n_active_cores; // calc maximum
          }
        }
        if (n_active_tiles) { // were there any active tiles on the package?
          ++nPackages; // count total active packages
          if (n_active_tiles > nTilePerPkg)
            nTilePerPkg = n_active_tiles; // calc maximum
        }
      } else {
        // no NUMA, no tiles
        int n_active_cores = 0;
        __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,
                                        socket, 1, labels);
        if (n_active_cores) { // were there any active cores on the socket?
          ++nPackages; // count total active packages
          if (n_active_cores > nCoresPerPkg)
            nCoresPerPkg = n_active_cores; // calc maximum
        }
      }
    }
  }

  // If there's only one thread context to bind to, return now.
  KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
  KMP_ASSERT(nActiveThreads > 0);
  if (nActiveThreads == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[0];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);

  // Check if machine topology is uniform
  int nPUs = nPackages * __kmp_nThreadsPerCore;
  if (__kmp_numa_detected) {
    if (__kmp_tile_depth) { // NUMA + tiles
      nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);
    } else { // NUMA, no tiles
      nPUs *= (nNodePerPkg * nCorePerNode);
    }
  } else {
    if (__kmp_tile_depth) { // no NUMA, tiles are present
      nPUs *= (nTilePerPkg * nCorePerTile);
    } else { // no NUMA, no tiles
      nPUs *= nCoresPerPkg;
    }
  }
  unsigned uniform = (nPUs == nActiveThreads);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    if (__kmp_numa_detected) {
      if (__kmp_tile_depth) { // NUMA + tiles
        KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
                   __kmp_ncores);
      } else { // NUMA, no tiles
        KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
        nPUs *= (nNodePerPkg * nCorePerNode);
      }
    } else {
      if (__kmp_tile_depth) { // no NUMA, tiles are present
        KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,
                   nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
      } else { // no NUMA, no tiles
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        __kmp_str_buf_print(&buf, "%d", nPackages);
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
                   __kmp_nThreadsPerCore, __kmp_ncores);
        __kmp_str_buf_free(&buf);
      }
    }
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  int depth_full = depth; // number of levels before compressing
  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
                                                 levels);
  KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0; // lowest level (e.g. fine)
    if (__kmp_affinity_gran > affinity_gran_thread) {
      for (int i = 1; i <= depth_full; ++i) {
        if (__kmp_affinity_gran <= i) // only count deeper levels
          break;
        if (levels[depth_full - i] > 0)
          __kmp_affinity_gran_levels++;
      }
    }
    if (__kmp_affinity_gran > affinity_gran_package)
      __kmp_affinity_gran_levels++; // e.g. granularity = group
  }

  if (__kmp_affinity_verbose)
    __kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);

  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
#endif // KMP_USE_HWLOC

// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Even if __kmp_affinity_type == affinity_none, this routine might still be
  // called to set __kmp_ncores, as well as __kmp_nThreadsPerCore,
  // nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    __kmp_ncores = nPackages = __kmp_xproc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly.
  __kmp_ncores = nPackages = __kmp_avail_proc;
  __kmp_nThreadsPerCore = nCoresPerPkg = 1;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);

    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    KMP_INFORM(Uniform, "KMP_AFFINITY");
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  if (__kmp_affinity_type == affinity_none) {
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
        continue;
      __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
    }
    return 0;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(1);
    addr.labels[0] = i;
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
  }
  if (__kmp_affinity_verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Only the package level is modeled in the machine topology map,
    // so the #levels of granularity is either 0 or 1.
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels = 1;
    } else {
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 1;
}
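// Example (illustrative): on an 8-proc machine with no topology information,
// the flat map yields eight depth-1 addresses labeled 0..7, with
// nPackages == __kmp_ncores == 8 and one core/one thread per package, so
// compact and scatter orderings effectively coincide.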
#if KMP_GROUP_AFFINITY

// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at level
// 1. This lets the threads float among all procs in a group, if
// granularity=group (the default when there are multiple groups).
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // If we aren't affinity capable, then return now.
  // The flat mapping will be used.
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(2);
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                 addr.labels[1]);
    }
  }

  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }

      // Warning: can't use affinity granularity "gran" with group topology
      // method, using "thread".
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 2;
}

#endif /* KMP_GROUP_AFFINITY */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int __kmp_cpuid_mask_width(int count) {
  int r = 0;

  while ((1 << r) < count)
    ++r;
  return r;
}

class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; // ""
  unsigned maxThreadsPerPkg; // ""
  unsigned pkgId; // inferred from above values
  unsigned coreId; // ""
  unsigned threadId; // ""
};

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->pkgId < bb->pkgId)
    return -1;
  if (aa->pkgId > bb->pkgId)
    return 1;
  if (aa->coreId < bb->coreId)
    return -1;
  if (aa->coreId > bb->coreId)
    return 1;
  if (aa->threadId < bb->threadId)
    return -1;
  if (aa->threadId > bb->threadId)
    return 1;
  return 0;
}
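// Worked example (illustrative): with maxThreadsPerPkg = 16 and
// maxCoresPerPkg = 8, widthCT = 4, widthC = 3, and widthT = 1. An APIC id of
// 0b101101 then decomposes as pkgId = 0b10, coreId = 0b110, threadId = 0b1;
// the legacy leaf 1 / leaf 4 scheme below recovers the topology purely from
// bit fields of the 8-bit APIC id.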
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use an
// algorithm which cycles through the available os threads, setting the
// current thread's affinity mask to that thread, and then retrieves the Apic
// Id for each thread context using the cpuid instruction.
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check if cpuid leaf 4 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return -1;
  }

  // The algorithm used starts by setting the affinity to each available thread
  // and retrieving info from the cpuid instruction, so if we are not capable
  // of calling __kmp_get_system_affinity() and __kmp_set_system_affinity(),
  // then we need to do something else - use the defaults that we calculated
  // from issuing cpuid without binding to each proc.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    // On some OS/chip combinations where HT is supported by the chip but is
    // disabled, this value will be 2 on a single core chip.
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The num cores per pkg comes from cpuid(4). 1 must be added to the
    // encoded value.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    // There is no way to reliably tell if HT is enabled without issuing the
    // cpuid instruction from every thread, so if the machine is not affinity
    // capable, we assume that HT is off.
    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity_type = affinity_none.

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  KMP_ASSERT(oldMask != NULL);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Run through each of the available contexts, binding the current thread to
  // it, and obtaining the pertinent information using the cpuid instr.
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  unsigned i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    if (((buf.edx >> 9) & 1) == 0) {
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return -1;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
    // value.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained
    // locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // I've never seen this one happen, but I suppose it could, if the cpuid
      // instruction on a chip was really screwed up. Make sure to restore the
      // affinity mask before the tail call.
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0].pkgId;
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields. pkgId's may be sparsely assigned
  // among the chips on a system. We want to determine the radix of each field
  // now, and perform a consistency check along the way: the values returned
  // by cpuid for any thread bound to a given package had better agree on
  // maxThreadsPerPkg and maxCoresPerPkg.
  nPackages = 1;
  nCoresPerPkg = 1;
  __kmp_nThreadsPerCore = 1;
  unsigned nCores = 1;

  unsigned pkgCt = 1; // to determine radii
  unsigned lastPkgId = threadInfo[0].pkgId;
  unsigned coreCt = 1;
  unsigned lastCoreId = threadInfo[0].coreId;
  unsigned threadCt = 1;
  unsigned lastThreadId = threadInfo[0].threadId;

  // intra-pkg consistency check vars
  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      pkgCt++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency check vars,
      // though.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return -1;
    }

    // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
    // fields agree between all the threads bound to a given package.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }
  }
  nPackages = pkgCt;
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly.
  __kmp_ncores = nCores;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (__kmp_affinity_uniform_topology()) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < nApics; ++i) {
    __kmp_pu_os_idx[i] = threadInfo[i].osId;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Now that we've determined the number of packages, the number of cores per
  // package, and the number of threads per core, we can construct the data
  // structure that is to be returned.
  int pkgLevel = 0;
  int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
  int threadLevel =
      (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

  KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

  for (i = 0; i < nApics; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i].osId;
    int d = 0;

    if (pkgLevel >= 0) {
      addr.labels[d++] = threadInfo[i].pkgId;
    }
    if (coreLevel >= 0) {
      addr.labels[d++] = threadInfo[i].coreId;
    }
    if (threadLevel >= 0) {
      addr.labels[d++] = threadInfo[i].threadId;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled in the
    // machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(threadInfo);
  KMP_CPU_FREE(oldMask);
  return depth;
}
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
                                              kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check to see if cpuid leaf 11 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 11) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }
  __kmp_x86_cpuid(11, 0, &buf);
  if (buf.ebx == 0) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }

  // Find the number of levels in the machine topology. While we're at it, get
  // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try
  // to get more accurate values later by explicitly querying cpuid leaf 11 if
  // the affinity map is available.
  int level;
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;
  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

  for (level = 0;; level++) {
    if (level > 31) {
      // If level is big then something went wrong -> exiting.
      // There could actually be 32 valid levels in the machine topology, but
      // so far, the only machine we have seen which does not exit this loop
      // before iteration 32 has fubar x2APIC settings. For now, just reject
      // this case based upon loop trip count.
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }
    __kmp_x86_cpuid(11, level, &buf);
    if (buf.ebx == 0) {
      if (pkgLevel < 0) {
        // Will infer nPackages from __kmp_xproc
        pkgLevel = level;
        level++;
      }
      break;
    }
    int kind = (buf.ecx >> 8) & 0xff;
    if (kind == 1) {
      // SMT level
      threadLevel = level;
      coreLevel = -1;
      pkgLevel = -1;
      __kmp_nThreadsPerCore = buf.ebx & 0xffff;
      if (__kmp_nThreadsPerCore == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else if (kind == 2) {
      // core level
      coreLevel = level;
      pkgLevel = -1;
      nCoresPerPkg = buf.ebx & 0xffff;
      if (nCoresPerPkg == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else {
      if (level <= 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
      if (pkgLevel >= 0) {
        continue;
      }
      pkgLevel = level;
      nPackages = buf.ebx & 0xffff;
      if (nPackages == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    }
  }
  int depth = level;

  // In the above loop, "level" was counted from the finest level (usually
  // thread) to the coarsest. The caller expects that we will place the labels
  // in (*address2os)[].first.labels[] in the inverse order, so we need to
  // invert the vars saying which level means what.
  if (threadLevel >= 0) {
    threadLevel = depth - threadLevel - 1;
  }
  if (coreLevel >= 0) {
    coreLevel = depth - coreLevel - 1;
  }
  KMP_DEBUG_ASSERT(pkgLevel >= 0);
  pkgLevel = depth - pkgLevel - 1;

  // The algorithm used starts by setting the affinity to each available
  // thread and retrieving info from the cpuid instruction, so if we are not
  // capable of calling __kmp_get_system_affinity() and
  // __kmp_set_system_affinity(), we need to do something else.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity_type = affinity_none.

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

  // Run through each of the available contexts, binding the current thread to
  // it, and obtaining the pertinent information using the cpuid instr.
  unsigned int proc;
  int nApics = 0;
  KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(proc);

    // Extract labels for each level in the machine topology map from Apic ID.
    Address addr(depth);
    int prev_shift = 0;

    for (level = 0; level < depth; level++) {
      __kmp_x86_cpuid(11, level, &buf);
      unsigned apicId = buf.edx;
      if (buf.ebx == 0) {
        if (level != depth - 1) {
          KMP_CPU_FREE(oldMask);
          *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
          return -1;
        }
        addr.labels[depth - level - 1] = apicId >> prev_shift;
        level++;
        break;
      }
      int shift = buf.eax & 0x1f;
      int mask = (1 << shift) - 1;
      addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
      prev_shift = shift;
    }
    if (level != depth) {
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }

    retval[nApics] = AddrUnsPair(addr, proc);
    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, return now.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

  // Find the radix at each of the levels.
  unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  for (level = 0; level < depth; level++) {
    totals[level] = 1;
    maxCt[level] = 1;
    counts[level] = 1;
    last[level] = retval[0].first.labels[level];
  }

  // From here on, the iteration variable "level" runs from the finest level
  // to the coarsest, i.e. we iterate forward through
  // (*address2os)[].first.labels[] - in the inverse order.
  for (proc = 1; (int)proc < nApics; proc++) {
    for (level = 0; level < depth; level++) {
      if (retval[proc].first.labels[level] != last[level]) {
        int j;
        for (j = level + 1; j < depth; j++) {
          totals[j]++;
          counts[j] = 1;
          // (maxCt[j] is deliberately not reset here; see upstream TODO about
          // the effect on printed topology info.)
          last[j] = retval[proc].first.labels[j];
        }
        totals[level]++;
        counts[level]++;
        if (counts[level] > maxCt[level]) {
          maxCt[level] = counts[level];
        }
        last[level] = retval[proc].first.labels[level];
        break;
      } else if (level == depth - 1) {
        // They are the same at all levels: error if at the last level.
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
        return -1;
      }
    }
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly.
  if (threadLevel >= 0) {
    __kmp_nThreadsPerCore = maxCt[threadLevel];
  } else {
    __kmp_nThreadsPerCore = 1;
  }
  nPackages = totals[pkgLevel];

  if (coreLevel >= 0) {
    __kmp_ncores = totals[coreLevel];
    nCoresPerPkg = maxCt[coreLevel];
  } else {
    __kmp_ncores = nPackages;
    nCoresPerPkg = 1;
  }

  // Check to see if the machine topology is uniform
  unsigned prod = maxCt[0];
  for (level = 1; level < depth; level++) {
    prod *= maxCt[level];
  }
  bool uniform = (prod == totals[level - 1]);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[0]);
    for (level = 1; level <= pkgLevel; level++) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (proc = 0; (int)proc < nApics; ++proc) {
    __kmp_pu_os_idx[proc] = retval[proc].second;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  int new_depth = 0;
  for (level = 0; level < depth; level++) {
    if ((maxCt[level] == 1) && (level != pkgLevel)) {
      continue;
    }
    new_depth++;
  }

  // If we are removing any levels, allocate a new vector to return,
  // and copy the relevant information to it.
  if (new_depth != depth) {
    AddrUnsPair *new_retval =
        (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
    for (proc = 0; (int)proc < nApics; proc++) {
      Address addr(new_depth);
      new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
    }
    int new_level = 0;
    int newPkgLevel = -1;
    int newCoreLevel = -1;
    int newThreadLevel = -1;
    for (level = 0; level < depth; level++) {
      if ((maxCt[level] == 1) && (level != pkgLevel)) {
        // Remove this level. Never remove the package level.
        continue;
      }
      if (level == pkgLevel) {
        newPkgLevel = new_level;
      }
      if (level == coreLevel) {
        newCoreLevel = new_level;
      }
      if (level == threadLevel) {
        newThreadLevel = new_level;
      }
      for (proc = 0; (int)proc < nApics; proc++) {
        new_retval[proc].first.labels[new_level] =
            retval[proc].first.labels[level];
      }
      new_level++;
    }

    __kmp_free(retval);
    retval = new_retval;
    depth = new_depth;
    pkgLevel = newPkgLevel;
    coreLevel = newCoreLevel;
    threadLevel = newThreadLevel;
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
                                  threadLevel);
  }

  __kmp_free(last);
  __kmp_free(maxCt);
  __kmp_free(counts);
  __kmp_free(totals);
  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(unsigned *const *)a;
  const unsigned *bb = *(unsigned *const *)b;
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
#if KMP_USE_HIER_SCHED
// Set the array sizes for the hierarchy layers
static void __kmp_dispatch_set_hierarchy_values() {
  // Set the maximum number of L1's to number of cores.
  // Set the maximum number of L2's to either number of cores / 2 for
  // Intel(R) Xeon Phi(TM) coprocessor formerly codenamed Knights Landing,
  // or the number of cores for Intel(R) Xeon(R) processors.
  // Set the maximum number of NUMA nodes and L3's to number of packages.
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
  if (__kmp_mic_type >= mic3)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
  // Set the number of threads per unit
  // Number of hardware threads per L1/L2/L3/NUMA/LOOP
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
      __kmp_nThreadsPerCore;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
  if (__kmp_mic_type >= mic3)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        2 * __kmp_nThreadsPerCore;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
}

// Return the index into the hierarchy for this tid and layer type (L1, L2,
// etc.), i.e. this thread's L1, or this thread's L2, etc.
int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
  int index = type + 1;
  int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
  KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
  if (type == kmp_hier_layer_e::LAYER_THREAD)
    return tid;
  else if (type == kmp_hier_layer_e::LAYER_LOOP)
    return 0;
  KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
  if (tid >= num_hw_threads)
    tid = tid % num_hw_threads;
  return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
}

// Return the number of t1's per t2
int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
  int i1 = t1 + 1;
  int i2 = t2 + 1;
  KMP_DEBUG_ASSERT(i1 <= i2);
  KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
  // (nthreads / t2) / (nthreads / t1) = t1 / t2
  return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
}
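// Example: on a 2-package, 8-core/pkg, 2-thread/core machine,
// __kmp_dispatch_get_index(tid, LAYER_L1) == tid / 2 for tid < 32 (each pair
// of hardware threads shares an L1), and
// __kmp_dispatch_get_t1_per_t2(LAYER_L1, LAYER_L3) == 16 / 2 == 8, i.e. eight
// L1 (core) units per L3 (package) unit.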
#endif // KMP_USE_HIER_SCHED

// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
                                             int *line,
                                             kmp_i18n_id_t *const msg_id,
                                             FILE *f) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Scan of the file, and count the number of "processor" (osId) fields, and
  // find the highest value of <n> for a node_<n> field.
  char buf[256];
  unsigned num_records = 0;
  while (!feof(f)) {
    buf[sizeof(buf) - 1] = 1;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read errors presumably because of EOF
      break;
    }

    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      num_records++;
      continue;
    }

    // FIXME - this will match "node_<n> <garbage>"
    unsigned level;
    if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
      if (nodeIdIndex + level >= maxIndex) {
        maxIndex = nodeIdIndex + level;
      }
      continue;
    }
  }

  // Check for empty file / no valid processor records, or too many. The
  // number of records can't exceed the number of valid bits in the affinity
  // mask.
  if (num_records == 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_NoProcRecords;
    return -1;
  }
  if (num_records > (unsigned)__kmp_xproc) {
    *line = 0;
    *msg_id = kmp_i18n_str_TooManyProcRecords;
    return -1;
  }

  // Set the file pointer back to the beginning, so that we can scan the file
  // again, this time performing a full parse of the data.
  if (fseek(f, 0, SEEK_SET) != 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_CantRewindCpuinfo;
    return -1;
  }

  // Allocate the array of records to store the proc info in. The dummy
  // element at the end makes the logic in filling them out easier to code.
  unsigned **threadInfo =
      (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
  unsigned i;
  for (i = 0; i <= num_records; i++) {
    threadInfo[i] =
        (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  }

#define CLEANUP_THREAD_INFO                                                    \
  for (i = 0; i <= num_records; i++) {                                         \
    __kmp_free(threadInfo[i]);                                                 \
  }                                                                            \
  __kmp_free(threadInfo);

  // A value of UINT_MAX means that we didn't find the field
  unsigned __index;

#define INIT_PROC_INFO(p)                                                      \
  for (__index = 0; __index <= maxIndex; __index++) {                          \
    (p)[__index] = UINT_MAX;                                                   \
  }

  for (i = 0; i <= num_records; i++) {
    INIT_PROC_INFO(threadInfo[i]);
  }

  unsigned num_avail = 0;
  *line = 0;
  while (!feof(f)) {
    // Create an inner scoping level, so that all the goto targets at the end
    // of the loop appear in an outer scoping level. This avoids warnings
    // about jumping past an initialization to a target in the same block.
    {
      buf[sizeof(buf) - 1] = 1;
      bool long_line = false;
      if (!fgets(buf, sizeof(buf), f)) {
        // Read errors presumably because of EOF.
        // If there is valid data in threadInfo[num_avail], then fake
        // a blank line to ensure that the last address gets parsed.
        bool valid = false;
        for (i = 0; i <= maxIndex; i++) {
          if (threadInfo[num_avail][i] != UINT_MAX) {
            valid = true;
          }
        }
        if (!valid) {
          break;
        }
        buf[0] = 0;
      } else if (!buf[sizeof(buf) - 1]) {
        // The line is longer than the buffer. Set a flag and don't
        // emit an error if we were going to ignore the line, anyway.
        long_line = true;

#define CHECK_LINE                                                             \
  if (long_line) {                                                             \
    CLEANUP_THREAD_INFO;                                                       \
    *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
    return -1;                                                                 \
  }
      }
      (*line)++;

      char s1[] = "processor";
      if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s1) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
#if KMP_ARCH_AARCH64
          // Handle the old AArch64 /proc/cpuinfo layout differently,
          // it contains all of the 'processor' entries listed in a
          // single 'Processor' section, therefore the normal looking
          // for duplicates in that section will always fail.
          num_avail++;
#else
          goto dup_field;
#endif
        threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
        char path[256];
        KMP_SNPRINTF(
            path, sizeof(path),
            "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
            threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

        KMP_SNPRINTF(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
                     threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
        continue;
#else
      }
      char s2[] = "physical id";
      if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s2) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][pkgIdIndex] = val;
        continue;
      }
      char s3[] = "core id";
      if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s3) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][coreIdIndex] = val;
        continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
      }
      char s4[] = "thread id";
      if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][threadIdIndex] = val;
        continue;
      }
      unsigned level;
      if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        KMP_ASSERT(nodeIdIndex + level <= maxIndex);
        if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][nodeIdIndex + level] = val;
        continue;
      }

      // We didn't recognize the leading token on the line. There are lots of
      // leading tokens that we don't recognize - if the line isn't empty, go
      // on to the next line.
      if ((*buf != 0) && (*buf != '\n')) {
        // If the line is longer than the buffer, read characters
        // until we find a newline.
        if (long_line) {
          int ch;
          while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
            ;
        }
        continue;
      }

      // A newline has signalled the end of the processor record.
      // Check that there aren't too many procs specified.
      if ((int)num_avail == __kmp_xproc) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_TooManyEntries;
        return -1;
      }

      // Check for missing fields. The osId field must be there, and we
      // currently require that the physical id field is specified, also.
      if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingProcField;
        return -1;
      }
      if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
        return -1;
      }

      // Skip this proc if it is not included in the machine model.
      if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
                         __kmp_affin_fullMask)) {
        INIT_PROC_INFO(threadInfo[num_avail]);
        continue;
      }

      // We have a successful parse of this proc's info.
      // Increment the counter, and prepare for the next proc.
      num_avail++;
      KMP_ASSERT(num_avail <= num_records);
      INIT_PROC_INFO(threadInfo[num_avail]);
    }
    continue;

  no_val:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_MissingValCpuinfo;
    return -1;

  dup_field:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
    return -1;
  }
  *line = 0;
#if KMP_MIC && REDUCE_TEAM_SIZE
  unsigned teamSize = 0;
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  KMP_ASSERT(num_avail > 0);
  KMP_ASSERT(num_avail <= num_records);
  if (num_avail == 1) {
    __kmp_ncores = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
    if (__kmp_affinity_verbose) {
      if (!KMP_AFFINITY_CAPABLE()) {
        KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
          KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
          KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      }
      int index;
      kmp_str_buf_t buf;
      __kmp_str_buf_init(&buf);
      __kmp_str_buf_print(&buf, "1");
      for (index = maxIndex - 1; index > pkgIdIndex; index--) {
        __kmp_str_buf_print(&buf, " x 1");
      }
      KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
      __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
      CLEANUP_THREAD_INFO;
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0][pkgIdIndex];
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    CLEANUP_THREAD_INFO;
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields. pkgId's may be sparsely assigned
  // among the chips on a system. Although coreId's are usually assigned
  // [0 .. coresPerPkg-1] and threadId's are usually assigned
  // [0..threadsPerCore-1], we don't want to make any such assumptions.
  //
  // For that matter, we don't know what coresPerPkg and threadsPerCore (or
  // the total # packages) are at this point - we want to determine that now.
  // We only have an upper bound on the first two figures.
  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
  unsigned threadIdCt;
  unsigned index;

restart_radix_check:
  threadIdCt = 0;

  // Initialize the counter arrays with data from threadInfo[0].
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
  for (index = 0; index <= maxIndex; index++) {
    counts[index] = 1;
    maxCt[index] = 1;
    totals[index] = 1;
    lastId[index] = threadInfo[0][index];
  }

  // Run through the rest of the OS procs.
  for (i = 1; i < num_avail; i++) {
    // Find the most significant index whose id differs from the id for the
    // previous OS proc.
    for (index = maxIndex; index >= threadIdIndex; index--) {
      if (assign_thread_ids && (index == threadIdIndex)) {
        // Auto-assign the thread id field if it wasn't specified.
        if (threadInfo[i][threadIdIndex] == UINT_MAX) {
          threadInfo[i][threadIdIndex] = threadIdCt++;
        }
        // Apparently the thread id field was specified for some entries and
        // not others. Start the thread id counter off at the next higher
        // thread id.
        else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
          threadIdCt = threadInfo[i][threadIdIndex] + 1;
        }
      }
      if (threadInfo[i][index] != lastId[index]) {
        // Run through all indices which are less significant, and reset the
        // counts to 1. At all levels up to and including index, we need to
        // increment the totals and record the last id.
        unsigned index2;
        for (index2 = threadIdIndex; index2 < index; index2++) {
          totals[index2]++;
          if (counts[index2] > maxCt[index2]) {
            maxCt[index2] = counts[index2];
          }
          counts[index2] = 1;
          lastId[index2] = threadInfo[i][index2];
        }
        counts[index]++;
        totals[index]++;
        lastId[index] = threadInfo[i][index];

        if (assign_thread_ids && (index > threadIdIndex)) {
#if KMP_MIC && REDUCE_TEAM_SIZE
          // The default team size is the total #threads in the machine
          // minus 1 thread for every core that has 3 or more threads.
          teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

          // Restart the thread counter, as we are on a new core.
          threadIdCt = 0;

          // Auto-assign the thread id field if it wasn't specified.
          if (threadInfo[i][threadIdIndex] == UINT_MAX) {
            threadInfo[i][threadIdIndex] = threadIdCt++;
          }

          // Apparently the thread id field was specified for some entries and
          // not others. Start the thread id counter off at the next higher
          // thread id.
          else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
            threadIdCt = threadInfo[i][threadIdIndex] + 1;
          }
        }
        break;
      }
    }
    if (index < threadIdIndex) {
      // If thread ids were specified, it is an error if they are not unique.
      // Also, check that we haven't already restarted the loop (to be safe -
      // shouldn't need to).
      if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
        return -1;
      }

      // If the thread ids were not specified and we see entries that are
      // duplicates, start the loop over and assign the thread ids manually.
      assign_thread_ids = true;
      goto restart_radix_check;
    }
  }

#if KMP_MIC && REDUCE_TEAM_SIZE
  // The default team size is the total #threads in the machine
  // minus 1 thread for every core that has 3 or more threads.
  teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (counts[index] > maxCt[index]) {
      maxCt[index] = counts[index];
    }
  }

  __kmp_nThreadsPerCore = maxCt[threadIdIndex];
  nCoresPerPkg = maxCt[coreIdIndex];
  nPackages = totals[pkgIdIndex];

  // Check to see if the machine topology is uniform
  unsigned prod = totals[maxIndex];
  for (index = threadIdIndex; index < maxIndex; index++) {
    prod *= maxCt[index];
  }
  bool uniform = (prod == totals[threadIdIndex]);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly.
  __kmp_ncores = totals[coreIdIndex];

  if (__kmp_affinity_verbose) {
    if (!KMP_AFFINITY_CAPABLE()) {
      KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    } else {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                __kmp_affin_fullMask);
      KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    }
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
    for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
               maxCt[threadIdIndex], __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }

#if KMP_MIC && REDUCE_TEAM_SIZE
  // Set the default team size.
  if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
    __kmp_dflt_team_nth = teamSize;
    KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
                  "__kmp_dflt_team_nth = %d\n",
                  __kmp_dflt_team_nth));
  }
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < num_avail; ++i) { // fill the os indices
    __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return 0;
  }

  // Count the number of levels which have more nodes at that level than at
  // the parent's level (with there being an implicit root node of the top
  // level). This is equivalent to saying that there is at least one node at
  // this level which has a sibling. These levels are in the map, and the
  // package level is always in the map.
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;

  int depth = 0;
  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (inMap[index]) {
      depth++;
    }
  }
  KMP_ASSERT(depth > 0);

  // Construct the data structure that is to be returned.
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;

  for (i = 0; i < num_avail; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    int dst_index = 0;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      addr.labels[dst_index] = threadInfo[i][src_index];
      if (src_index == pkgIdIndex) {
        pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
      }
      dst_index++;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    unsigned src_index;
    __kmp_affinity_gran_levels = 0;
    for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
      if (!inMap[src_index]) {
        continue;
      }
      switch (src_index) {
      case threadIdIndex:
        if (__kmp_affinity_gran > affinity_gran_thread) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case coreIdIndex:
        if (__kmp_affinity_gran > affinity_gran_core) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case pkgIdIndex:
        if (__kmp_affinity_gran > affinity_gran_package) {
          __kmp_affinity_gran_levels++;
        }
        break;
      }
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(inMap);
  __kmp_free(lastId);
  __kmp_free(totals);
  __kmp_free(maxCt);
  __kmp_free(counts);
  CLEANUP_THREAD_INFO;
  return depth;
}
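// Example (illustrative): a /proc/cpuinfo record such as
//   processor   : 3
//   physical id : 0
//   core id     : 1
// parses into threadInfo fields osId = 3, pkgId = 0, coreId = 1; a missing
// thread id is synthesized by the restart_radix_check pass above.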
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
  // First form a table of affinity masks in order of OS thread id.
  unsigned depth;
  unsigned maxOsId;
  unsigned i;

  KMP_ASSERT(numAddrs > 0);
  depth = address2os[0].first.depth;

  maxOsId = 0;
  for (i = numAddrs - 1;; --i) {
    unsigned osId = address2os[i].second;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
    if (i == 0)
      break;
  }
  kmp_affin_mask_t *osId2Mask;
  KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));

  // Sort the address2os table according to physical order. Doing so will put
  // all threads on the same core/package/node in consecutive locations.
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);

  KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
  if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  }
  if (__kmp_affinity_gran_levels >= (int)depth) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffThreadsMayMigrate);
    }
  }

  // Run through the table, forming the masks for all threads on each core.
  // Threads on the same core will have identical Address objects, not
  // considering the last level, which must be the thread id. All threads on a
  // core will appear consecutively.
  unsigned unique = 0;
  unsigned j = 0; // index of 1st thread on core
  unsigned leader = 0;
  Address *leaderAddr = &(address2os[0].first);
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);
  KMP_CPU_SET(address2os[0].second, sum);
  for (i = 1; i < numAddrs; i++) {
    // If this thread is sufficiently close to the leader (within the
    // granularity setting), then set the bit for this os thread in the
    // affinity mask for this group, and go on to the next thread.
    if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
      KMP_CPU_SET(address2os[i].second, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the osId2Mask table. Mark the first address as a leader.
    for (; j < i; j++) {
      unsigned osId = address2os[j].second;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
      KMP_CPU_COPY(mask, sum);
      address2os[j].first.leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    leaderAddr = &(address2os[i].first);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[i].second, sum);
  }

  // For every thread in the last group, copy the mask to the thread's
  // entry in the osId2Mask table.
  for (; j < i; j++) {
    unsigned osId = address2os[j].second;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
    KMP_CPU_COPY(mask, sum);
    address2os[j].first.leader = (j == leader);
  }
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}
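// Example: with granularity=core on a 2-thread/core machine, sibling hardware
// threads differ only at the last (thread) level, so isClose() groups them
// and both OS ids end up sharing one union mask in osId2Mask.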
2784 static kmp_affin_mask_t *newMasks;
2785 static int numNewMasks;
2786 static int nextNewMask;
2788 #define ADD_MASK(_mask) \ 2790 if (nextNewMask >= numNewMasks) { \ 2793 kmp_affin_mask_t *temp; \ 2794 KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \ 2795 for (i = 0; i < numNewMasks / 2; i++) { \ 2796 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); \ 2797 kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i); \ 2798 KMP_CPU_COPY(dest, src); \ 2800 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2); \ 2803 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \ 2807 #define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \ 2809 if (((_osId) > _maxOsId) || \ 2810 (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ 2811 if (__kmp_affinity_verbose || \ 2812 (__kmp_affinity_warnings && \ 2813 (__kmp_affinity_type != affinity_none))) { \ 2814 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ 2817 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \ 2823 static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
                                            unsigned int *out_numMasks,
                                            const char *proclist,
                                            kmp_affin_mask_t *osId2Mask,
                                            int maxOsId) {
  int i;
  const char *scan = proclist;
  const char *next = proclist;

  // We use malloc() for the temporary mask vector, so that we can use
  // realloc() to extend it.
  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;
  kmp_affin_mask_t *sumMask;
  KMP_CPU_ALLOC(sumMask);
  int setSize = 0;

  for (;;) {
    int start, end, stride;

    SKIP_WS(scan);
    next = scan;
    if (*next == '\0') {
      break;
    }

    if (*next == '{') {
      int num;
      setSize = 0; // only count the number of bits [re]set
      next++; // skip '{'
      SKIP_WS(next);
      scan = next;

      // Read the first integer in the set.
      KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
      SKIP_DIGITS(next);
      num = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(num >= 0, "bad explicit proc list");

      // Copy the mask for that osId to the sum (union) mask.
      if ((num > maxOsId) ||
          (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, num);
        }
        KMP_CPU_ZERO(sumMask);
      } else {
        KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
        setSize = 1;
      }

      for (;;) {
        // Check for end of set.
        SKIP_WS(next);
        if (*next == '}') {
          next++; // skip '}'
          break;
        }

        // Skip optional comma.
        KMP_ASSERT2(*next == ',', "bad explicit proc list");
        next++;

        // Read the next integer in the set.
        SKIP_WS(next);
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                    "bad explicit proc list");
        SKIP_DIGITS(next);
        num = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(num >= 0, "bad explicit proc list");

        // Add the mask for that osId to the sum mask.
        if ((num > maxOsId) ||
            (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, num);
          }
        } else {
          KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
          setSize++;
        }
      }
      if (setSize > 0) {
        ADD_MASK(sumMask);
      }

      SKIP_WS(next);
      if (*next == ',') {
        next++;
      }
      scan = next;
      continue;
    }

    // Read the first integer.
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(start >= 0, "bad explicit proc list");
    SKIP_WS(next);

    // If this isn't a range, then add a mask to the list and go on.
    if (*next != '-') {
      ADD_MASK_OSID(start, osId2Mask, maxOsId);

      // Skip optional comma.
      if (*next == ',') {
        next++;
      }
      scan = next;
      continue;
    }

    // This is a range. Skip over the '-' and read in the 2nd int.
    next++; // skip '-'
    SKIP_WS(next);
    scan = next;
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    end = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(end >= 0, "bad explicit proc list");

    // Check for a stride parameter.
    stride = 1;
    SKIP_WS(next);
    if (*next == ':') {
      // A stride is specified. Skip over the ':' and read the 3rd int.
      int sign = +1;
      next++; // skip ':'
      SKIP_WS(next);
      scan = next;
      if (*next == '-') {
        sign = -1;
        next++;
        SKIP_WS(next);
        scan = next;
      }
      KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(stride >= 0, "bad explicit proc list");
      stride *= sign;
    }

    // Do some range checks.
    KMP_ASSERT2(stride != 0, "bad explicit proc list");
    if (stride > 0) {
      KMP_ASSERT2(start <= end, "bad explicit proc list");
    } else {
      KMP_ASSERT2(start >= end, "bad explicit proc list");
    }
    KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

    // Add the mask for each OS proc # to the list.
    if (stride > 0) {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start <= end);
    } else {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start >= end);
    }

    // Skip optional comma.
    SKIP_WS(next);
    if (*next == ',') {
      next++;
    }
    scan = next;
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
  KMP_CPU_FREE(sumMask);
}
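
// Parse one brace-enclosed subplace list of the OMP_PLACES syntax. Each item
// is <lower-bound>, <lower-bound>:<count>, or <lower-bound>:<count>:<stride>;
// every accepted proc id is OR-ed into tempMask and counted in *setSize.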
static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  const char *next;

  for (;;) {
    int start, count, stride, i;

    // Read in the starting proc id.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    *scan = next;

    // Valid follow sets are ',' ':' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
        (*setSize)++;
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read count parameter.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    *scan = next;

    // Valid follow sets are ',' ':' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start++;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read stride parameter.
    int sign = +1;
    for (;;) {
      SKIP_WS(*scan);
      if (**scan == '+') {
        (*scan)++; // skip '+'
        continue;
      }
      if (**scan == '-') {
        sign *= -1;
        (*scan)++; // skip '-'
        continue;
      }
      break;
    }
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    *scan = next;
    stride *= sign;

    // Valid follow sets are ',' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start += stride;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
}
static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
  const char *next;

  // Valid follow sets are '{' '!' and num.
  SKIP_WS(*scan);
  if (**scan == '{') {
    (*scan)++; // skip '{'
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
    (*scan)++; // skip '}'
  } else if (**scan == '!') {
    (*scan)++; // skip '!'
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    next = *scan;
    SKIP_DIGITS(next);
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
      (*setSize)++;
    }
    *scan = next; // skip num
  } else {
    KMP_ASSERT2(0, "bad explicit places list");
  }
}
static void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                             unsigned int *out_numMasks,
                                             const char *placelist,
                                             kmp_affin_mask_t *osId2Mask,
                                             int maxOsId) {
  int i, j, count, stride, sign;
  const char *scan = placelist;
  const char *next = placelist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial place to form the
  // current place; previousMask contains the previous place.
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  int setSize = 0;

  for (;;) {
    __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

    // Valid follow sets are ',' ':' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read count parameter.
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // Valid follow sets are ',' ':' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      stride = +1;
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read stride parameter.
      sign = +1;
      for (;;) {
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }

    // Add places determined by initial_place : count : stride.
    for (i = 0; i < count; i++) {
      if (setSize == 0) {
        break;
      }
      // Add the current place, then build the next place (tempMask) from it.
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          if ((__kmp_affinity_verbose ||
               (__kmp_affinity_warnings &&
                (__kmp_affinity_type != affinity_none))) &&
              i < count - 1) {
            KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // Valid follow sets are ',' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
#undef ADD_MASK
#undef ADD_MASK_OSID

#if KMP_USE_HWLOC
static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
  // skip PUs descendants of the object o
  int skipped = 0;
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
      KMP_CPU_CLR(idx, __kmp_affin_fullMask);
      KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
      ++skipped;
    }
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return skipped; // count number of skipped units
}

static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
  // check if obj has PUs present in fullMask
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
      return 1; // found PU
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return 0; // no PUs found
}
#endif // KMP_USE_HWLOC
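
// Trim the full topology according to the KMP_HW_SUBSET (__kmp_hws_*)
// request. Under hwloc, the tree socket -> [NUMA node] -> [tile/L2] ->
// core -> PU is walked and only the requested num/offset window is kept at
// each level; every other PU is cleared from __kmp_affin_fullMask. Without
// hwloc, only 3-level (socket/core/thread) subsetting of a uniform topology
// is supported.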
static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
  AddrUnsPair *newAddr;
  if (__kmp_hws_requested == 0)
    goto _exit; // no topology limiting actions requested, exit
#if KMP_USE_HWLOC
  if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
    // Number of subobjects calculated dynamically, this works fine for
    // any non-uniform topology. L2 cache objects are determined by depth,
    // other objects - by type.
    hwloc_topology_t tp = __kmp_hwloc_topology;
    int nS = 0, nN = 0, nL = 0, nC = 0, nT = 0; // logical index incl. skipped
    int nCr = 0, nTr = 0; // number of requested units
    int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0;
    hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
    int L2depth, idx;

    // check support of extensions ----------------------------------
    int numa_support = 0, tile_support = 0;
    if (__kmp_pu_os_idx)
      hT = hwloc_get_pu_obj_by_os_index(tp,
                                        __kmp_pu_os_idx[__kmp_avail_proc - 1]);
    else
      hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
    if (hT == NULL) { // something has gone wrong
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check NUMA node
    hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
    hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
    if (hN != NULL && hN->depth > hS->depth) {
      numa_support = 1; // 1 in case socket includes node(s)
    } else if (__kmp_hws_node.num > 0) {
      // don't support sockets inside NUMA node (no such HW found for testing)
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check L2 cache, get object by depth because of multiple caches
    L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
    hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
    if (hL != NULL &&
        __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
      tile_support = 1; // no sense to count L2 if it includes single core
    } else if (__kmp_hws_tile.num > 0) {
      if (__kmp_hws_core.num == 0) {
        __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
        __kmp_hws_tile.num = 0;
      } else {
        // L2 and core are both requested, but represent same object
        KMP_WARNING(AffHWSubsetInvalid);
        goto _exit;
      }
    }
    // end of check of extensions -----------------------------------

    // fill in unset items, validate settings -----------------------
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_socket.offset >= nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (numa_support) {
      hN = NULL;
      int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
                                                  &hN); // num nodes in socket
      if (__kmp_hws_node.num == 0)
        __kmp_hws_node.num = NN; // use all available nodes
      if (__kmp_hws_node.offset >= NN) {
        KMP_WARNING(AffHWSubsetManyNodes);
        goto _exit;
      }
      if (tile_support) {
        // get num tiles in node
        int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
        if (__kmp_hws_tile.num == 0) {
          __kmp_hws_tile.num = NL + 1;
        } // use all available tiles, some node may have more tiles, thus +1
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in node
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    } else { // numa_support
      if (tile_support) {
        // get num tiles in socket
        int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
        if (__kmp_hws_tile.num == 0)
          __kmp_hws_tile.num = NL; // use all available tiles
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in socket
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    } // numa_support
    if (__kmp_hws_proc.num == 0)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
    if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
      KMP_WARNING(AffHWSubsetManyProcs);
      goto _exit;
    }
    // end of validation ---------------------------------------------
    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
                                              __kmp_avail_proc); // max size
    // main loop to form HW subset -----------------------------------
    hS = NULL;
    int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
    for (int s = 0; s < NP; ++s) {
      // Check Socket ------------------------------------------------
      hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
      if (!__kmp_hwloc_obj_has_PUs(tp, hS))
        continue; // skip socket if all PUs are out of fullMask
      ++nS; // only count objects those have PUs in affinity mask
      if (nS <= __kmp_hws_socket.offset ||
          nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
        n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
        continue; // move to next socket
      }
      nCr = 0; // count number of cores per socket
      // socket requested, go down the topology tree;
      // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
      if (numa_support) {
        nN = 0;
        hN = NULL;
        int NN = __kmp_hwloc_count_children_by_type(
            tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in current socket
        for (int n = 0; n < NN; ++n) {
          // Check NUMA Node -----------------------------------------
          if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // skip node if all PUs are out of fullMask
          }
          ++nN;
          if (nN <= __kmp_hws_node.offset ||
              nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
            // skip node as not requested
            n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // move to next node
          }
          // node requested, go down the topology tree
          if (tile_support) {
            nL = 0;
            hL = NULL;
            int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
            for (int l = 0; l < NL; ++l) {
              // Check L2 (tile) -------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // skip tile if all PUs are out of fullMask
              }
              ++nL;
              if (nL <= __kmp_hws_tile.offset ||
                  nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
                // skip tile as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // move to next tile
              }
              // tile requested, go down the topology tree
              nC = 0;
              hC = NULL;
              int NC = __kmp_hwloc_count_children_by_type(tp, hL,
                                                          HWLOC_OBJ_CORE, &hC);
              for (int c = 0; c < NC; ++c) {
                // Check Core ----------------------------------------
                if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // skip core if all PUs are out of fullMask
                }
                ++nC;
                if (nC <= __kmp_hws_core.offset ||
                    nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                  // skip core as not requested
                  n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // move to next core
                }
                // core requested, go down to PUs
                nT = 0;
                nTr = 0;
                hT = NULL;
                int NT = __kmp_hwloc_count_children_by_type(tp, hC,
                                                            HWLOC_OBJ_PU, &hT);
                for (int t = 0; t < NT; ++t) {
                  // Check PU ----------------------------------------
                  idx = hT->os_index;
                  if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // skip PU if not in fullMask
                  }
                  ++nT;
                  if (nT <= __kmp_hws_proc.offset ||
                      nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                    // skip PU
                    KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                    ++n_old;
                    KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // move to next PU
                  }
                  ++nTr;
                  if (pAddr) // collect requested thread's data
                    newAddr[n_new] = (*pAddr)[n_old];
                  ++n_new;
                  ++n_old;
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                } // threads loop
                if (nTr > 0) {
                  ++nCr; // num cores per socket
                  ++nCo; // total num cores
                  if (nTr > nTpC)
                    nTpC = nTr; // calc max threads per core
                }
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              } // cores loop
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
            } // tiles loop
          } else { // tile_support
            // no tiles, check cores
            nC = 0;
            hC = NULL;
            int NC = __kmp_hwloc_count_children_by_type(tp, hN,
                                                        HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ------------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ------------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
          } // tile_support
          hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
        } // nodes loop
      } else { // numa_support
        if (tile_support) {
          nL = 0;
          hL = NULL;
          int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
          for (int l = 0; l < NL; ++l) {
            // Check L2 (tile) ---------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // skip tile if all PUs are out of fullMask
            }
            ++nL;
            if (nL <= __kmp_hws_tile.offset ||
                nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
              // skip tile as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // move to next tile
            }
            // tile requested, go down the topology tree
            nC = 0;
            hC = NULL;
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ------------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ------------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
            hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
          } // tiles loop
        } else { // tile_support
          // no tiles, check cores
          nC = 0;
          hC = NULL;
          int NC =
              __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
          for (int c = 0; c < NC; ++c) {
            // Check Core --------------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // skip core if all PUs are out of fullMask
            }
            ++nC;
            if (nC <= __kmp_hws_core.offset ||
                nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
              // skip core as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // move to next core
            }
            // core requested, go down to PUs
            nT = 0;
            nTr = 0;
            hT = NULL;
            int NT =
                __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
            for (int t = 0; t < NT; ++t) {
              // Check PU --------------------------------------------
              idx = hT->os_index;
              if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // skip PU if not in fullMask
              }
              ++nT;
              if (nT <= __kmp_hws_proc.offset ||
                  nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                // skip PU
                KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                ++n_old;
                KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // move to next PU
              }
              ++nTr;
              if (pAddr) // collect requested thread's data
                newAddr[n_new] = (*pAddr)[n_old];
              ++n_new;
              ++n_old;
              hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
            } // threads loop
            if (nTr > 0) {
              ++nCr; // num cores per socket
              ++nCo; // total num cores
              if (nTr > nTpC)
                nTpC = nTr; // calc max threads per core
            }
            hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
          } // cores loop
        } // tile_support
      } // numa_support
      if (nCr > 0) { // found cores?
        ++nPkg; // num requested packages
        if (nCr > nCpP)
          nCpP = nCr; // calc max cores per package
      }
    } // sockets loop

    // check the subset is valid
    KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
    KMP_DEBUG_ASSERT(nPkg > 0);
    KMP_DEBUG_ASSERT(nCpP > 0);
    KMP_DEBUG_ASSERT(nTpC > 0);
    KMP_DEBUG_ASSERT(nCo > 0);
    KMP_DEBUG_ASSERT(nPkg <= nPackages);
    KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
    KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);

    nPackages = nPkg; // correct num sockets
    nCoresPerPkg = nCpP; // correct num cores per socket
    __kmp_nThreadsPerCore = nTpC; // correct num threads per core
    __kmp_avail_proc = n_new; // correct num procs
    __kmp_ncores = nCo; // correct num cores
    // hwloc topology method end
  } else
#endif // KMP_USE_HWLOC
  {
    int n_old = 0, n_new = 0, proc_num = 0;
    if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
      KMP_WARNING(AffHWSubsetNoHWLOC);
      goto _exit;
    }
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_core.num == 0)
      __kmp_hws_core.num = nCoresPerPkg; // use all available cores
    if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
    if (!__kmp_affinity_uniform_topology()) {
      KMP_WARNING(AffHWSubsetNonUniform);
      goto _exit; // don't support non-uniform topology
    }
    if (depth > 3) {
      KMP_WARNING(AffHWSubsetNonThreeLevel);
      goto _exit; // don't support not-3-level topology
    }
    if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
      KMP_WARNING(AffHWSubsetManyCores);
      goto _exit;
    }
    // Form the requested subset
    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
          __kmp_hws_proc.num);
    for (int i = 0; i < nPackages; ++i) {
      if (i < __kmp_hws_socket.offset ||
          i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
        // skip not-requested socket
        n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
        if (__kmp_pu_os_idx != NULL) {
          // walk through skipped socket
          for (int j = 0; j < nCoresPerPkg; ++j) {
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              ++proc_num;
            }
          }
        }
      } else {
        // walk through requested socket
        for (int j = 0; j < nCoresPerPkg; ++j) {
          if (j < __kmp_hws_core.offset ||
              j >= __kmp_hws_core.offset +
                       __kmp_hws_core.num) { // skip not-requested core
            n_old += __kmp_nThreadsPerCore;
            if (__kmp_pu_os_idx != NULL) {
              for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
                KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
                ++proc_num;
              }
            }
          } else {
            // walk through requested core
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              if (k < __kmp_hws_proc.num) {
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                n_new++;
              } else {
                if (__kmp_pu_os_idx != NULL)
                  KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              }
              n_old++;
              ++proc_num;
            }
          }
        }
      }
    }
    KMP_DEBUG_ASSERT(n_old ==
                     nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
                                  __kmp_hws_proc.num);
    nPackages = __kmp_hws_socket.num; // correct nPackages
    nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
    __kmp_avail_proc = n_new; // correct avail_proc
    __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
  } // non-hwloc topology method
  if (pAddr) {
    __kmp_free(*pAddr);
    *pAddr = newAddr; // replace old topology with new one
  }
  if (__kmp_affinity_verbose) {
    char m[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
    }
    KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf, "%d", nPackages);
    KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
    __kmp_str_buf_free(&buf);
  }
_exit:
  if (__kmp_pu_os_idx != NULL) {
    __kmp_free(__kmp_pu_os_idx);
    __kmp_pu_os_idx = NULL;
  }
}
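
// Analysis helpers for the balanced affinity code below: find the deepest
// level that still groups more than one processing unit (the "core" level),
// count the cores at that level, map a processing unit back to its core, and
// compute the maximum number of processing units bound to any single core.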
static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
                                          int nprocs, int bottom_level) {
  int core_level = 0;

  for (int i = 0; i < nprocs; i++) {
    for (int j = bottom_level; j > 0; j--) {
      if (address2os[i].first.labels[j] > 0) {
        if (core_level < (j - 1)) {
          core_level = j - 1;
        }
      }
    }
  }
  return core_level;
}
static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
                                         int nprocs, int bottom_level,
                                         int core_level) {
  int ncores = 0;
  int i, j;

  j = bottom_level;
  for (i = 0; i < nprocs; i++) {
    for (j = bottom_level; j > core_level; j--) {
      if ((i + 1) < nprocs) {
        if (address2os[i + 1].first.labels[j] > 0) {
          break;
        }
      }
    }
    if (j == core_level) {
      ncores++;
    }
  }
  if (j > core_level) {
    // In case of (nprocs < __kmp_avail_proc) we may end too deep and miss one
    // core. May occur when called from __kmp_affinity_find_core().
    ncores++;
  }
  return ncores;
}
static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
                                    int bottom_level, int core_level) {
  return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
                                       core_level) -
         1;
}
static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
                                            int nprocs, int bottom_level,
                                            int core_level) {
  int maxprocpercore = 0;

  if (core_level < bottom_level) {
    for (int i = 0; i < nprocs; i++) {
      int percore = address2os[i].first.labels[core_level + 1] + 1;

      if (percore > maxprocpercore) {
        maxprocpercore = percore;
      }
    }
  } else {
    maxprocpercore = 1;
  }
  return maxprocpercore;
}
static AddrUnsPair *address2os = NULL;
static int *procarr = NULL;
static int __kmp_aff_depth = 0;
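
// Early-exit path used by the topology detection code below when a method
// reports depth == 0 (affinity turns out to be "none"): fall back to a single
// place covering the full mask, then return from the caller.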
#if KMP_USE_HIER_SCHED
#define KMP_EXIT_AFF_NONE                                                      \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                           \
  KMP_ASSERT(address2os == NULL);                                             \
  __kmp_apply_thread_places(NULL, 0);                                         \
  __kmp_create_affinity_none_places();                                        \
  __kmp_dispatch_set_hierarchy_values();                                      \
  return;
#else
#define KMP_EXIT_AFF_NONE                                                      \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                           \
  KMP_ASSERT(address2os == NULL);                                             \
  __kmp_apply_thread_places(NULL, 0);                                         \
  __kmp_create_affinity_none_places();                                        \
  return;
#endif

// Create a one-element mask array (set of places) which only contains the
// initial process's affinity mask.
static void __kmp_create_affinity_none_places() {
  KMP_ASSERT(__kmp_affin_fullMask != NULL);
  KMP_ASSERT(__kmp_affinity_type == affinity_none);
  __kmp_affinity_num_masks = 1;
  KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
  kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
  KMP_CPU_COPY(dest, __kmp_affin_fullMask);
}
static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
  KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
  for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
    int j = depth - i - 1;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  for (; i < depth; i++) {
    int j = i - __kmp_affinity_compact;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  return 0;
}
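
// The top-level topology detection driver. Builds __kmp_affin_fullMask,
// dispatches to one of the map-creation methods (hwloc, x2apic/apic cpuid,
// /proc/cpuinfo, Windows processor groups, or the flat OS map), then forms
// the place list according to __kmp_affinity_type.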
static void __kmp_aux_affinity_initialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_ASSERT(__kmp_affin_fullMask != NULL);
    return;
  }

  // Create the "full" mask - this defines all of the processors that we
  // consider to be in the machine model. If respect is set, then it is the
  // initialization thread's affinity mask. Otherwise, it is all processors
  // that we know about on the machine.
  if (__kmp_affin_fullMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_fullMask);
  }
  if (KMP_AFFINITY_CAPABLE()) {
    if (__kmp_affinity_respect_mask) {
      __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);

      // Count the number of available processors.
      unsigned i;
      __kmp_avail_proc = 0;
      KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
          continue;
        }
        __kmp_avail_proc++;
      }
      if (__kmp_avail_proc > __kmp_xproc) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
      }
    } else {
      __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
      __kmp_avail_proc = __kmp_xproc;
    }
  }

  if (__kmp_affinity_gran == affinity_gran_tile &&
      // tiles requested but only the native OS API is available
      __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
    KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
    __kmp_affinity_gran = affinity_gran_package;
  }

  int depth = -1;
  kmp_i18n_id_t msg_id = kmp_i18n_null;

  // For backward compatibility, setting KMP_CPUINFO_FILE =>
  // KMP_TOPOLOGY_METHOD=cpuinfo
  if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
    __kmp_affinity_top_method = affinity_top_method_cpuinfo;
  }

  if (__kmp_affinity_top_method == affinity_top_method_all) {
    // In the default code path, errors are not fatal - we just try using
    // another method. We only emit a warning message if affinity is on, or
    // the verbose flag is set, and the nowarnings flag was not set.
    const char *file_name = NULL;
    int line = 0;
#if KMP_USE_HWLOC
    if (depth < 0 &&
        __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      }
      if (!__kmp_hwloc_error) {
        depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        } else if (depth < 0 && __kmp_affinity_verbose) {
          KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
        }
      } else if (__kmp_affinity_verbose) {
        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
      }
    }
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
      }

      file_name = NULL;
      depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }

      if (depth < 0) {
        if (__kmp_affinity_verbose) {
          if (msg_id != kmp_i18n_null) {
            KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
                       __kmp_i18n_catgets(msg_id),
                       KMP_I18N_STR(DecodingLegacyAPIC));
          } else {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
                       KMP_I18N_STR(DecodingLegacyAPIC));
          }
        }

        file_name = NULL;
        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_LINUX
    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        if (msg_id != kmp_i18n_null) {
          KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                     __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
        } else {
          KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
        }
      }

      FILE *f = fopen("/proc/cpuinfo", "r");
      if (f == NULL) {
        msg_id = kmp_i18n_str_CantOpenCpuinfo;
      } else {
        file_name = "/proc/cpuinfo";
        depth =
            __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }
#endif /* KMP_OS_LINUX */

#if KMP_GROUP_AFFINITY
    if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
      }

      depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
      KMP_ASSERT(depth != 0);
    }
#endif /* KMP_GROUP_AFFINITY */

    if (depth < 0) {
      if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
        if (file_name == NULL) {
          KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
        } else if (line == 0) {
          KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
        } else {
          KMP_INFORM(UsingFlatOSFileLine, file_name, line,
                     __kmp_i18n_catgets(msg_id));
        }
      }

      file_name = "";
      depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }
      KMP_ASSERT(depth > 0);
      KMP_ASSERT(address2os != NULL);
    }
  }
#if KMP_USE_HWLOC
  else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
    KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
    }
    depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
  }
#endif // KMP_USE_HWLOC

  // If the user has specified that a particular topology discovery method is
  // to be used, then we abort if that method fails. The exception is group
  // affinity, which might have been implicitly set.

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
    }

    depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
    }

    depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
    const char *filename;
    if (__kmp_cpuinfo_file != NULL) {
      filename = __kmp_cpuinfo_file;
    } else {
      filename = "/proc/cpuinfo";
    }

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
    }

    FILE *f = fopen(filename, "r");
    if (f == NULL) {
      int code = errno;
      if (__kmp_cpuinfo_file != NULL) {
        __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
                    KMP_HNT(NameComesFrom_CPUINFO_FILE), __kmp_msg_null);
      } else {
        __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
                    __kmp_msg_null);
      }
    }
    int line = 0;
    depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
    fclose(f);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      if (line > 0) {
        KMP_FATAL(FileLineMsgExiting, filename, line,
                  __kmp_i18n_catgets(msg_id));
      } else {
        KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
      }
    }
    if (__kmp_affinity_type == affinity_none) {
      KMP_ASSERT(depth == 0);
      KMP_EXIT_AFF_NONE;
    }
  }

#if KMP_GROUP_AFFINITY
  else if (__kmp_affinity_top_method == affinity_top_method_group) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
    }

    depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
    KMP_ASSERT(depth != 0);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }
#endif /* KMP_GROUP_AFFINITY */

  else if (__kmp_affinity_top_method == affinity_top_method_flat) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
    }

    depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    // should not fail
    KMP_ASSERT(depth > 0);
    KMP_ASSERT(address2os != NULL);
  }

#if KMP_USE_HIER_SCHED
  __kmp_dispatch_set_hierarchy_values();
#endif
  if (address2os == NULL) {
    if (KMP_AFFINITY_CAPABLE() &&
        (__kmp_affinity_verbose ||
         (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
      KMP_WARNING(ErrorInitializeAffinity);
    }
    __kmp_affinity_type = affinity_none;
    __kmp_create_affinity_none_places();
    KMP_AFFINITY_DISABLE();
    return;
  }

  if (__kmp_affinity_gran == affinity_gran_tile
#if KMP_USE_HWLOC
      && __kmp_tile_depth == 0
#endif
  ) {
    // tiles requested but not detected, warn user on this
    KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY");
  }

  __kmp_apply_thread_places(&address2os, depth);

  // Create the table of masks, indexed by thread Id.
  unsigned maxIndex;
  unsigned numUnique;
  kmp_affin_mask_t *osId2Mask =
      __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
  if (__kmp_affinity_gran_levels == 0) {
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
  }

  // Set the childNums vector in all Address objects. This must be done
  // before we can sort using __kmp_affinity_cmp_Address_child_num(), which
  // takes into account the setting of __kmp_affinity_compact.
  __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
  switch (__kmp_affinity_type) {

  case affinity_explicit:
    KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
      __kmp_affinity_process_proclist(
          &__kmp_affinity_masks, &__kmp_affinity_num_masks,
          __kmp_affinity_proclist, osId2Mask, maxIndex);
    } else {
      __kmp_affinity_process_placelist(
          &__kmp_affinity_masks, &__kmp_affinity_num_masks,
          __kmp_affinity_proclist, osId2Mask, maxIndex);
    }
    if (__kmp_affinity_num_masks == 0) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffNoValidProcID);
      }
      __kmp_affinity_type = affinity_none;
      __kmp_create_affinity_none_places();
      return;
    }
    break;

  // The other affinity types rely on sorting the Addresses according to
  // some permutation of the machine topology tree. Set
  // __kmp_affinity_compact and __kmp_affinity_offset appropriately, then
  // jump to a common code fragment to do the sort and create the array of
  // affinity masks.

  case affinity_logical:
    __kmp_affinity_compact = 0;
    if (__kmp_affinity_offset) {
      __kmp_affinity_offset =
          __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
    }
    goto sortAddresses;

  case affinity_physical:
    if (__kmp_nThreadsPerCore > 1) {
      __kmp_affinity_compact = 1;
      if (__kmp_affinity_compact >= depth) {
        __kmp_affinity_compact = 0;
      }
    } else {
      __kmp_affinity_compact = 0;
    }
    if (__kmp_affinity_offset) {
      __kmp_affinity_offset =
          __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
    }
    goto sortAddresses;

  case affinity_scatter:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = 0;
    } else {
      __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
    }
    goto sortAddresses;

  case affinity_compact:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = depth - 1;
    }
    goto sortAddresses;

  case affinity_balanced:
    if (depth <= 1) {
      if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
        KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
      }
      __kmp_affinity_type = affinity_none;
      __kmp_create_affinity_none_places();
      return;
    } else if (!__kmp_affinity_uniform_topology()) {
      // Save the depth for further usage.
      __kmp_aff_depth = depth;

      int core_level = __kmp_affinity_find_core_level(
          address2os, __kmp_avail_proc, depth - 1);
      int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                                 depth - 1, core_level);
      int maxprocpercore = __kmp_affinity_max_proc_per_core(
          address2os, __kmp_avail_proc, depth - 1, core_level);

      int nproc = ncores * maxprocpercore;
      if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
        if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
          KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
        }
        __kmp_affinity_type = affinity_none;
        return;
      }

      procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        procarr[i] = -1;
      }

      int lastcore = -1;
      int inlastcore = 0;
      for (int i = 0; i < __kmp_avail_proc; i++) {
        int proc = address2os[i].second;
        int core =
            __kmp_affinity_find_core(address2os, i, depth - 1, core_level);

        if (core == lastcore) {
          inlastcore++;
        } else {
          inlastcore = 0;
        }
        lastcore = core;

        procarr[core * maxprocpercore + inlastcore] = proc;
      }
    }
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = depth - 1;
    }

  sortAddresses:
    // Allocate the gtid->affinity mask table.
    if (__kmp_affinity_dups) {
      __kmp_affinity_num_masks = __kmp_avail_proc;
    } else {
      __kmp_affinity_num_masks = numUnique;
    }

    if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
        (__kmp_affinity_num_places > 0) &&
        ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
      __kmp_affinity_num_masks = __kmp_affinity_num_places;
    }

    KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);

    // Sort the address2os table according to the current setting of
    // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
    qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
    {
      int i;
      unsigned j;
      for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
        if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
          continue;
        }
        unsigned osId = address2os[i].second;
        kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
        KMP_ASSERT(KMP_CPU_ISSET(osId, src));
        KMP_CPU_COPY(dest, src);
        if (++j >= __kmp_affinity_num_masks) {
          break;
        }
      }
      KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
    }
    break;

  default:
    KMP_ASSERT2(0, "Unexpected affinity setting");
  }

  KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
  machine_hierarchy.init(address2os, __kmp_avail_proc);
}
#undef KMP_EXIT_AFF_NONE

void __kmp_affinity_initialize(void) {
  // Much of the code above was written assuming that if a machine was not
  // affinity capable, then __kmp_affinity_type == affinity_none. If the
  // machine is not affinity capable but the type is affinity_disabled,
  // temporarily slam it to affinity_none, call the real initialization
  // routine, then restore affinity_disabled.
  int disabled = (__kmp_affinity_type == affinity_disabled);
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(disabled);
  }
  if (disabled) {
    __kmp_affinity_type = affinity_none;
  }
  __kmp_aux_affinity_initialize();
  if (disabled) {
    __kmp_affinity_type = affinity_disabled;
  }
}
void __kmp_affinity_uninitialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
    __kmp_affinity_masks = NULL;
  }
  if (__kmp_affin_fullMask != NULL) {
    KMP_CPU_FREE(__kmp_affin_fullMask);
    __kmp_affin_fullMask = NULL;
  }
  __kmp_affinity_num_masks = 0;
  __kmp_affinity_type = affinity_default;
  __kmp_affinity_num_places = 0;
  if (__kmp_affinity_proclist != NULL) {
    __kmp_free(__kmp_affinity_proclist);
    __kmp_affinity_proclist = NULL;
  }
  if (address2os != NULL) {
    __kmp_free(address2os);
    address2os = NULL;
  }
  if (procarr != NULL) {
    __kmp_free(procarr);
    procarr = NULL;
  }
#if KMP_USE_HWLOC
  if (__kmp_hwloc_topology != NULL) {
    hwloc_topology_destroy(__kmp_hwloc_topology);
    __kmp_hwloc_topology = NULL;
  }
#endif
  KMPAffinity::destroy_api();
}
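
// Bind the calling thread (gtid) to its initial place: either the full mask
// (affinity none/balanced, or proc_bind_false) or entry
// (gtid + offset) % num_masks of the place table.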
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
  if (th->th.th_affin_mask == NULL) {
    KMP_CPU_ALLOC(th->th.th_affin_mask);
  } else {
    KMP_CPU_ZERO(th->th.th_affin_mask);
  }

  // Copy the thread mask to the kmp_info_t structure. If
  // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
  // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
  // is set, then the full mask is the same as the mask of the
  // initialization thread.
  kmp_affin_mask_t *mask;
  int i;

#if OMP_40_ENABLED
  if (KMP_AFFINITY_NON_PROC_BIND)
#endif
  {
    if ((__kmp_affinity_type == affinity_none) ||
        (__kmp_affinity_type == affinity_balanced)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }
#if OMP_40_ENABLED
  else {
    if ((!isa_root) ||
        (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      // int i = some hash function or just a counter that doesn't
      // always start at 0. Use gtid for now.
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }
#endif

#if OMP_40_ENABLED
  th->th.th_current_place = i;
  if (isa_root) {
    th->th.th_new_place = i;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
  } else if (KMP_AFFINITY_NON_PROC_BIND) {
    // When using a Non-OMP_PROC_BIND affinity method, set all threads'
    // place-partition-var to the entire place list.
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
  }

  if (i == KMP_PLACE_ALL) {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
                   gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
                   gtid, i));
  }
#else
  if (i == -1) {
    KA_TRACE(
        100,
        ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
         gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
                   gtid, i));
  }
#endif /* OMP_40_ENABLED */

  KMP_CPU_COPY(th->th.th_affin_mask, mask);

  if (__kmp_affinity_verbose
      /* to avoid duplicate printing (will be correctly printed on barrier) */
      && (__kmp_affinity_type == affinity_none ||
          (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }

#if KMP_OS_WINDOWS
  // On Windows* OS, the process affinity mask might have changed. If the
  // user didn't request affinity and this call fails, just continue silently.
  if (__kmp_affinity_type == affinity_none) {
    __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
  } else
#endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
void __kmp_affinity_set_place(int gtid) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

  KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
                 "place = %d)\n",
                 gtid, th->th.th_new_place, th->th.th_current_place));

  // Check that the new place is within this thread's partition.
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  KMP_ASSERT(th->th.th_new_place >= 0);
  KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
  if (th->th.th_first_place <= th->th.th_last_place) {
    KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
               (th->th.th_new_place <= th->th.th_last_place));
  } else {
    KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
               (th->th.th_new_place >= th->th.th_last_place));
  }

  // Copy the thread mask to the kmp_info_t structure,
  // and set this thread's affinity.
  kmp_affin_mask_t *mask =
      KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
  KMP_CPU_COPY(th->th.th_affin_mask, mask);
  th->th.th_current_place = th->th.th_new_place;

  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }
  __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
int __kmp_aux_set_affinity(void **mask) {
  int gtid;
  kmp_info_t *th;
  int retval;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf(
        "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
        buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
    } else {
      unsigned proc;
      int num_procs = 0;

      KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
        if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
          KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
          continue;
        }
        num_procs++;
      }
      if (num_procs == 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }

#if KMP_GROUP_AFFINITY
      if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }
#endif /* KMP_GROUP_AFFINITY */
    }
  }

  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  if (retval == 0) {
    KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
  }

  th->th.th_current_place = KMP_PLACE_UNDEFINED;
  th->th.th_new_place = KMP_PLACE_UNDEFINED;
  th->th.th_first_place = 0;
  th->th.th_last_place = __kmp_affinity_num_masks - 1;

  // Turn off 4.0 affinity for the current thread at this parallel level.
  th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;

  return retval;
}
int __kmp_aux_get_affinity(void **mask) {
  int gtid;
  int retval;
  kmp_info_t *th;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
                 gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
    }
  }

#if !KMP_OS_WINDOWS

  retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
                 gtid, buf);
  });
  return retval;

#else

  KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
  return 0;

#endif /* !KMP_OS_WINDOWS */
}
int __kmp_aux_get_affinity_max_proc() {
  if (!KMP_AFFINITY_CAPABLE()) {
    return 0;
  }
#if KMP_GROUP_AFFINITY
  if (__kmp_num_proc_groups > 1) {
    return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
  }
#endif
  return __kmp_xproc;
}
int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, (""); {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}
int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, (""); {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}
int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, (""); {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return 0;
  }

  return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
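
// Dynamic affinity adjustment for KMP_AFFINITY=balanced: spread nthreads
// evenly over the available cores, packing at most ceil(nthreads/ncores)
// threads per core on uniform machines and using the procarr[] bookkeeping
// built above on non-uniform ones. With fine/thread granularity each thread
// is bound to a single OS proc; otherwise it is bound to its whole core.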
void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
  KMP_DEBUG_ASSERT(th);
  bool fine_gran = true;
  int tid = th->th.th_info.ds.ds_tid;

  switch (__kmp_affinity_gran) {
  case affinity_gran_fine:
  case affinity_gran_thread:
    break;
  case affinity_gran_core:
    if (__kmp_nThreadsPerCore > 1) {
      fine_gran = false;
    }
    break;
  case affinity_gran_package:
    if (nCoresPerPkg > 1) {
      fine_gran = false;
    }
    break;
  default:
    fine_gran = false;
  }

  if (__kmp_affinity_uniform_topology()) {
    int coreID;
    int threadID;
    // Number of hyper threads per core in HT machine
    int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
    // Number of cores
    int ncores = __kmp_ncores;
    if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
      __kmp_nth_per_core = __kmp_avail_proc / nPackages;
      ncores = nPackages;
    }
    // How many threads will be bound to each core
    int chunk = nthreads / ncores;
    // How many cores will have an additional thread bound to it - "big cores"
    int big_cores = nthreads % ncores;
    // Number of threads on the big cores
    int big_nth = (chunk + 1) * big_cores;
    if (tid < big_nth) {
      coreID = tid / (chunk + 1);
      threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
    } else { // tid >= big_nth
      coreID = (tid - big_cores) / chunk;
      threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
    }

    KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
                      "Illegal set affinity operation when not capable");

    kmp_affin_mask_t *mask = th->th.th_affin_mask;
    KMP_CPU_ZERO(mask);

    if (fine_gran) {
      int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
      KMP_CPU_SET(osID, mask);
    } else {
      for (int i = 0; i < __kmp_nth_per_core; i++) {
        int osID;
        osID = address2os[coreID * __kmp_nth_per_core + i].second;
        KMP_CPU_SET(osID, mask);
      }
    }
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
  } else { // Non-uniform topology
    kmp_affin_mask_t *mask = th->th.th_affin_mask;
    KMP_CPU_ZERO(mask);

    int core_level = __kmp_affinity_find_core_level(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
    int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                               __kmp_aff_depth - 1, core_level);
    int nth_per_core = __kmp_affinity_max_proc_per_core(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);

    // For performance gain consider the special case
    // nthreads == __kmp_avail_proc.
    if (nthreads == __kmp_avail_proc) {
      if (fine_gran) {
        int osID = address2os[tid].second;
        KMP_CPU_SET(osID, mask);
      } else {
        int core = __kmp_affinity_find_core(address2os, tid,
                                            __kmp_aff_depth - 1, core_level);
        for (int i = 0; i < __kmp_avail_proc; i++) {
          int osID = address2os[i].second;
          if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
                                       core_level) == core) {
            KMP_CPU_SET(osID, mask);
          }
        }
      }
    } else if (nthreads <= ncores) {
      int core = 0;
      for (int i = 0; i < ncores; i++) {
        // Check whether this core from procarr[] is in the mask.
        int in_mask = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            in_mask = 1;
            break;
          }
        }
        if (in_mask) {
          if (tid == core) {
            for (int j = 0; j < nth_per_core; j++) {
              int osID = procarr[i * nth_per_core + j];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
                // For fine granularity it is enough to set the first
                // available osID for this core.
                if (fine_gran) {
                  break;
                }
              }
            }
            break;
          } else {
            core++;
          }
        }
      }
    } else { // nthreads > ncores
      // Array to save the number of processors at each core
      int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
      // Array to save the number of cores with "x" available processors
      int *ncores_with_x_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      // Array to save the number of cores with # procs from x to nth_per_core
      int *ncores_with_x_to_max_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));

      for (int i = 0; i <= nth_per_core; i++) {
        ncores_with_x_procs[i] = 0;
        ncores_with_x_to_max_procs[i] = 0;
      }

      for (int i = 0; i < ncores; i++) {
        int cnt = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            cnt++;
          }
        }
        nproc_at_core[i] = cnt;
        ncores_with_x_procs[cnt]++;
      }

      for (int i = 0; i <= nth_per_core; i++) {
        for (int j = i; j <= nth_per_core; j++) {
          ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
        }
      }

      // Max number of processors
      int nproc = nth_per_core * ncores;
      // An array to keep number of threads per each context
      int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        newarr[i] = 0;
      }

      int nth = nthreads;
      int flag = 0;
      while (nth > 0) {
        for (int j = 1; j <= nth_per_core; j++) {
          int cnt = ncores_with_x_to_max_procs[j];
          for (int i = 0; i < ncores; i++) {
            // Skip the core with 0 processors.
            if (nproc_at_core[i] == 0) {
              continue;
            }
            for (int k = 0; k < nth_per_core; k++) {
              if (procarr[i * nth_per_core + k] != -1) {
                if (newarr[i * nth_per_core + k] == 0) {
                  newarr[i * nth_per_core + k] = 1;
                  cnt--;
                  nth--;
                  break;
                } else {
                  if (flag != 0) {
                    newarr[i * nth_per_core + k]++;
                    cnt--;
                    nth--;
                    break;
                  }
                }
              }
            }
            if (cnt == 0 || nth == 0) {
              break;
            }
          }
          if (nth == 0) {
            break;
          }
        }
        flag = 1;
      }
      int sum = 0;
      for (int i = 0; i < nproc; i++) {
        sum += newarr[i];
        if (sum > tid) {
          if (fine_gran) {
            int osID = procarr[i];
            KMP_CPU_SET(osID, mask);
          } else {
            int coreID = i / nth_per_core;
            for (int ii = 0; ii < nth_per_core; ii++) {
              int osID = procarr[coreID * nth_per_core + ii];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
              }
            }
          }
          break;
        }
      }
      __kmp_free(newarr);
    }

    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
  }
}
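
// An entry point for non-OpenMP code. The intended usage is:
// 1) The user gets the current affinity mask
// 2) Then sets the affinity by calling this function
// 3) Error check the return value
// 4) Use non-OpenMP parallelization
// 5) Reset the affinity to what was stored in step 1)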
#if KMP_OS_LINUX
#ifdef __cplusplus
extern "C"
#endif
    int
    kmp_set_thread_affinity_mask_initial()
// the function returns 0 on success,
//   -1 if we cannot bind thread
//   >0 (errno) if an error happened during binding
{
  int gtid = __kmp_get_gtid();
  if (gtid < 0) {
    // Do not touch non-omp threads
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
    return -1;
  }
  if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
    return -1;
  }
  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
                gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
  return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif

#endif // KMP_AFFINITY_SUPPORTED