#include "kmp_affinity.h"
#include "kmp_wrapper_getpid.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

// Store the real or imagined machine hierarchy here.
static hierarchy_info machine_hierarchy;
void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // The machine hierarchy is lazily initialized on first use.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case num threads exceeds original.
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;

// Route all mask/object storage through the runtime's own allocator.
void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
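
// Pick the affinity API implementation once per process: the hwloc-backed
// dispatcher when the user requested the hwloc topology method and affinity
// is not disabled, otherwise the native OS affinity layer.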
void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}
void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}
// Print the affinity mask to the character array in a pretty format.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  KMP_ASSERT(buf_len >= 40);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Find first element / check for empty set.
  size_t i;
  i = mask->begin();
  if (i == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    while (*scan != '\0')
      scan++;
    KMP_ASSERT(scan <= end);
    return buf;
  }

  KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
  while (*scan != '\0')
    scan++;
  i++;
  for (; i != mask->end(); i = mask->next(i)) {
    if (!KMP_CPU_ISSET(i, mask)) {
      continue;
    }

    // Check for buffer overflow. A string of the form ",<n>" will have at
    // most 10 characters, plus we want to leave room to print ",...}" if the
    // set is too large to print, for a total of 15 characters. We already
    // left room for '\0' in setting end.
    if (end - scan < 15) {
      break;
    }
    KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
    while (*scan != '\0')
      scan++;
  }
  if (i != mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, ",...");
    while (*scan != '\0')
      scan++;
  }
  KMP_SNPRINTF(scan, end - scan + 1, "}");
  while (*scan != '\0')
    scan++;
  KMP_ASSERT(scan <= end);
  return buf;
}
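
// For example, a mask containing OS procs 0-3 prints as "{0,1,2,3}"; a set
// too large for the buffer is truncated and printed as "{0,1,2,...}".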
void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    for (proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place the ordering of the
// children of each entry into the childNums field of the parent.
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        counts[labCt]++;
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(counts);
  __kmp_free(lastLabel);
}
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

// A topology is "uniform" when every package has the same number of cores,
// and every core the same number of hardware threads.
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
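
// For example, 2 packages x 8 cores x 2 hardware threads gives 32 thread
// contexts; the topology is reported uniform only if __kmp_avail_proc == 32.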
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}
#if KMP_USE_HWLOC
static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
                                          int depth, int *levels) {
  int proc;
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
                        addrP[proc].first.labels[0]);
    if (depth > 1) {
      int level = 1; // iterate over levels
      int label = 1; // iterate over labels
      if (__kmp_numa_detected)
        // node level follows package
        if (levels[level++] > 0)
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
                              addrP[proc].first.labels[label++]);
      if (__kmp_tile_depth > 0)
        // tile level follows node if any, or package
        if (levels[level++] > 0)
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
                              addrP[proc].first.labels[label++]);
      if (levels[level++] > 0)
        // core level follows tile if any, or package
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
                            addrP[proc].first.labels[label++]);
      if (levels[level++] > 0)
        // thread level is the latest
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
                            addrP[proc].first.labels[label++]);
      KMP_DEBUG_ASSERT(label == depth);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
    __kmp_str_buf_clear(&buf);
  }
  __kmp_str_buf_free(&buf);
}
static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile;

// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology. The most common example is when you
// have one thread context per core: we don't want an extra thread-context
// level if it offers no unique labels.
// Return value: the new depth of address2os.
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
                                                  int depth, int *levels) {
  int level;
  int i;
  int radix1_detected;
  int new_depth = depth;
  for (level = depth - 1; level > 0; --level) {
    // Detect if this level is radix 1.
    radix1_detected = 1;
    for (i = 1; i < nTh; ++i) {
      if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {
        // There are differing label values for this level so it stays.
        radix1_detected = 0;
        break;
      }
    }
    if (!radix1_detected)
      continue;
    // Radix 1 was detected.
    --new_depth;
    levels[level] = -1; // mark level as not present in address2os array
    if (level == new_depth) {
      // "turn off" deepest level, keep it for later usage
      for (i = 0; i < nTh; ++i) {
        addrP[i].first.depth--;
      }
    } else {
      // For other levels, we move labels over and also reduce the depth.
      int j;
      for (j = level; j < new_depth; ++j) {
        for (i = 0; i < nTh; ++i) {
          addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];
          addrP[i].first.depth--;
        }
      }
    }
  }
  return new_depth;
}
// Returns the number of objects of type 'type' below 'obj' within the
// topology tree structure. E.g., if obj is a HWLOC_OBJ_PACKAGE object and
// type is HWLOC_OBJ_PU, this returns the number of PUs under the package.
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL &&
       hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
           obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}

static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
                                               hwloc_obj_t o, unsigned depth,
                                               hwloc_obj_t *f) {
  if (o->depth == depth) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
  return sum; // will be 0 if no one found (as PU arity is 0)
}

static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
                                              hwloc_obj_type_t type,
                                              hwloc_obj_t *f) {
  if (!hwloc_compare_types(o->type, type)) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
  return sum; // will be 0 if no one found (as PU arity is 0)
}
static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,
                                           int &nActiveThreads,
                                           int &num_active_cores,
                                           hwloc_obj_t obj, int depth,
                                           int *labels) {
  hwloc_obj_t core = NULL;
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);
  for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {
    hwloc_obj_t pu = NULL;
    KMP_DEBUG_ASSERT(core != NULL);
    int num_active_threads = 0;
    int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);
    for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
      KMP_DEBUG_ASSERT(pu != NULL);
      if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
        continue; // skip PU if not in fullMask
      Address addr(depth + 2);
      KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                    obj->os_index, obj->logical_index, core->os_index,
                    core->logical_index, pu->os_index, pu->logical_index));
      for (int i = 0; i < depth; ++i)
        addr.labels[i] = labels[i]; // package, etc.
      addr.labels[depth] = core_id; // core
      addr.labels[depth + 1] = pu_id; // pu
      addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
      __kmp_pu_os_idx[nActiveThreads] = pu->os_index;
      nActiveThreads++;
      ++num_active_threads; // count active threads per core
    }
    if (num_active_threads) { // were there any active threads on the core?
      ++__kmp_ncores; // count total active cores
      ++num_active_cores; // count active cores per socket
      if (num_active_threads > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = num_active_threads; // calc maximum
    }
  }
  return 0;
}
// Check if NUMA node is detected below the package, and if a tile object is
// detected, record its depth.
static int __kmp_hwloc_check_numa() {
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
  int depth;

  // Get some PU.
  hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
  if (hT == NULL) // something has gone wrong
    return 1;

  // Check NUMA node below PACKAGE.
  hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
  hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
  KMP_DEBUG_ASSERT(hS != NULL);
  if (hN != NULL && hN->depth > hS->depth) {
    __kmp_numa_detected = TRUE; // socket includes node(s)
    if (__kmp_affinity_gran == affinity_gran_node) {
      __kmp_affinity_gran = affinity_gran_numa;
    }
  }

  // Check tile; get object by depth because of multiple caches possible.
  depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
  hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
  hC = NULL;
  if (hL != NULL &&
      __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
    __kmp_tile_depth = depth; // tile consists of multiple cores
  return 0;
}
static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
  hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a long name
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);
  __kmp_hwloc_check_numa();

  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);
    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_CPU_FREE(oldMask);
    return 0;
  }
  int depth = 3;
  int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread
  int labels[3] = {0}; // package [,node] [,tile] - head of labels array
  if (__kmp_numa_detected)
    ++depth;
  if (__kmp_tile_depth)
    ++depth;

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  // Traverse the topology: packages, then (optionally) NUMA nodes and tiles,
  // then cores and PUs, re-calculating the globals to count only accessible
  // resources.
  hwloc_obj_t socket, node, tile;
  int nActiveThreads = 0;
  int socket_id = 0;
  __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
  nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
  for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL;
       socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
      ++socket_id) {
    labels[0] = socket_id;
    if (__kmp_numa_detected) {
      int NN;
      int n_active_nodes = 0;
      node = NULL;
      NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
                                              &node);
      for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
        labels[1] = node_id;
        if (__kmp_tile_depth) {
          // NUMA + tiles
          int NT;
          int n_active_tiles = 0;
          tile = NULL;
          NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
                                                   &tile);
          for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
            labels[2] = tl_id;
            int n_active_cores = 0;
            __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                            n_active_cores, tile, 3, labels);
            if (n_active_cores) { // were there any active cores on the tile?
              ++n_active_tiles; // count active tiles per node
              if (n_active_cores > nCorePerTile)
                nCorePerTile = n_active_cores; // calc maximum
            }
          }
          if (n_active_tiles) { // were there any active tiles on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_tiles > nTilePerNode)
              nTilePerNode = n_active_tiles; // calc maximum
          }
        } else {
          // NUMA, no tiles
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, node, 2, labels);
          if (n_active_cores) { // were there any active cores on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_cores > nCorePerNode)
              nCorePerNode = n_active_cores; // calc maximum
          }
        }
      }
      if (n_active_nodes) { // were there any active nodes on the socket?
        ++nPackages; // count total active packages
        if (n_active_nodes > nNodePerPkg)
          nNodePerPkg = n_active_nodes; // calc maximum
      }
    } else {
      if (__kmp_tile_depth) {
        // no NUMA, tiles
        int NT;
        int n_active_tiles = 0;
        tile = NULL;
        NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,
                                                 &tile);
        for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
          labels[1] = tl_id;
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, tile, 2, labels);
          if (n_active_cores) { // were there any active cores on the tile?
            ++n_active_tiles; // count active tiles per package
            if (n_active_cores > nCorePerTile)
              nCorePerTile = n_active_cores; // calc maximum
          }
        }
        if (n_active_tiles) { // were there any active tiles on the package?
          ++nPackages; // count total active packages
          if (n_active_tiles > nTilePerPkg)
            nTilePerPkg = n_active_tiles; // calc maximum
        }
      } else {
        // no NUMA, no tiles
        int n_active_cores = 0;
        __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,
                                        socket, 1, labels);
        if (n_active_cores) { // were there any active cores on the socket?
          ++nPackages; // count total active packages
          if (n_active_cores > nCoresPerPkg)
            nCoresPerPkg = n_active_cores; // calc maximum
        }
      }
    }
  }
  // Check the unlikely case that the target topology doesn't match the mask.
  KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
  KMP_ASSERT(nActiveThreads > 0);
  if (nActiveThreads == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[0];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }
  // Sort the table by physical ID.
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);

  // Check if the topology is uniform.
  int nPUs = nPackages * __kmp_nThreadsPerCore;
  if (__kmp_numa_detected) {
    if (__kmp_tile_depth) { // NUMA + tiles
      nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);
    } else { // NUMA, no tiles
      nPUs *= (nNodePerPkg * nCorePerNode);
    }
  } else {
    if (__kmp_tile_depth) { // no NUMA, tiles
      nPUs *= (nTilePerPkg * nCorePerTile);
    } else { // no NUMA, no tiles
      nPUs *= nCoresPerPkg;
    }
  }
  unsigned uniform = (nPUs == nActiveThreads);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    if (__kmp_numa_detected) {
      if (__kmp_tile_depth) { // NUMA + tiles
        KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
                   __kmp_ncores);
      } else { // NUMA, no tiles
        KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
        nPUs *= (nNodePerPkg * nCorePerNode);
      }
    } else {
      if (__kmp_tile_depth) { // no NUMA, tiles
        KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,
                   nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
      } else { // no NUMA, no tiles
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        __kmp_str_buf_print(&buf, "%d", nPackages);
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
                   __kmp_nThreadsPerCore, __kmp_ncores);
        __kmp_str_buf_free(&buf);
      }
    }
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  int depth_full = depth; // number of levels before compressing
  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
                                                 levels);
  KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0; // lowest level (e.g. fine)
    if (__kmp_affinity_gran > affinity_gran_thread) {
      for (int i = 1; i <= depth_full; ++i) {
        if (__kmp_affinity_gran <= i) // only count deeper levels
          break;
        if (levels[depth_full - i] > 0)
          __kmp_affinity_gran_levels++;
      }
    }
    if (__kmp_affinity_gran > affinity_gran_package)
      __kmp_affinity_gran_levels++; // e.g. granularity = group
  }

  if (__kmp_affinity_verbose)
    __kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);

  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
#endif // KMP_USE_HWLOC

// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Even if __kmp_affinity_type == affinity_none, this routine might still
  // be called to set __kmp_ncores, as well as __kmp_nThreadsPerCore,
  // nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    __kmp_ncores = nPackages = __kmp_xproc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly.
  __kmp_ncores = nPackages = __kmp_avail_proc;
  __kmp_nThreadsPerCore = nCoresPerPkg = 1;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);

    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    KMP_INFORM(Uniform, "KMP_AFFINITY");
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  if (__kmp_affinity_type == affinity_none) {
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
        continue;
      __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
    }
    return 0;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(1);
    addr.labels[0] = i;
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
  }
  if (__kmp_affinity_verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Only the package level is modeled in the machine topology map,
    // so the #levels of granularity is either 0 or 1.
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels = 1;
    } else {
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 1;
}
#if KMP_GROUP_AFFINITY

// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at level 1.
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // If we aren't affinity capable, then return now.
  // The flat mapping will be used.
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(2);
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                 addr.labels[1]);
    }
  }

  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }

      // Warning: can't use affinity granularity \"gran\" with group topology
      // method, using "thread"
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 2;
}

#endif /* KMP_GROUP_AFFINITY */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int __kmp_cpuid_mask_width(int count) {
  int r = 0;

  while ((1 << r) < count)
    ++r;
  return r;
}
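
// For example, __kmp_cpuid_mask_width(6) returns 3: the smallest r such that
// (1 << r) >= 6, i.e. the number of APIC id bits reserved for 6 items.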
class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; //      ""
  unsigned maxThreadsPerPkg; //      ""
  unsigned pkgId; // inferred from above values
  unsigned coreId; //      ""
  unsigned threadId; //      ""
};
static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a,
                                                   const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->osId < bb->osId)
    return -1;
  if (aa->osId > bb->osId)
    return 1;
  return 0;
}

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->pkgId < bb->pkgId)
    return -1;
  if (aa->pkgId > bb->pkgId)
    return 1;
  if (aa->coreId < bb->coreId)
    return -1;
  if (aa->coreId > bb->coreId)
    return 1;
  if (aa->threadId < bb->threadId)
    return -1;
  if (aa->threadId > bb->threadId)
    return 1;
  return 0;
}
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use an
// algorithm which cycles through the available os threads, setting the
// current thread's affinity mask to that thread, and then retrieves the Apic
// Id for each thread context using the cpuid instruction.
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check if cpuid leaf 4 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return -1;
  }

  // Even if __kmp_affinity_type == affinity_none, this routine might still be
  // called to set __kmp_ncores, as well as __kmp_nThreadsPerCore,
  // nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    // On some OS/chip combinations where HT is supported by the chip but
    // disabled, this value will be 2 on a single core chip.
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The num cores per pkg comes from cpuid(4); 1 must be added to the
    // encoded value.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    // There is no way to reliably tell if HT is enabled without issuing the
    // cpuid instruction from every thread, so if the machine is not affinity
    // capable, we assume that HT is off.
    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }
  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  KMP_ASSERT(oldMask != NULL);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  unsigned i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    if (((buf.edx >> 9) & 1) == 0) {
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return -1;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4); 1 must be added to the encoded
    // value.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained
    // locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // This shouldn't happen unless the cpuid info on the chip is bogus.
      // Make sure to restore the affinity mask before the tail call.
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }
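
  // Worked example (values assumed for illustration): with maxThreadsPerPkg =
  // 16 and maxCoresPerPkg = 8, widthCT = 4, widthC = 3 and widthT = 1, so
  // apicId 0x2B (0b101011) decodes to pkgId 2, coreId 5, threadId 1.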
  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0].pkgId;
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 1;
  }
  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields: pkgIds may be sparsely assigned, and
  // we don't want to assume coreIds or threadIds are dense. Count the number
  // of unique pkg Ids, cores per pkg, and threads per core, and perform a
  // consistency check along the way.
  nPackages = 1;
  nCoresPerPkg = 1;
  __kmp_nThreadsPerCore = 1;
  unsigned nCores = 1;

  unsigned pkgCt = 1; // to determine radii
  unsigned lastPkgId = threadInfo[0].pkgId;
  unsigned coreCt = 1;
  unsigned lastCoreId = threadInfo[0].coreId;
  unsigned threadCt = 1;
  unsigned lastThreadId = threadInfo[0].threadId;

  // intra-pkg consistency checks
  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      pkgCt++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency check vars,
      // though.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return -1;
    }

    // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
    // fields agree between all the threads bound to a given package.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }
  }
  nPackages = pkgCt;
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = nCores;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (__kmp_affinity_uniform_topology()) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < nApics; ++i) {
    __kmp_pu_os_idx[i] = threadInfo[i].osId;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 0;
  }
  // Now that we've determined the number of packages, the number of cores per
  // package, and the number of threads per core, we can construct the data
  // structure that is to be returned.
  int pkgLevel = 0;
  int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
  int threadLevel =
      (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

  KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

  for (i = 0; i < nApics; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i].osId;
    unsigned d = 0;

    if (pkgLevel >= 0) {
      addr.labels[d++] = threadInfo[i].pkgId;
    }
    if (coreLevel >= 0) {
      addr.labels[d++] = threadInfo[i].coreId;
    }
    if (threadLevel >= 0) {
      addr.labels[d++] = threadInfo[i].threadId;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled in the
    // machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(threadInfo);
  KMP_CPU_FREE(oldMask);
  return depth;
}
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
                                              kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check to see if cpuid leaf 11 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 11) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }
  __kmp_x86_cpuid(11, 0, &buf);
  if (buf.ebx == 0) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }

  // Find the number of levels in the machine topology. While we're at it, get
  // the default values for __kmp_nThreadsPerCore & nCoresPerPkg, in case we
  // return early.
  int level;
  int threadLevel = -1;
  int coreLevel = -1;
  int pkgLevel = -1;
  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

  for (level = 0;; level++) {
    if (level > 31) {
      // If level is this big then something went wrong -> exiting.
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }
    __kmp_x86_cpuid(11, level, &buf);
    if (buf.ebx == 0) {
      if (pkgLevel < 0) {
        // Will infer nPackages from __kmp_xproc.
        pkgLevel = level;
        level++;
      }
      break;
    }
    int kind = (buf.ecx >> 8) & 0xff;
    if (kind == 1) {
      // SMT level
      threadLevel = level;
      coreLevel = -1;
      pkgLevel = -1;
      __kmp_nThreadsPerCore = buf.ebx & 0xffff;
      if (__kmp_nThreadsPerCore == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else if (kind == 2) {
      // core level
      coreLevel = level;
      pkgLevel = -1;
      nCoresPerPkg = buf.ebx & 0xffff;
      if (nCoresPerPkg == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else {
      if (level <= 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
      if (pkgLevel >= 0) {
        continue;
      }
      pkgLevel = level;
      nPackages = buf.ebx & 0xffff;
      if (nPackages == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    }
  }
  int depth = level;

  // In the above loop, "level" was counted from the finest level (usually
  // thread) to the coarsest. The caller expects the labels in
  // (*address2os)[].first.labels[] in the inverse order, so invert the vars
  // saying which level means what.
  if (threadLevel >= 0) {
    threadLevel = depth - threadLevel - 1;
  }
  if (coreLevel >= 0) {
    coreLevel = depth - coreLevel - 1;
  }
  KMP_DEBUG_ASSERT(pkgLevel >= 0);
  pkgLevel = depth - pkgLevel - 1;
  // The algorithm used starts by setting the affinity to each available
  // thread and retrieving info from the cpuid instruction, so if we aren't
  // affinity capable, we need to do something else.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }
  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  unsigned int proc;
  int nApics = 0;
  KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(proc);

    // Extract labels for each level in the machine topology map from the
    // Apic ID.
    Address addr(depth);
    int prev_shift = 0;

    for (level = 0; level < depth; level++) {
      __kmp_x86_cpuid(11, level, &buf);
      unsigned apicId = buf.edx;
      if (buf.ebx == 0) {
        if (level != depth - 1) {
          KMP_CPU_FREE(oldMask);
          *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
          return -1;
        }
        addr.labels[depth - level - 1] = apicId >> prev_shift;
        level++;
        break;
      }
      int shift = buf.eax & 0x1f;
      int mask = (1 << shift) - 1;
      addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
      prev_shift = shift;
    }
    if (level != depth) {
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }

    retval[nApics] = AddrUnsPair(addr, proc);
    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);
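
  // For example (shift values assumed for illustration): if leaf 11 reports
  // shift = 1 at the SMT level and shift = 5 at the core level, then
  // thread id = apicId & 0x1, core id = (apicId & 0x1f) >> 1, and
  // package id = apicId >> 5.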
  // If there's only one thread context to bind to, return now.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }
  // Sort the table by physical Id.
  qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

  // Find the radix at each of the levels.
  unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  for (level = 0; level < depth; level++) {
    totals[level] = 1;
    maxCt[level] = 1;
    counts[level] = 1;
    last[level] = retval[0].first.labels[level];
  }

  // From here on, the iteration variable "level" runs from the finest level
  // to the coarsest, i.e. we iterate forward through
  // (*address2os)[].first.labels[] - in the opposite direction of the
  // previous loops.
  for (proc = 1; (int)proc < nApics; proc++) {
    for (level = 0; level < depth; level++) {
      if (retval[proc].first.labels[level] != last[level]) {
        int j;
        for (j = level + 1; j < depth; j++) {
          totals[j]++;
          counts[j] = 1;
          last[j] = retval[proc].first.labels[j];
        }
        totals[level]++;
        counts[level]++;
        if (counts[level] > maxCt[level]) {
          maxCt[level] = counts[level];
        }
        last[level] = retval[proc].first.labels[level];
        break;
      } else if (level == depth - 1) {
        // All levels are identical to the previous entry: the x2APIC ids
        // were not unique.
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
        return -1;
      }
    }
  }
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly.
  if (threadLevel >= 0) {
    __kmp_nThreadsPerCore = maxCt[threadLevel];
  } else {
    __kmp_nThreadsPerCore = 1;
  }
  nPackages = totals[pkgLevel];

  if (coreLevel >= 0) {
    __kmp_ncores = totals[coreLevel];
    nCoresPerPkg = maxCt[coreLevel];
  } else {
    __kmp_ncores = nPackages;
    nCoresPerPkg = 1;
  }

  // Check to see if the machine topology is uniform.
  unsigned prod = maxCt[0];
  for (level = 1; level < depth; level++) {
    prod *= maxCt[level];
  }
  bool uniform = (prod == totals[level - 1]);
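
  // For example, with depth = 3 and maxCt = {2, 4, 2} (packages, cores/pkg,
  // threads/core), the topology is uniform only if the machine actually has
  // 2 * 4 * 2 = 16 thread contexts (totals[depth - 1] == 16).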
  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[0]);
    for (level = 1; level <= pkgLevel; level++) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (proc = 0; (int)proc < nApics; ++proc) {
    __kmp_pu_os_idx[proc] = retval[proc].second;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }
  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  int new_depth = 0;
  for (level = 0; level < depth; level++) {
    if ((maxCt[level] == 1) && (level != pkgLevel)) {
      continue;
    }
    new_depth++;
  }

  // If we are removing any levels, allocate a new vector to return,
  // and copy the relevant information to it.
  if (new_depth != depth) {
    AddrUnsPair *new_retval =
        (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
    for (proc = 0; (int)proc < nApics; proc++) {
      Address addr(new_depth);
      new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
    }
    int new_level = 0;
    int newPkgLevel = -1;
    int newCoreLevel = -1;
    int newThreadLevel = -1;
    for (level = 0; level < depth; level++) {
      if ((maxCt[level] == 1) && (level != pkgLevel)) {
        // Remove this level. Never remove the package level.
        continue;
      }
      if (level == pkgLevel) {
        newPkgLevel = new_level;
      }
      if (level == coreLevel) {
        newCoreLevel = new_level;
      }
      if (level == threadLevel) {
        newThreadLevel = new_level;
      }
      for (proc = 0; (int)proc < nApics; proc++) {
        new_retval[proc].first.labels[new_level] =
            retval[proc].first.labels[level];
      }
      new_level++;
    }

    __kmp_free(retval);
    retval = new_retval;
    depth = new_depth;
    pkgLevel = newPkgLevel;
    coreLevel = newCoreLevel;
    threadLevel = newThreadLevel;
  }
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
                                  threadLevel);
  }

  __kmp_free(last);
  __kmp_free(maxCt);
  __kmp_free(counts);
  __kmp_free(totals);
  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) {
  const unsigned *aa = (const unsigned *)a;
  const unsigned *bb = (const unsigned *)b;
  if (aa[osIdIndex] < bb[osIdIndex])
    return -1;
  if (aa[osIdIndex] > bb[osIdIndex])
    return 1;
  return 0;
}

static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(unsigned *const *)a;
  const unsigned *bb = *(unsigned *const *)b;
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
#if KMP_USE_HIER_SCHED
// Set the array sizes for the hierarchy layers.
static void __kmp_dispatch_set_hierarchy_values() {
  // Set the maximum number of L1's to number of cores, the maximum number of
  // L2's to either number of cores / 2 for Intel(R) Xeon Phi(TM) coprocessors
  // or number of cores otherwise, and the maximum number of NUMA nodes and
  // L3's to number of packages.
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
  if (__kmp_mic_type >= mic3)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
  // Set the number of hardware threads per unit at each layer.
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
      __kmp_nThreadsPerCore;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
  if (__kmp_mic_type >= mic3)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        2 * __kmp_nThreadsPerCore;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
}
// Return the index into the hierarchy for this tid and layer type (L1, L2,
// etc), i.e. this thread's L1, or this thread's L2, etc.
int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
  int index = type + 1;
  int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
  KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
  if (type == kmp_hier_layer_e::LAYER_THREAD)
    return tid;
  else if (type == kmp_hier_layer_e::LAYER_LOOP)
    return 0;
  KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
  if (tid >= num_hw_threads)
    tid = tid % num_hw_threads;
  return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
}

// Return the number of t1's per t2.
int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
  int i1 = t1 + 1;
  int i2 = t2 + 1;
  KMP_DEBUG_ASSERT(i1 <= i2);
  KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
  // (nthreads/t2) / (nthreads/t1) = t1 / t2
  return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
}
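
// For example (values assumed for illustration): with 2 hardware threads per
// core, __kmp_dispatch_get_index(5, kmp_hier_layer_e::LAYER_L1) yields
// (5 / 2) % __kmp_ncores = core 2, and __kmp_dispatch_get_t1_per_t2(LAYER_L1,
// LAYER_L3) yields the number of cores per package.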
#endif // KMP_USE_HIER_SCHED

// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
                                             int *line,
                                             kmp_i18n_id_t *const msg_id,
                                             FILE *f) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Scan the file once, counting the number of "processor" (osId) fields, and
  // finding the highest value of <n> for a node_<n> field.
  char buf[256];
  unsigned num_records = 0;
  while (!feof(f)) {
    buf[sizeof(buf) - 1] = 1;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read errors presumably because of EOF.
      break;
    }

    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      num_records++;
      continue;
    }

    unsigned level;
    if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
      if (nodeIdIndex + level >= maxIndex) {
        maxIndex = nodeIdIndex + level;
      }
      continue;
    }
  }

  // Check for empty file / no valid processor records, or too many. The
  // number of records can't exceed the number of valid bits in the affinity
  // mask.
  if (num_records == 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_NoProcRecords;
    return -1;
  }
  if (num_records > (unsigned)__kmp_xproc) {
    *line = 0;
    *msg_id = kmp_i18n_str_TooManyProcRecords;
    return -1;
  }

  // Set the file pointer back to the beginning, so that we can scan the file
  // again, this time performing a full parse of the data.
  if (fseek(f, 0, SEEK_SET) != 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_CantRewindCpuinfo;
    return -1;
  }

  // Allocate the array of records to store the proc info in. The dummy
  // element at the end makes the logic in filling them out easier to code.
  unsigned **threadInfo =
      (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
  unsigned i;
  for (i = 0; i <= num_records; i++) {
    threadInfo[i] =
        (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  }

#define CLEANUP_THREAD_INFO                                                    \
  for (i = 0; i <= num_records; i++) {                                         \
    __kmp_free(threadInfo[i]);                                                 \
  }                                                                            \
  __kmp_free(threadInfo);

  // A value of UINT_MAX means that we didn't find the field.
  unsigned __index;

#define INIT_PROC_INFO(p)                                                      \
  for (__index = 0; __index <= maxIndex; __index++) {                          \
    (p)[__index] = UINT_MAX;                                                   \
  }

  for (i = 0; i <= num_records; i++) {
    INIT_PROC_INFO(threadInfo[i]);
  }
  unsigned num_avail = 0;
  *line = 0;
  while (!feof(f)) {
    // Create an inner scoping level, so that all the goto targets at the end
    // of the loop appear in an outer scoping level. This avoids warnings
    // about jumping past an initialization to a target in the same block.
    {
      buf[sizeof(buf) - 1] = 1;
      bool long_line = false;
      if (!fgets(buf, sizeof(buf), f)) {
        // Read errors presumably because of EOF. If there is valid data in
        // threadInfo[num_avail], then fake a blank line to ensure that the
        // last address gets parsed.
        bool valid = false;
        for (i = 0; i <= maxIndex; i++) {
          if (threadInfo[num_avail][i] != UINT_MAX) {
            valid = true;
          }
        }
        if (!valid) {
          break;
        }
        buf[0] = 0;
      } else if (!buf[sizeof(buf) - 1]) {
        // The line is longer than the buffer. Set a flag and don't emit an
        // error if we were going to ignore the line, anyway.
        long_line = true;

#define CHECK_LINE                                                             \
  if (long_line) {                                                             \
    CLEANUP_THREAD_INFO;                                                       \
    *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
    return -1;                                                                 \
  }
      }
      (*line)++;

      char s1[] = "processor";
      if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s1) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
#if KMP_ARCH_AARCH64
          // Handle the old AArch64 /proc/cpuinfo layout differently: it lists
          // all of the 'processor' entries in a single 'Processor' section,
          // so the normal duplicate check would always fail.
          num_avail++;
#else
          goto dup_field;
#endif
        threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
        char path[256];
        KMP_SNPRINTF(
            path, sizeof(path),
            "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
            threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

        KMP_SNPRINTF(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
                     threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
        continue;
#else
      }
      char s2[] = "physical id";
      if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s2) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][pkgIdIndex] = val;
        continue;
      }
      char s3[] = "core id";
      if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s3) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][coreIdIndex] = val;
        continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
      }
      char s4[] = "thread id";
      if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][threadIdIndex] = val;
        continue;
      }
      unsigned level;
      if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        KMP_ASSERT(nodeIdIndex + level <= maxIndex);
        if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][nodeIdIndex + level] = val;
        continue;
      }

      // We didn't recognize the leading token on the line. There are lots of
      // leading tokens that we don't recognize - if the line isn't empty, go
      // on to the next line.
      if ((*buf != 0) && (*buf != '\n')) {
        // If the line is longer than the buffer, read characters
        // until we find a newline.
        if (long_line) {
          int ch;
          while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
            ;
        }
        continue;
      }

      // A newline has signalled the end of the processor record.
      // Check that there aren't too many procs specified.
      if ((int)num_avail == __kmp_xproc) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_TooManyEntries;
        return -1;
      }

      // Check for missing fields. The osId field must be there, and we
      // currently require that the physical id field is specified, also.
      if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingProcField;
        return -1;
      }
      if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
        return -1;
      }

      // Skip this proc if it is not included in the machine model.
      if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
                         __kmp_affin_fullMask)) {
        INIT_PROC_INFO(threadInfo[num_avail]);
        continue;
      }

      // We have a successful parse of this proc's info.
      // Increment the counter, and prepare for the next proc.
      num_avail++;
      KMP_ASSERT(num_avail <= num_records);
      INIT_PROC_INFO(threadInfo[num_avail]);
    }
    continue;

  no_val:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_MissingValCpuinfo;
    return -1;

  dup_field:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
    return -1;
  }
  *line = 0;
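
  // A typical record recognized by the parser above looks like (fields other
  // than these, e.g. "model name", are skipped; a blank line ends a record):
  //   processor   : 0
  //   physical id : 0
  //   core id     : 0
  //   thread id   : 0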
#if KMP_MIC && REDUCE_TEAM_SIZE
  unsigned teamSize = 0;
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  KMP_ASSERT(num_avail > 0);
  KMP_ASSERT(num_avail <= num_records);
  if (num_avail == 1) {
    __kmp_ncores = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
    if (__kmp_affinity_verbose) {
      if (!KMP_AFFINITY_CAPABLE()) {
        KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
          KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
          KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      }
      int index;
      kmp_str_buf_t buf;
      __kmp_str_buf_init(&buf);
      __kmp_str_buf_print(&buf, "1");
      for (index = maxIndex - 1; index > pkgIdIndex; index--) {
        __kmp_str_buf_print(&buf, " x 1");
      }
      KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
      __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
      CLEANUP_THREAD_INFO;
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0][pkgIdIndex];
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    CLEANUP_THREAD_INFO;
    return 1;
  }
  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields. pkgIds may be sparsely assigned among
  // the chips on a system, and although coreIds and threadIds are usually
  // assigned densely, we don't want to make any such assumptions. Determine
  // the radix of each field now.
  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
  unsigned threadIdCt;
  unsigned index;

restart_radix_check:
  threadIdCt = 0;

  // Initialize the counter arrays with data from threadInfo[0].
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
  for (index = 0; index <= maxIndex; index++) {
    counts[index] = 1;
    maxCt[index] = 1;
    totals[index] = 1;
    lastId[index] = threadInfo[0][index];
  }

  // Run through the rest of the records, figuring out which fields need to
  // be matched, and bumping the counters as appropriate.
  for (i = 1; i < num_avail; i++) {
    // Find the most significant index whose id differs from the id for the
    // previous record.
    for (index = maxIndex; index >= threadIdIndex; index--) {
      if (assign_thread_ids && (index == threadIdIndex)) {
        // Auto-assign the thread id field if it wasn't specified.
        if (threadInfo[i][threadIdIndex] == UINT_MAX) {
          threadInfo[i][threadIdIndex] = threadIdCt++;
        }
        // Apparently the thread id field was specified for some records and
        // not others. Start the thread id counter off at the next higher
        // thread id.
        else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
          threadIdCt = threadInfo[i][threadIdIndex] + 1;
        }
      }
      if (threadInfo[i][index] != lastId[index]) {
        // Run through all indices which are less significant, and reset the
        // counts to 1. At all levels up to and including index, we need to
        // increment the totals and record the last id.
        unsigned index2;
        for (index2 = threadIdIndex; index2 < index; index2++) {
          totals[index2]++;
          if (counts[index2] > maxCt[index2]) {
            maxCt[index2] = counts[index2];
          }
          counts[index2] = 1;
          lastId[index2] = threadInfo[i][index2];
        }
        counts[index]++;
        totals[index]++;
        lastId[index] = threadInfo[i][index];

        if (assign_thread_ids && (index > threadIdIndex)) {
#if KMP_MIC && REDUCE_TEAM_SIZE
          // The default team size is the total #threads in the machine
          // minus 1 thread for every core that has 3 or more threads.
          teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

          // Restart the thread counter, as we are on a new core.
          threadIdCt = 0;

          // Auto-assign the thread id field if it wasn't specified.
          if (threadInfo[i][threadIdIndex] == UINT_MAX) {
            threadInfo[i][threadIdIndex] = threadIdCt++;
          }
          // Apparently the thread id field was specified for some records and
          // not others. Start the thread id counter off at the next higher
          // thread id.
          else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
            threadIdCt = threadInfo[i][threadIdIndex] + 1;
          }
        }
        break;
      }
    }
    if (index < threadIdIndex) {
      // If thread ids were specified, it is an error if they are not unique.
      // Also, check that we haven't already restarted the loop (to be safe -
      // shouldn't need to).
      if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
        return -1;
      }

      // If the thread ids were not specified and we see entries that are
      // duplicates, start the loop over and assign the thread ids manually.
      assign_thread_ids = true;
      goto restart_radix_check;
    }
  }
#if KMP_MIC && REDUCE_TEAM_SIZE
  // The default team size is the total #threads in the machine
  // minus 1 thread for every core that has 3 or more threads.
  teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (counts[index] > maxCt[index]) {
      maxCt[index] = counts[index];
    }
  }

  __kmp_nThreadsPerCore = maxCt[threadIdIndex];
  nCoresPerPkg = maxCt[coreIdIndex];
  nPackages = totals[pkgIdIndex];

  // Check to see if the machine topology is uniform.
  unsigned prod = totals[maxIndex];
  for (index = threadIdIndex; index < maxIndex; index++) {
    prod *= maxCt[index];
  }
  bool uniform = (prod == totals[threadIdIndex]);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = totals[coreIdIndex];
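
  // For example, on a fully-populated 2-package x 4-core x 2-thread machine,
  // maxCt[threadIdIndex] = 2, maxCt[coreIdIndex] = 4, totals[pkgIdIndex] = 2,
  // and prod = 2 * 4 * 2 = 16 == totals[threadIdIndex], so uniform is true.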
  if (__kmp_affinity_verbose) {
    if (!KMP_AFFINITY_CAPABLE()) {
      KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    } else {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                __kmp_affin_fullMask);
      KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    }
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
    for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
               maxCt[threadIdIndex], __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
#if KMP_MIC && REDUCE_TEAM_SIZE
  // Set the default team size.
  if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
    __kmp_dflt_team_nth = teamSize;
    KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
                  "__kmp_dflt_team_nth = %d\n",
                  __kmp_dflt_team_nth));
  }
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < num_avail; ++i) { // fill the os indices
    __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return 0;
  }

  // Count the number of levels which have more nodes at that level than at
  // the parent's level (with there being an implicit root node of the top
  // level). This is equivalent to saying that there is at least one node at
  // this level which has a sibling. These levels are in the map, and the
  // package level is always in the map.
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;

  int depth = 0;
  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (inMap[index]) {
      depth++;
    }
  }
  KMP_ASSERT(depth > 0);

  // Construct the data structure that is to be returned.
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;

  for (i = 0; i < num_avail; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    int dst_index = 0;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      addr.labels[dst_index] = threadInfo[i][src_index];
      if (src_index == pkgIdIndex) {
        pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
      }
      dst_index++;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    int src_index;
    __kmp_affinity_gran_levels = 0;
    for (src_index = threadIdIndex; src_index <= (int)maxIndex; src_index++) {
      if (!inMap[src_index]) {
        continue;
      }
      switch (src_index) {
      case threadIdIndex:
        if (__kmp_affinity_gran > affinity_gran_thread) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case coreIdIndex:
        if (__kmp_affinity_gran > affinity_gran_core) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case pkgIdIndex:
        if (__kmp_affinity_gran > affinity_gran_package) {
          __kmp_affinity_gran_levels++;
        }
        break;
      }
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(inMap);
  __kmp_free(lastId);
  __kmp_free(totals);
  __kmp_free(maxCt);
  __kmp_free(counts);
  CLEANUP_THREAD_INFO;
  return depth;
}
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
  // First form a table of affinity masks in order of OS thread id.
  unsigned depth;
  unsigned maxOsId;
  unsigned i;

  KMP_ASSERT(numAddrs > 0);
  depth = address2os[0].first.depth;

  maxOsId = 0;
  for (i = numAddrs - 1;; --i) {
    unsigned osId = address2os[i].second;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
    if (i == 0)
      break;
  }
  kmp_affin_mask_t *osId2Mask;
  KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));

  // Sort the address2os table according to physical order. Doing so will put
  // all threads on the same core/package/node in consecutive locations.
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);

  KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
  if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  }
  if (__kmp_affinity_gran_levels >= (int)depth) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffThreadsMayMigrate);
    }
  }

  // Run through the table, forming the masks for all threads on each core.
  // Threads on the same core will have identical Address objects, not
  // considering the last level, which must be the thread id. All threads on a
  // core will appear consecutively.
  unsigned unique = 0;
  unsigned j = 0; // index of 1st thread on core
  unsigned leader = 0;
  Address *leaderAddr = &(address2os[0].first);
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);
  KMP_CPU_SET(address2os[0].second, sum);
  for (i = 1; i < numAddrs; i++) {
    // If this thread is sufficiently close to the leader (within the
    // granularity setting), then set the bit for this os thread in the
    // affinity mask for this group, and go on to the next thread.
    if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
      KMP_CPU_SET(address2os[i].second, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the osId2Mask table. Mark the first address as a leader.
    for (; j < i; j++) {
      unsigned osId = address2os[j].second;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
      KMP_CPU_COPY(mask, sum);
      address2os[j].first.leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    leaderAddr = &(address2os[i].first);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[i].second, sum);
  }

  // For every thread in the last group, copy the mask to the thread's
  // entry in the osId2Mask table.
  for (; j < i; j++) {
    unsigned osId = address2os[j].second;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
    KMP_CPU_COPY(mask, sum);
    address2os[j].first.leader = (j == leader);
  }
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}
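
// For example, with granularity=core on a machine with two hardware threads
// per core, OS procs 0 and 1 (same core) both map to the mask {0,1} in
// osId2Mask, and numUnique counts cores rather than hardware threads.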
// Stuff for the affinity proclist parsers.
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int i;                                                                   \
      numNewMasks *= 2;                                                        \
      kmp_affin_mask_t *temp;                                                  \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                         \
      for (i = 0; i < numNewMasks / 2; i++) {                                  \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);                    \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);                       \
        KMP_CPU_COPY(dest, src);                                               \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = temp;                                                         \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }

#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > _maxOsId) ||                                                \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                            \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    }                                                                          \
  }

// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
                                            unsigned int *out_numMasks,
                                            const char *proclist,
                                            kmp_affin_mask_t *osId2Mask,
                                            int maxOsId) {
  int i;
  const char *scan = proclist;
  const char *next = proclist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;
  kmp_affin_mask_t *sumMask;
  KMP_CPU_ALLOC(sumMask);
  int setSize = 0;
2786 int start, end, stride;
2790 if (*next ==
'\0') {
2802 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad proclist");
2804 num = __kmp_str_to_int(scan, *next);
2805 KMP_ASSERT2(num >= 0,
"bad explicit proc list");
2808 if ((num > maxOsId) ||
2809 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2810 if (__kmp_affinity_verbose ||
2811 (__kmp_affinity_warnings &&
2812 (__kmp_affinity_type != affinity_none))) {
2813 KMP_WARNING(AffIgnoreInvalidProcID, num);
2815 KMP_CPU_ZERO(sumMask);
2817 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2837 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2840 num = __kmp_str_to_int(scan, *next);
2841 KMP_ASSERT2(num >= 0,
"bad explicit proc list");
2844 if ((num > maxOsId) ||
2845 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2846 if (__kmp_affinity_verbose ||
2847 (__kmp_affinity_warnings &&
2848 (__kmp_affinity_type != affinity_none))) {
2849 KMP_WARNING(AffIgnoreInvalidProcID, num);
2852 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2869 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2871 start = __kmp_str_to_int(scan, *next);
2872 KMP_ASSERT2(start >= 0,
"bad explicit proc list");
2877 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2891 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2893 end = __kmp_str_to_int(scan, *next);
2894 KMP_ASSERT2(end >= 0,
"bad explicit proc list");
2911 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2913 stride = __kmp_str_to_int(scan, *next);
2914 KMP_ASSERT2(stride >= 0,
"bad explicit proc list");
2919 KMP_ASSERT2(stride != 0,
"bad explicit proc list");
2921 KMP_ASSERT2(start <= end,
"bad explicit proc list");
2923 KMP_ASSERT2(start >= end,
"bad explicit proc list");
2925 KMP_ASSERT2((end - start) / stride <= 65536,
"bad explicit proc list");
2930 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2932 }
while (start <= end);
2935 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2937 }
while (start >= end);
2948 *out_numMasks = nextNewMask;
2949 if (nextNewMask == 0) {
2951 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
2954 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
2955 for (i = 0; i < nextNewMask; i++) {
2956 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
2957 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
2958 KMP_CPU_COPY(dest, src);
2960 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
2961 KMP_CPU_FREE(sumMask);
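// The grammar accepted above is the KMP_AFFINITY=explicit proclist, e.g.
//   KMP_AFFINITY="explicit,proclist=[0,2-4,8-14:2,{20,21}]"
// yields the masks {0} {2} {3} {4} {8} {10} {12} {14} {20,21}, one OpenMP
// place per comma-separated item; a braced set forms a single place.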
static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  const char *next;

  for (;;) {
    int start, count, stride, i;

    // Read in the starting proc id.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
                "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    *scan = next;

    // Valid follow sets are ',' ':' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
        (*setSize)++;
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read the count parameter.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
                "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    *scan = next;

    // Valid follow sets are ',' ':' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start++;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read the stride parameter.
    int sign = +1;
    for (;;) {
      SKIP_WS(*scan);
      if (**scan == '+') {
        (*scan)++; // skip '+'
        continue;
      }
      if (**scan == '-') {
        sign *= -1;
        (*scan)++; // skip '-'
        continue;
      }
      break;
    }
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
                "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    *scan = next;
    stride *= sign;

    // Valid follow sets are ',' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start += stride;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
}
static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
  const char *next;

  // Valid follow sets are '{' '!' and num.
  SKIP_WS(*scan);
  if (**scan == '{') {
    (*scan)++; // skip '{'
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
    (*scan)++; // skip '}'
  } else if (**scan == '!') {
    (*scan)++; // skip '!'
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    next = *scan;
    SKIP_DIGITS(next);
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings &&
           (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
      (*setSize)++;
    }
    *scan = next; // skip num
  } else {
    KMP_ASSERT2(0, "bad explicit places list");
  }
}
void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                      unsigned int *out_numMasks,
                                      const char *placelist,
                                      kmp_affin_mask_t *osId2Mask,
                                      int maxOsId) {
  int i, j, count, stride, sign;
  const char *scan = placelist;
  const char *next = placelist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial place to form the
  // current place; previousMask contains the previous place.
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  int setSize = 0;

  for (;;) {
    __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

    // Valid follow sets are ',' ':' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read the count parameter.
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // Valid follow sets are ',' ':' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      stride = +1;
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read the stride parameter.
      sign = +1;
      for (;;) {
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
                  "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }

    // Add places determined by initial_place : count : stride.
    for (i = 0; i < count; i++) {
      if (setSize == 0) {
        break;
      }
      // Add the current place, then build the next place (tempMask) from it.
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          if ((__kmp_affinity_verbose ||
               (__kmp_affinity_warnings &&
                (__kmp_affinity_type != affinity_none))) &&
              i < count - 1) {
            KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // Valid follow sets are ',' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_FREE(tempMask); // don't leak the temporaries on the early exit
    KMP_CPU_FREE(previousMask);
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
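// Example: OMP_PLACES="{0,1}:4:2" starts from the place {0,1} and shifts it
// by stride 2 three more times, producing the places {0,1} {2,3} {4,5}
// {6,7} via the previousMask/tempMask exchange above.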
#undef ADD_MASK
#undef ADD_MASK_OSID

#if KMP_USE_HWLOC
static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
  // Skip PUs that are descendants of the object o.
  int skipped = 0;
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
      KMP_CPU_CLR(idx, __kmp_affin_fullMask);
      KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
      ++skipped;
    }
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return skipped; // number of skipped units
}

static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
  // Check if obj has PUs present in the full mask.
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
      return 1; // found PU
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return 0; // no PUs found
}
#endif // KMP_USE_HWLOC

// Trim the topology according to KMP_HW_SUBSET.
static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
  AddrUnsPair *newAddr;
  if (__kmp_hws_requested == 0)
    goto _exit; // no topology limiting actions requested, exit
#if KMP_USE_HWLOC
  if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
    // Number of subobjects is calculated dynamically; this works fine for
    // any non-uniform topology. L2 cache objects are determined by depth,
    // other objects by type.
    hwloc_topology_t tp = __kmp_hwloc_topology;
    int nS = 0, nN = 0, nL = 0, nC = 0,
        nT = 0; // logical index including skipped
    int nCr = 0, nTr = 0; // number of requested units
    int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0;
    hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
    int L2depth, idx;

    // Check support of extensions ----------------------------------
    int numa_support = 0, tile_support = 0;
    if (__kmp_pu_os_idx)
      hT = hwloc_get_pu_obj_by_os_index(tp,
                                        __kmp_pu_os_idx[__kmp_avail_proc - 1]);
    else
      hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
    if (hT == NULL) { // something has gone wrong
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // Check NUMA node.
    hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
    hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
    if (hN != NULL && hN->depth > hS->depth) {
      numa_support = 1; // 1 in case the socket includes node(s)
    } else if (__kmp_hws_node.num > 0) {
      // don't support sockets inside NUMA node (no such HW found for testing)
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // Check L2 cache; get the object by depth because of multiple caches.
    L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
    hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
    if (hL != NULL &&
        __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
      tile_support = 1; // no sense to count L2 if it includes a single core
    } else if (__kmp_hws_tile.num > 0) {
      if (__kmp_hws_core.num == 0) {
        __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
        __kmp_hws_tile.num = 0;
      } else {
        // L2 and core are both requested, but represent the same object
        KMP_WARNING(AffHWSubsetInvalid);
        goto _exit;
      }
    }
    // Fill in unset items, validate the settings -------------------
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_socket.offset >= nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (numa_support) {
      hN = NULL;
      int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
                                                  &hN); // nodes in socket
      if (__kmp_hws_node.num == 0)
        __kmp_hws_node.num = NN; // use all available nodes
      if (__kmp_hws_node.offset >= NN) {
        KMP_WARNING(AffHWSubsetManyNodes);
        goto _exit;
      }
      if (tile_support) {
        // Get the number of tiles in the node.
        int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
        if (__kmp_hws_tile.num == 0) {
          __kmp_hws_tile.num = NL + 1;
        } // use all available tiles; some node may have more tiles, thus +1
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
                                                    &hC); // cores in node
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    } else { // numa_support
      if (tile_support) {
        // Get the number of tiles in the socket.
        int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
        if (__kmp_hws_tile.num == 0)
          __kmp_hws_tile.num = NL; // use all available tiles
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
                                                    &hC); // cores in socket
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    }
    if (__kmp_hws_proc.num == 0)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
    if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
      KMP_WARNING(AffHWSubsetManyProcs);
      goto _exit;
    }
    // Allocate enough memory; the subset cannot exceed the current size.
    newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
                                            __kmp_avail_proc); // max size
    // Main loop to form the HW subset ------------------------------
    hS = NULL;
    int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
    for (int s = 0; s < NP; ++s) {
      // Check Socket ----------------------------------------------
      hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
      if (!__kmp_hwloc_obj_has_PUs(tp, hS))
        continue; // skip socket if all PUs are out of fullMask
      ++nS; // only count objects that have PUs in the affinity mask
      if (nS <= __kmp_hws_socket.offset ||
          nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
        n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
        continue; // move to next socket
      }
      nCr = 0; // count number of cores per socket
      // Socket requested, go down the topology tree:
      // check NUMA node, then L2 cache, then core and thread.
      if (numa_support) {
        nN = 0;
        hN = NULL;
        int NN = __kmp_hwloc_count_children_by_type(tp, hS,
                                                    HWLOC_OBJ_NUMANODE, &hN);
        for (int n = 0; n < NN; ++n) {
          // Check NUMA Node ---------------------------------------
          if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // skip node if all PUs are out of fullMask
          }
          ++nN;
          if (nN <= __kmp_hws_node.offset ||
              nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
            // skip node as not requested
            n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // move to next node
          }
          // Node requested, go down the topology tree.
          if (tile_support) {
            nL = 0;
            hL = NULL;
            int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
            for (int l = 0; l < NL; ++l) {
              // Check L2 (tile) -----------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // skip tile if all PUs are out of fullMask
              }
              ++nL;
              if (nL <= __kmp_hws_tile.offset ||
                  nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
                // skip tile as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // move to next tile
              }
              // Tile requested, go down the topology tree.
              nC = 0;
              hC = NULL;
              int NC = __kmp_hwloc_count_children_by_type(tp, hL,
                                                          HWLOC_OBJ_CORE, &hC);
              for (int c = 0; c < NC; ++c) {
                // Check Core --------------------------------------
                if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // skip core if all PUs are out of fullMask
                }
                ++nC;
                if (nC <= __kmp_hws_core.offset ||
                    nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                  // skip core as not requested
                  n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // move to next core
                }
                // Core requested, go down to PUs.
                nT = 0;
                nTr = 0;
                hT = NULL;
                int NT = __kmp_hwloc_count_children_by_type(tp, hC,
                                                            HWLOC_OBJ_PU, &hT);
                for (int t = 0; t < NT; ++t) {
                  // Check PU --------------------------------------
                  idx = hT->os_index;
                  if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // skip PU if not in fullMask
                  }
                  ++nT;
                  if (nT <= __kmp_hws_proc.offset ||
                      nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                    // skip PU
                    KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                    ++n_old;
                    KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // move to next PU
                  }
                  ++nTr;
                  if (pAddr) // collect the requested thread's data
                    newAddr[n_new] = (*pAddr)[n_old];
                  ++n_new;
                  ++n_old;
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                } // threads loop
                if (nTr > 0) {
                  ++nCr; // num cores per socket
                  ++nCo; // total num cores
                  if (nTr > nTpC)
                    nTpC = nTr; // calc max threads per core
                }
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              } // cores loop
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
            } // tiles loop
          } else { // tile_support
            // No tiles, check cores.
            nC = 0;
            hC = NULL;
            int NC = __kmp_hwloc_count_children_by_type(tp, hN,
                                                        HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ----------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // Core requested, go down to PUs.
              nT = 0;
              nTr = 0;
              hT = NULL;
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ----------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect the requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
          } // tile_support
          hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
        } // nodes loop
      } else { // numa_support
        // No NUMA support.
        if (tile_support) {
          nL = 0;
          hL = NULL;
          int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
          for (int l = 0; l < NL; ++l) {
            // Check L2 (tile) -------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // skip tile if all PUs are out of fullMask
            }
            ++nL;
            if (nL <= __kmp_hws_tile.offset ||
                nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
              // skip tile as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // move to next tile
            }
            // Tile requested, go down the topology tree.
            nC = 0;
            hC = NULL;
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ----------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // Core requested, go down to PUs.
              nT = 0;
              nTr = 0;
              hT = NULL;
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ----------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect the requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
            hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
          } // tiles loop
        } else { // tile_support
          // No tiles, check cores.
          nC = 0;
          hC = NULL;
          int NC =
              __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
          for (int c = 0; c < NC; ++c) {
            // Check Core ------------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // skip core if all PUs are out of fullMask
            }
            ++nC;
            if (nC <= __kmp_hws_core.offset ||
                nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
              // skip core as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // move to next core
            }
            // Core requested, go down to PUs.
            nT = 0;
            nTr = 0;
            hT = NULL;
            int NT =
                __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
            for (int t = 0; t < NT; ++t) {
              // Check PU ------------------------------------------
              idx = hT->os_index;
              if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // skip PU if not in fullMask
              }
              ++nT;
              if (nT <= __kmp_hws_proc.offset ||
                  nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                // skip PU
                KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                ++n_old;
                KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // move to next PU
              }
              ++nTr;
              if (pAddr) // collect the requested thread's data
                newAddr[n_new] = (*pAddr)[n_old];
              ++n_new;
              ++n_old;
              hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
            } // threads loop
            if (nTr > 0) {
              ++nCr; // num cores per socket
              ++nCo; // total num cores
              if (nTr > nTpC)
                nTpC = nTr; // calc max threads per core
            }
            hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
          } // cores loop
        } // tile_support
      } // numa_support
      if (nCr > 0) { // found cores?
        ++nPkg; // num sockets
        if (nCr > nCpP)
          nCpP = nCr; // calc max cores per socket
      }
    } // sockets loop

    // Check that the subset is valid.
    KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
    KMP_DEBUG_ASSERT(nPkg > 0);
    KMP_DEBUG_ASSERT(nCpP > 0);
    KMP_DEBUG_ASSERT(nTpC > 0);
    KMP_DEBUG_ASSERT(nCo > 0);
    KMP_DEBUG_ASSERT(nPkg <= nPackages);
    KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
    KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);

    nPackages = nPkg; // correct num sockets
    nCoresPerPkg = nCpP; // correct num cores per socket
    __kmp_nThreadsPerCore = nTpC; // correct num threads per core
    __kmp_avail_proc = n_new; // correct num procs
    __kmp_ncores = nCo; // correct num cores
    // hwloc topology method end
  } else
#endif // KMP_USE_HWLOC
  { // non-hwloc topology method
    int n_old = 0, n_new = 0, proc_num = 0;
    if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
      KMP_WARNING(AffHWSubsetNoHWLOC);
      goto _exit;
    }
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_core.num == 0)
      __kmp_hws_core.num = nCoresPerPkg; // use all available cores
    if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
    if (!__kmp_affinity_uniform_topology()) {
      KMP_WARNING(AffHWSubsetNonUniform);
      goto _exit; // don't support non-uniform topology
    }
    if (depth > 3) {
      KMP_WARNING(AffHWSubsetNonThreeLevel);
      goto _exit; // don't support not-3-level topology
    }
    if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
      KMP_WARNING(AffHWSubsetManyCores);
      goto _exit;
    }
    // Form the requested subset.
    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
          __kmp_hws_proc.num);
    for (int i = 0; i < nPackages; ++i) {
      if (i < __kmp_hws_socket.offset ||
          i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
        // skip not-requested socket
        n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
        if (__kmp_pu_os_idx != NULL) {
          // walk through the skipped socket
          for (int j = 0; j < nCoresPerPkg; ++j) {
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              ++proc_num;
            }
          }
        }
      } else {
        // walk through the requested socket
        for (int j = 0; j < nCoresPerPkg; ++j) {
          if (j < __kmp_hws_core.offset ||
              j >= __kmp_hws_core.offset +
                       __kmp_hws_core.num) { // skip not-requested core
            n_old += __kmp_nThreadsPerCore;
            if (__kmp_pu_os_idx != NULL) {
              for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
                KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
                ++proc_num;
              }
            }
          } else {
            // walk through the requested core
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              if (k < __kmp_hws_proc.num) {
                if (pAddr) // collect the requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                n_new++;
              } else {
                if (__kmp_pu_os_idx != NULL)
                  KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              }
              n_old++;
              ++proc_num;
            }
          }
        }
      }
    }
    KMP_DEBUG_ASSERT(n_old ==
                     nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
                                  __kmp_hws_proc.num);
    nPackages = __kmp_hws_socket.num; // correct nPackages
    nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct nThreadsPerCore
    __kmp_avail_proc = n_new; // correct avail_proc
    __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
  } // non-hwloc topology method
  if (pAddr) {
    __kmp_free(*pAddr);
    *pAddr = newAddr; // replace the old topology with the new one
  }
  if (__kmp_affinity_verbose) {
    char m[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
    }
    KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf, "%d", nPackages);
    KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
    __kmp_str_buf_free(&buf);
  }
_exit:
  if (__kmp_pu_os_idx != NULL) {
    __kmp_free(__kmp_pu_os_idx);
    __kmp_pu_os_idx = NULL;
  }
}
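// Example: KMP_HW_SUBSET=1s,2c,1t requests 1 socket x 2 cores x 1 thread;
// the 2 surviving PUs stay in __kmp_affin_fullMask, address2os is rebuilt
// to match, and nPackages/nCoresPerPkg/__kmp_nThreadsPerCore/__kmp_ncores
// are corrected accordingly.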
// Determine the topology level that identifies a core.
static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
                                          int nprocs, int bottom_level) {
  int core_level = 0;

  for (int i = 0; i < nprocs; i++) {
    for (int j = bottom_level; j > 0; j--) {
      if (address2os[i].first.labels[j] > 0) {
        if (core_level < (j - 1)) {
          core_level = j - 1;
        }
      }
    }
  }
  return core_level;
}
// Count how many cores the given nprocs belong to.
static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
                                         int nprocs, int bottom_level,
                                         int core_level) {
  int ncores = 0;
  int i, j;

  j = bottom_level;
  for (i = 0; i < nprocs; i++) {
    for (j = bottom_level; j > core_level; j--) {
      if ((i + 1) < nprocs) {
        if (address2os[i + 1].first.labels[j] > 0) {
          break;
        }
      }
    }
    if (j == core_level) {
      ncores++;
    }
  }
  if (j > core_level) {
    // In case of (nprocs < __kmp_avail_proc) we may end too deep and miss one
    // core. May occur when called from __kmp_affinity_find_core().
    ncores++;
  }
  return ncores;
}
// Find the 0-based index of the core to which the given proc belongs.
static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
                                    int bottom_level, int core_level) {
  return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
                                       core_level) -
         1;
}
// Find the maximal number of procs bound to a single core.
static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
                                            int nprocs, int bottom_level,
                                            int core_level) {
  int maxprocpercore = 0;

  if (core_level < bottom_level) {
    for (int i = 0; i < nprocs; i++) {
      int percore = address2os[i].first.labels[core_level + 1] + 1;
      if (percore > maxprocpercore) {
        maxprocpercore = percore;
      }
    }
  } else {
    maxprocpercore = 1;
  }
  return maxprocpercore;
}

static AddrUnsPair *address2os = NULL;
static int *procarr = NULL;
static int __kmp_aff_depth = 0;
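// Example: for (package, core, thread) labels with bottom_level == 2,
// __kmp_affinity_find_core_level() returns 1; a package holding 4 cores of
// 2 threads each then gives ncores == 4 and maxprocpercore == 2.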
#if KMP_USE_HIER_SCHED
#define KMP_EXIT_AFF_NONE                                                      \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                           \
  KMP_ASSERT(address2os == NULL);                                             \
  __kmp_apply_thread_places(NULL, 0);                                         \
  __kmp_create_affinity_none_places();                                        \
  __kmp_dispatch_set_hierarchy_values();                                      \
  return;
#else
#define KMP_EXIT_AFF_NONE                                                      \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                           \
  KMP_ASSERT(address2os == NULL);                                             \
  __kmp_apply_thread_places(NULL, 0);                                         \
  __kmp_create_affinity_none_places();                                        \
  return;
#endif

// Create a one-element mask array (set of places) which only contains the
// initial process's affinity mask.
static void __kmp_create_affinity_none_places() {
  KMP_ASSERT(__kmp_affin_fullMask != NULL);
  KMP_ASSERT(__kmp_affinity_type == affinity_none);
  __kmp_affinity_num_masks = 1;
  KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
  kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
  KMP_CPU_COPY(dest, __kmp_affin_fullMask);
}
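// Under affinity_none there is exactly one place covering the whole
// machine, so every thread simply inherits a copy of __kmp_affin_fullMask.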
static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
  KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
  for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
    int j = depth - i - 1;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  for (; i < depth; i++) {
    int j = i - __kmp_affinity_compact;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  return 0;
}
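// With __kmp_affinity_compact == n, the innermost n topology levels are
// compared first; e.g. KMP_AFFINITY=compact,1 on a (package, core, thread)
// topology orders all first hardware threads of every core before any
// second hardware thread, spreading consecutive thread ids across cores.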
static void __kmp_aux_affinity_initialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_ASSERT(__kmp_affin_fullMask != NULL);
    return;
  }

  // Create the "full" mask - this defines all of the processors that we
  // consider to be in the machine model. If respect is set, then it is the
  // initialization thread's affinity mask. Otherwise, it is all processors
  // that we know about on the machine.
  if (__kmp_affin_fullMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_fullMask);
  }
  if (KMP_AFFINITY_CAPABLE()) {
    if (__kmp_affinity_respect_mask) {
      __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);

      // Count the number of available processors.
      unsigned i;
      __kmp_avail_proc = 0;
      KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
          continue;
        }
        __kmp_avail_proc++;
      }
      if (__kmp_avail_proc > __kmp_xproc) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
      }
    } else {
      __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
      __kmp_avail_proc = __kmp_xproc;
    }
  }

  if (__kmp_affinity_gran == affinity_gran_tile &&
      // check if user's request is valid
      __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
    KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
    __kmp_affinity_gran = affinity_gran_package;
  }

  int depth = -1;
  kmp_i18n_id_t msg_id = kmp_i18n_null;

  // For backward compatibility, setting KMP_CPUINFO_FILE =>
  // KMP_TOPOLOGY_METHOD=cpuinfo
  if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
    __kmp_affinity_top_method = affinity_top_method_cpuinfo;
  }
  if (__kmp_affinity_top_method == affinity_top_method_all) {
    // In the default code path, errors are not fatal - we just try using
    // another method. We only emit a warning message if affinity is on, or
    // the verbose flag is set, and the nowarnings flag was not set.
    const char *file_name = NULL;
    int line = 0;
#if KMP_USE_HWLOC
    if (depth < 0 &&
        __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      }
      if (!__kmp_hwloc_error) {
        depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        } else if (depth < 0 && __kmp_affinity_verbose) {
          KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
        }
      } else if (__kmp_affinity_verbose) {
        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
      }
    }
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
      }

      file_name = NULL;
      depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }

      if (depth < 0) {
        if (__kmp_affinity_verbose) {
          if (msg_id != kmp_i18n_null) {
            KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
                       __kmp_i18n_catgets(msg_id),
                       KMP_I18N_STR(DecodingLegacyAPIC));
          } else {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
                       KMP_I18N_STR(DecodingLegacyAPIC));
          }
        }

        file_name = NULL;
        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_LINUX

    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        if (msg_id != kmp_i18n_null) {
          KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                     __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
        } else {
          KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
        }
      }

      FILE *f = fopen("/proc/cpuinfo", "r");
      if (f == NULL) {
        msg_id = kmp_i18n_str_CantOpenCpuinfo;
      } else {
        file_name = "/proc/cpuinfo";
        depth =
            __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }

#endif /* KMP_OS_LINUX */

#if KMP_GROUP_AFFINITY

    if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
      }

      depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
      KMP_ASSERT(depth != 0);
    }

#endif /* KMP_GROUP_AFFINITY */

    if (depth < 0) {
      if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
        if (file_name == NULL) {
          KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
        } else if (line == 0) {
          KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
        } else {
          KMP_INFORM(UsingFlatOSFileLine, file_name, line,
                     __kmp_i18n_catgets(msg_id));
        }
      }

      file_name = "";
      depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }
      KMP_ASSERT(depth > 0);
      KMP_ASSERT(address2os != NULL);
    }
  }
#if KMP_USE_HWLOC
  else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
    KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
    }
    depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
  }
#endif // KMP_USE_HWLOC

  // If the user has specified that a particular topology discovery method is
  // to be used, then we abort if that method fails. The exception is group
  // affinity, which might have been implicitly set.

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

  else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
    }

    depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
    }

    depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
    const char *filename;
    if (__kmp_cpuinfo_file != NULL) {
      filename = __kmp_cpuinfo_file;
    } else {
      filename = "/proc/cpuinfo";
    }

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
    }

    FILE *f = fopen(filename, "r");
    if (f == NULL) {
      int code = errno;
      if (__kmp_cpuinfo_file != NULL) {
        __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
                    KMP_HNT(NameComesFrom_CPUINFO_FILE), __kmp_msg_null);
      } else {
        __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
                    __kmp_msg_null);
      }
    }
    int line = 0;
    depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
    fclose(f);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      if (line > 0) {
        KMP_FATAL(FileLineMsgExiting, filename, line,
                  __kmp_i18n_catgets(msg_id));
      } else {
        KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
      }
    }
    if (__kmp_affinity_type == affinity_none) {
      KMP_ASSERT(depth == 0);
      KMP_EXIT_AFF_NONE;
    }
  }

#if KMP_GROUP_AFFINITY

  else if (__kmp_affinity_top_method == affinity_top_method_group) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
    }

    depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
    KMP_ASSERT(depth != 0);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }

#endif /* KMP_GROUP_AFFINITY */

  else if (__kmp_affinity_top_method == affinity_top_method_flat) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
    }

    depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    // should not fail
    KMP_ASSERT(depth > 0);
    KMP_ASSERT(address2os != NULL);
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_set_hierarchy_values();
#endif

  if (address2os == NULL) {
    if (KMP_AFFINITY_CAPABLE() &&
        (__kmp_affinity_verbose ||
         (__kmp_affinity_warnings &&
          (__kmp_affinity_type != affinity_none)))) {
      KMP_WARNING(ErrorInitializeAffinity);
    }
    __kmp_affinity_type = affinity_none;
    __kmp_create_affinity_none_places();
    KMP_AFFINITY_DISABLE();
    return;
  }

  if (__kmp_affinity_gran == affinity_gran_tile
#if KMP_USE_HWLOC
      && __kmp_tile_depth == 0
#endif // KMP_USE_HWLOC
  ) {
    // tiles requested but not detected, warn user on this
    KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY");
  }

  __kmp_apply_thread_places(&address2os, depth);

  // Create the table of masks, indexed by thread Id.
  unsigned maxIndex;
  unsigned numUnique;
  kmp_affin_mask_t *osId2Mask =
      __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
  if (__kmp_affinity_gran_levels == 0) {
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
  }

  // Set the childNums vector in all Address objects. This must be done before
  // we can sort using __kmp_affinity_cmp_Address_child_num(), which takes
  // into account the setting of __kmp_affinity_compact.
  __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
  switch (__kmp_affinity_type) {

  case affinity_explicit:
    KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
#if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
#endif
    {
      __kmp_affinity_process_proclist(
          &__kmp_affinity_masks, &__kmp_affinity_num_masks,
          __kmp_affinity_proclist, osId2Mask, maxIndex);
    }
#if OMP_40_ENABLED
    else {
      __kmp_affinity_process_placelist(
          &__kmp_affinity_masks, &__kmp_affinity_num_masks,
          __kmp_affinity_proclist, osId2Mask, maxIndex);
    }
#endif
    if (__kmp_affinity_num_masks == 0) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings &&
           (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffNoValidProcID);
      }
      __kmp_affinity_type = affinity_none;
      return;
    }
    break;

  // The other affinity types rely on sorting the Addresses according to some
  // permutation of the machine topology tree. Set __kmp_affinity_compact and
  // __kmp_affinity_offset appropriately, then jump to a common code fragment
  // to do the sort and create the array of affinity masks.

  case affinity_logical:
    __kmp_affinity_compact = 0;
    if (__kmp_affinity_offset) {
      __kmp_affinity_offset =
          __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
    }
    goto sortAddresses;

  case affinity_physical:
    if (__kmp_nThreadsPerCore > 1) {
      __kmp_affinity_compact = 1;
      if (__kmp_affinity_compact >= depth) {
        __kmp_affinity_compact = 0;
      }
    } else {
      __kmp_affinity_compact = 0;
    }
    if (__kmp_affinity_offset) {
      __kmp_affinity_offset =
          __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
    }
    goto sortAddresses;

  case affinity_scatter:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = 0;
    } else {
      __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
    }
    goto sortAddresses;

  case affinity_compact:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = depth - 1;
    }
    goto sortAddresses;

  case affinity_balanced:
    if (depth <= 1) {
      if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
        KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
      }
      __kmp_affinity_type = affinity_none;
      return;
    } else if (__kmp_affinity_uniform_topology()) {
      break; // the uniform case is handled in __kmp_balanced_affinity()
    } else { // Non-uniform topology
      // Save the depth for further usage.
      __kmp_aff_depth = depth;

      int core_level = __kmp_affinity_find_core_level(
          address2os, __kmp_avail_proc, depth - 1);
      int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                                 depth - 1, core_level);
      int maxprocpercore = __kmp_affinity_max_proc_per_core(
          address2os, __kmp_avail_proc, depth - 1, core_level);

      int nproc = ncores * maxprocpercore;
      if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
        if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
          KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
        }
        __kmp_affinity_type = affinity_none;
        return;
      }

      procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        procarr[i] = -1;
      }

      // Fill procarr in core order: procarr[core * maxprocpercore + thread].
      int lastcore = -1;
      int inlastcore = 0;
      for (int i = 0; i < __kmp_avail_proc; i++) {
        int proc = address2os[i].second;
        int core =
            __kmp_affinity_find_core(address2os, i, depth - 1, core_level);

        if (core == lastcore) {
          inlastcore++;
        } else {
          inlastcore = 0;
        }
        lastcore = core;

        procarr[core * maxprocpercore + inlastcore] = proc;
      }
      break;
    }

  sortAddresses:
    // Allocate the gtid->affinity mask table.
    if (__kmp_affinity_dups) {
      __kmp_affinity_num_masks = __kmp_avail_proc;
    } else {
      __kmp_affinity_num_masks = numUnique;
    }

#if OMP_40_ENABLED
    if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
        (__kmp_affinity_num_places > 0) &&
        ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
      __kmp_affinity_num_masks = __kmp_affinity_num_places;
    }
#endif

    KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);

    // Sort the address2os table according to the current setting of
    // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
    qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
    {
      int i;
      unsigned j;
      for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
        if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
          continue;
        }
        unsigned osId = address2os[i].second;
        kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
        KMP_ASSERT(KMP_CPU_ISSET(osId, src));
        KMP_CPU_COPY(dest, src);
        if (++j >= __kmp_affinity_num_masks) {
          break;
        }
      }
      KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
    }
    break;

  default:
    KMP_ASSERT2(0, "Unexpected affinity setting");
  }

  KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
  machine_hierarchy.init(address2os, __kmp_avail_proc);
}
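// Note: in the default path every discovery failure above degrades to
// affinity_none (optionally with a warning) rather than aborting; only an
// explicitly requested topology method turns its failure into KMP_FATAL.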
#undef KMP_EXIT_AFF_NONE

void __kmp_affinity_initialize(void) {
  // Affinity-incapable machines are represented by
  // __kmp_affinity_type == affinity_disabled. Temporarily slam it to
  // affinity_none, call the real initialization routine, then restore it.
  int disabled = (__kmp_affinity_type == affinity_disabled);
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(disabled);
  }
  if (disabled) {
    __kmp_affinity_type = affinity_none;
  }
  __kmp_aux_affinity_initialize();
  if (disabled) {
    __kmp_affinity_type = affinity_disabled;
  }
}
void __kmp_affinity_uninitialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
    __kmp_affinity_masks = NULL;
  }
  if (__kmp_affin_fullMask != NULL) {
    KMP_CPU_FREE(__kmp_affin_fullMask);
    __kmp_affin_fullMask = NULL;
  }
  __kmp_affinity_num_masks = 0;
  __kmp_affinity_type = affinity_default;
#if OMP_40_ENABLED
  __kmp_affinity_num_places = 0;
#endif
  if (__kmp_affinity_proclist != NULL) {
    __kmp_free(__kmp_affinity_proclist);
    __kmp_affinity_proclist = NULL;
  }
  if (address2os != NULL) {
    __kmp_free(address2os);
    address2os = NULL;
  }
  if (procarr != NULL) {
    __kmp_free(procarr);
    procarr = NULL;
  }
#if KMP_USE_HWLOC
  if (__kmp_hwloc_topology != NULL) {
    hwloc_topology_destroy(__kmp_hwloc_topology);
    __kmp_hwloc_topology = NULL;
  }
#endif
  KMPAffinity::destroy_api();
}
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
  if (th->th.th_affin_mask == NULL) {
    KMP_CPU_ALLOC(th->th.th_affin_mask);
  } else {
    KMP_CPU_ZERO(th->th.th_affin_mask);
  }

  // Copy the thread mask to the kmp_info_t structure. If
  // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
  // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
  // is set, then the full mask is the same as the mask of the
  // initialization thread.
  kmp_affin_mask_t *mask;
  int i;

#if OMP_40_ENABLED
  if (KMP_AFFINITY_NON_PROC_BIND)
#endif
  {
    if ((__kmp_affinity_type == affinity_none) ||
        (__kmp_affinity_type == affinity_balanced)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = -1;
      mask = __kmp_affin_fullMask;
    } else {
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }
#if OMP_40_ENABLED
  else {
    if ((!isa_root) ||
        (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      // int i = some hash function or just a counter that doesn't
      // always start at 0.  Use gtid for now.
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }
#endif

#if OMP_40_ENABLED
  th->th.th_current_place = i;
  if (isa_root) {
    th->th.th_new_place = i;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
  }

  if (i == KMP_PLACE_ALL) {
    KA_TRACE(100,
             ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
              gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
                   gtid, i));
  }
#else
  if (i == -1) {
    KA_TRACE(
        100,
        ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
         gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
                   gtid, i));
  }
#endif /* OMP_40_ENABLED */

  KMP_CPU_COPY(th->th.th_affin_mask, mask);

  if (__kmp_affinity_verbose
      /* to avoid duplicate printing (will be correctly printed on barrier) */
      && (__kmp_affinity_type == affinity_none || i != KMP_PLACE_ALL)) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }

#if KMP_OS_WINDOWS
  // On Windows* OS, the process affinity mask might have changed. If the user
  // didn't request affinity and this call fails, just continue silently.
  if (__kmp_affinity_type == affinity_none) {
    __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
  } else
#endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
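// Placement example: with 4 places and __kmp_affinity_offset == 0, root
// threads with gtid 0..7 bind round-robin to places 0,1,2,3,0,1,2,3 via the
// modulo computation above.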
#if OMP_40_ENABLED

void __kmp_affinity_set_place(int gtid) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

  KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
                 "place = %d)\n",
                 gtid, th->th.th_new_place, th->th.th_current_place));

  // Check that the new place is within this thread's partition.
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  KMP_ASSERT(th->th.th_new_place >= 0);
  KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
  if (th->th.th_first_place <= th->th.th_last_place) {
    KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
               (th->th.th_new_place <= th->th.th_last_place));
  } else {
    KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
               (th->th.th_new_place >= th->th.th_last_place));
  }

  // Copy the thread mask to the kmp_info_t structure,
  // and set this thread's affinity.
  kmp_affin_mask_t *mask =
      KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
  KMP_CPU_COPY(th->th.th_affin_mask, mask);
  th->th.th_current_place = th->th.th_new_place;

  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }
  __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

#endif // OMP_40_ENABLED
int __kmp_aux_set_affinity(void **mask) {
  int gtid;
  kmp_info_t *th;
  int retval;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf(
        "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
        buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
    } else {
      unsigned proc;
      int num_procs = 0;

      KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
        if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
          KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
          continue;
        }
        num_procs++;
      }
      if (num_procs == 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }

#if KMP_GROUP_AFFINITY
      if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }
#endif /* KMP_GROUP_AFFINITY */
    }
  }

  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  if (retval == 0) {
    KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
  }

#if OMP_40_ENABLED
  th->th.th_current_place = KMP_PLACE_UNDEFINED;
  th->th.th_new_place = KMP_PLACE_UNDEFINED;
  th->th.th_first_place = 0;
  th->th.th_last_place = __kmp_affinity_num_masks - 1;

  // Turn off 4.0 affinity due to kmp_set_affinity() call.
  th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
#endif

  return retval;
}
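// Note: a successful kmp_set_affinity() call overrides the place list; the
// place fields are reset to KMP_PLACE_UNDEFINED and the task's proc-bind
// ICV is forced to proc_bind_false above.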
int __kmp_aux_get_affinity(void **mask) {
  int gtid;
  int retval;
  kmp_info_t *th;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    __kmp_printf(
        "kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid,
        buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
    }
  }

#if !KMP_OS_WINDOWS

  retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_printf(
        "kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid,
        buf);
  });
  return retval;

#else

  KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
  return 0;

#endif /* !KMP_OS_WINDOWS */
}
int __kmp_aux_get_affinity_max_proc() {
  if (!KMP_AFFINITY_CAPABLE()) {
    return 0;
  }
#if KMP_GROUP_AFFINITY
  if (__kmp_num_proc_groups > 1) {
    return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
  }
#endif
  return __kmp_xproc;
}
int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, (""); {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}
int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, (""); {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}
int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, (""); {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return 0;
  }

  return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
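// These three helpers back the kmp_{set,unset,get}_affinity_mask_proc()
// user API: a proc outside [0, kmp_get_affinity_max_proc()) fails with -1,
// and a proc missing from __kmp_affin_fullMask is likewise rejected.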
// Dynamic affinity settings - Affinity balanced.
void __kmp_balanced_affinity(int tid, int nthreads) {
  bool fine_gran = true;

  switch (__kmp_affinity_gran) {
  case affinity_gran_fine:
  case affinity_gran_thread:
    break;
  case affinity_gran_core:
    if (__kmp_nThreadsPerCore > 1) {
      fine_gran = false;
    }
    break;
  case affinity_gran_package:
    if (nCoresPerPkg > 1) {
      fine_gran = false;
    }
    break;
  default:
    fine_gran = false;
  }

  if (__kmp_affinity_uniform_topology()) {
    int coreID;
    int threadID;
    // Number of hyper threads per core in HT machine.
    int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
    // Number of cores.
    int ncores = __kmp_ncores;
    if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
      __kmp_nth_per_core = __kmp_avail_proc / nPackages;
      ncores = nPackages;
    }
    // How many threads will be bound to each core.
    int chunk = nthreads / ncores;
    // How many cores will have an additional thread bound to it - "big cores".
    int big_cores = nthreads % ncores;
    // Number of threads on the big cores.
    int big_nth = (chunk + 1) * big_cores;
    if (tid < big_nth) {
      coreID = tid / (chunk + 1);
      threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
    } else { // tid >= big_nth
      coreID = (tid - big_cores) / chunk;
      threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
    }

    KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
                      "Illegal set affinity operation when not capable");

    kmp_affin_mask_t *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);

    if (fine_gran) {
      int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
      KMP_CPU_SET(osID, mask);
    } else {
      for (int i = 0; i < __kmp_nth_per_core; i++) {
        int osID;
        osID = address2os[coreID * __kmp_nth_per_core + i].second;
        KMP_CPU_SET(osID, mask);
      }
    }
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  } else { // Non-uniform topology
    kmp_affin_mask_t *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);

    int core_level = __kmp_affinity_find_core_level(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
    int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                               __kmp_aff_depth - 1, core_level);
    int nth_per_core = __kmp_affinity_max_proc_per_core(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);

    // For performance, consider the special case
    // nthreads == __kmp_avail_proc.
    if (nthreads == __kmp_avail_proc) {
      if (fine_gran) {
        int osID = address2os[tid].second;
        KMP_CPU_SET(osID, mask);
      } else {
        int core = __kmp_affinity_find_core(address2os, tid,
                                            __kmp_aff_depth - 1, core_level);
        for (int i = 0; i < __kmp_avail_proc; i++) {
          int osID = address2os[i].second;
          if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
                                       core_level) == core) {
            KMP_CPU_SET(osID, mask);
          }
        }
      }
    } else if (nthreads <= ncores) {

      int core = 0;
      for (int i = 0; i < ncores; i++) {
        // Check whether this core from procarr[] is in the mask.
        int in_mask = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            in_mask = 1;
            break;
          }
        }
        if (in_mask) {
          if (tid == core) {
            for (int j = 0; j < nth_per_core; j++) {
              int osID = procarr[i * nth_per_core + j];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
                // For fine granularity it is enough to set the first
                // available osID for this core.
                if (fine_gran) {
                  break;
                }
              }
            }
            break;
          } else {
            core++;
          }
        }
      }
    } else { // nthreads > ncores
      // Array to save the number of processors at each core.
      int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
      // Array to save the number of cores with "x" available processors.
      int *ncores_with_x_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      // Array to save the number of cores with # procs from x to nth_per_core.
      int *ncores_with_x_to_max_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));

      for (int i = 0; i <= nth_per_core; i++) {
        ncores_with_x_procs[i] = 0;
        ncores_with_x_to_max_procs[i] = 0;
      }

      for (int i = 0; i < ncores; i++) {
        int cnt = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            cnt++;
          }
        }
        nproc_at_core[i] = cnt;
        ncores_with_x_procs[cnt]++;
      }

      for (int i = 0; i <= nth_per_core; i++) {
        for (int j = i; j <= nth_per_core; j++) {
          ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
        }
      }

      // Max number of processors.
      int nproc = nth_per_core * ncores;
      // An array to keep the number of threads per each context.
      int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        newarr[i] = 0;
      }

      int nth = nthreads;
      int flag = 0;
      while (nth > 0) {
        for (int j = 1; j <= nth_per_core; j++) {
          int cnt = ncores_with_x_to_max_procs[j];
          for (int i = 0; i < ncores; i++) {
            // Skip the core with 0 processors.
            if (nproc_at_core[i] == 0) {
              continue;
            }
            for (int k = 0; k < nth_per_core; k++) {
              if (procarr[i * nth_per_core + k] != -1) {
                if (newarr[i * nth_per_core + k] == 0) {
                  newarr[i * nth_per_core + k] = 1;
                  cnt--;
                  nth--;
                  break;
                } else {
                  if (flag != 0) {
                    newarr[i * nth_per_core + k]++;
                    cnt--;
                    nth--;
                    break;
                  }
                }
              }
            }
            if (cnt == 0 || nth == 0) {
              break;
            }
          }
          if (nth == 0) {
            break;
          }
        }
        flag = 1;
      }
      int sum = 0;
      for (int i = 0; i < nproc; i++) {
        sum += newarr[i];
        if (sum > tid) {
          if (fine_gran) {
            int osID = procarr[i];
            KMP_CPU_SET(osID, mask);
          } else {
            int coreID = i / nth_per_core;
            for (int ii = 0; ii < nth_per_core; ii++) {
              int osID = procarr[coreID * nth_per_core + ii];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
              }
            }
          }
          break;
        }
      }
      __kmp_free(newarr);
    }

    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
}
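// Balanced example: nthreads == 6 on a uniform 4-core machine gives
// chunk == 1 and big_cores == 2 above, so threads are packed 2,2,1,1
// across the four cores.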
#if KMP_OS_LINUX
// We don't need this entry for Windows because there is the
// GetProcessAffinityMask() api.
#ifdef __cplusplus
extern "C"
#endif
    int
    kmp_set_thread_affinity_mask_initial()
// the function returns 0 on success,
//   -1 if we cannot bind the thread
//   >0 (errno) if an error happened during binding
{
  int gtid = __kmp_get_gtid();
  if (gtid < 0) {
    // Do not touch non-omp threads.
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
    return -1;
  }
  if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
    return -1;
  }
  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
                gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
  return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif

#endif // KMP_AFFINITY_SUPPORTED