#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"
#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;

#define KMP_USE_PRCTL 0

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
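
// __kmp_get_global_thread_id resolves the calling thread's global thread id
// (gtid). Depending on __kmp_gtid_mode it uses, in order of preference,
// thread-local data (TDATA), keyed thread-local storage, or an internal
// algorithm that locates the caller by comparing a local stack address
// against the registered stack extents of every thread in __kmp_threads.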
int __kmp_get_global_thread_id() {
  kmp_info_t **other_threads;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
int __kmp_get_global_thread_id_reg() {

  if (!__kmp_init_serial) {
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();

  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
      gtid = __kmp_register_root(FALSE);
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KMP_DEBUG_ASSERT(gtid >= 0);
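
// __kmp_check_stack_overlap prints the caller's stack extent when storage
// mapping is enabled and, with extended checks enabled for non-uber threads,
// scans __kmp_threads for any other thread whose registered stack range
// overlaps the caller's, reporting a fatal StackOverlap error if one is found.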
void __kmp_check_stack_overlap(kmp_info_t *th) {
  char *stack_beg = NULL;
  char *stack_end = NULL;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);

  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));

void __kmp_infinite_loop(void) {
  static int done = FALSE;

#define MAX_MESSAGE 512
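
// __kmp_print_storage_map_gtid formats one "OMP storage map" line for the
// address range [p1, p2] under __kmp_stdio_lock. When KMP_PRINT_DATA_PLACEMENT
// is enabled it additionally walks the range page by page and reports the
// memory node returned by __kmp_get_host_node() for each contiguous run.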
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
        __kmp_storage_map_verbose = FALSE;
        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                               localProc >> 1);
          __kmp_printf_no_lock("  GTID %d\n", gtid);
            (char *&)p1 += page_size;
          } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
          __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                               lastNode);
      __kmp_printf_no_lock("  %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));
        __kmp_printf_no_lock("  %p-%p memNode %d\n", p2,
                             (char *)p2 + (page_size - 1),
                             __kmp_get_host_node(p2));
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));

  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
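
// __kmp_warn emits a formatted "OMP warning" line to kmp_err unless warnings
// have been disabled via __kmp_generate_warnings.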
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];

  if (__kmp_generate_warnings == kmp_warnings_off) {

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();

  if (KMP_OS_WINDOWS) {
    __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
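
// The two storage-map helpers below describe the layout of a kmp_info_t and a
// kmp_team_t by printing one map entry per interesting sub-object: the thread
// descriptor and local data, the per-barrier-type kmp_balign_t /
// kmp_balign_team_t entries, and, for teams, the dispatch buffers and the
// thread pointer array.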
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
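
// DllMain is the Windows dynamic-library entry point: it tears the runtime
// down on process detach (unless the process is already terminating, i.e.
// lpReserved != NULL) and ends per-thread runtime state on thread detach.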
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
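
// __kmp_parallel_deo / __kmp_parallel_dxo implement entry to and exit from an
// "ordered" region. With BUILD_PARALLEL_ORDERED, entry waits until the team's
// ordered ticket (t_ordered.dt.t_value) equals the caller's tid, and exit
// advances the ticket to (tid + 1) % nproc to release the next thread.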
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
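
// __kmp_enter_single / __kmp_exit_single implement the "single" construct:
// the winning thread is decided by an atomic compare-and-store on the team's
// t_construct counter, with optional consistency checking and ITT markup.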
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;

    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      __kmp_itt_metadata_single(id_ref);

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
      __kmp_check_workshare(gtid, ct_psingle, id_ref);

    __kmp_itt_single_start(gtid);

void __kmp_exit_single(int gtid) {
    __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
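
// __kmp_reserve_threads decides how many threads a new team may actually get.
// It starts from the requested count, applies the dynamic adjustment mode
// (load balance, thread limit, or random), then clamps against
// KMP_DEVICE_THREAD_LIMIT, the contention-group limit (OMP_THREAD_LIMIT), and
// the thread-array capacity (expanding it when possible), warning once if the
// request had to be reduced and falling back to 1 (serialized) if necessary.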
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
      new_nthreads = set_nthreads;
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
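
// __kmp_fork_team_threads binds the primary thread as member 0 of the new
// team, allocates or reuses the workers (honoring nested hot teams),
// propagates teams-construct state and barrier arrival counters to each
// worker, and, with affinity support, partitions places unless the caller is
// forking the workers of a teams construct.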
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    int level = team->t.t_active_level - 1;
    if (master_th->th.th_teams_microtask) {
      if (master_th->th.th_teams_size.nteams > 1) {
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        hot_teams[level].hot_team = team;
        hot_teams[level].hot_team_nth = team->t.t_nproc;
  use_hot_team = team == root->r.r_hot_team;

    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    for (i = 1; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);

      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;

      kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
      for (b = 0; b < bs_last_barrier; ++b) {
        balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
        KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
        balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;

#if KMP_AFFINITY_SUPPORTED
    if (!fork_teams_workers) {
      __kmp_partition_places(team);

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
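
// propagateFPControl / updateHWFPControl save and restore the x87 control
// word and MXCSR across a parallel region when __kmp_inherit_fp_control is
// set, so workers inherit the primary thread's floating-point environment.
// On other architectures both macros collapse to no-ops.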
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);

#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
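
// __kmp_serialized_parallel executes a parallel region serially on the
// calling thread: it allocates (or reuses) a one-thread serial team, pushes
// the current task, bumps nesting levels and serialization counts, keeps the
// dispatch buffer chain in order, and raises the corresponding OMPT
// parallel-begin / implicit-task events when tools are enabled.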
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc);

void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;

  this_thr->th.th_set_proc_bind = proc_bind_default;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);

  if (this_thr->th.th_team != serial_team) {
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));

    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    int level = this_thr->th.th_team->t.t_level;

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);

    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);

    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
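
// Two small predicates used by __kmp_fork_call: __kmp_is_fork_in_teams
// detects a fork performed by a teams primary thread at the teams level,
// and __kmp_is_entering_teams detects entry into a teams construct.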
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
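
// __kmp_fork_in_teams forks the worker threads of a league team: the parent
// (teams) team is reused, its argv, thread count, and proc-bind are
// refreshed, ITT frames and OMPT lightweight task teams are set up, and the
// microtask is invoked through the parent team rather than a newly allocated
// one. If the parent team is still serialized, the region runs immediately.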
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
                    ompt_data_t ompt_parallel_data, void *return_address,
                    kmp_va_list ap) {

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);

  if (parent_team == master_th->th.th_serial_team) {
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      parent_team->t.t_serialized--;

    parent_team->t.t_pkfn = microtask;

    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      exit_frame_p = &dummy;

    parent_team->t.t_serialized--;

      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      master_th->th.ompt_thread_info.state = ompt_state_overhead;

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);

  if (master_set_numthreads) {
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      kmp_info_t **other_threads = parent_team->t.t_threads;

      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;

    master_th->th.th_set_nproc = 0;

  if (__kmp_debugging) {
    int nth = __kmp_omp_num_threads(loc);
      master_set_numthreads = nth;

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;

  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);

  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;

  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;

  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);

    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)

  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");

  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
                       kmp_va_list ap) {

#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  master_th->th.th_serial_team->t.t_pkfn = microtask;

  if (call_context == fork_context_intel) {
    master_th->th.th_serial_team->t.t_ident = loc;

      master_th->th.th_serial_team->t.t_level--;

      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        exit_frame_p = &dummy;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        master_th->th.ompt_thread_info.state = ompt_state_overhead;

    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;

      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;

        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);

        for (i = 0; i < argc; ++i)
          argv[i] = parent_team->t.t_argv[i];

      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        master_th->th.ompt_thread_info.state = ompt_state_overhead;

      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        exit_frame_p = &dummy;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        master_th->th.ompt_thread_info.state = ompt_state_overhead;

  } else if (call_context == fork_context_gnu) {
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));

    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  int master_this_cons;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  int master_set_numthreads;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);

    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);

    __kmp_assign_root_init_mask();

  level = parent_team->t.t_level;
  active_level = parent_team->t.t_active_level;
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    (*p_hot_teams)[0].hot_team_nth = 1;

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    master_th->th.ompt_thread_info.state = ompt_state_overhead;

  master_th->th.th_ident = loc;

  if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
    return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                               call_context, microtask, invoker,
                               master_set_numthreads, level,
                               ompt_parallel_data, return_address,
                               ap);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);

  int enter_teams =
      __kmp_is_entering_teams(active_level, level, teams_level, ap);
  if ((!enter_teams &&
       (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  KMP_DEBUG_ASSERT(nthreads > 0);

  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
    return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                  invoker, master_th, parent_team,
                                  &ompt_parallel_data, &return_address,
                                  ap);

  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));

  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    KMP_ATOMIC_INC(&root->r.r_in_parallel);

  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;

  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;

    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);

  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);

  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);

  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);

  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  propagateFPControl(team);

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) {
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;

        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) {

        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);

      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS

#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  argv = (void **)team->t.t_argv;

    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);

    for (i = 0; i < argc; ++i) {
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);

  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active)
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (team->t.t_active_level == 1
      && !master_th->th.th_teams_microtask) {
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();

      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;

    if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);

  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

    __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");

#if KMP_STATS_ENABLED
    KMP_SET_THREAD_STATE(previous_state);

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
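
// __kmp_join_restore_state and __kmp_join_ompt restore the primary thread's
// OMPT state after a join and emit the parallel-end callback.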
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
2385void __kmp_join_call(
ident_t *loc,
int gtid
2388 enum fork_context_e fork_context
2392 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2394 kmp_team_t *parent_team;
2395 kmp_info_t *master_th;
2399 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2402 master_th = __kmp_threads[gtid];
2403 root = master_th->th.th_root;
2404 team = master_th->th.th_team;
2405 parent_team = team->t.t_parent;
2407 master_th->th.th_ident = loc;
2410 void *team_microtask = (
void *)team->t.t_pkfn;
2414 if (ompt_enabled.enabled &&
2415 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2416 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2421 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2422 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2423 "th_task_team = %p\n",
2424 __kmp_gtid_from_thread(master_th), team,
2425 team->t.t_task_team[master_th->th.th_task_state],
2426 master_th->th.th_task_team));
2427 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2428 team->t.t_task_team[master_th->th.th_task_state]);
2432 if (team->t.t_serialized) {
2433 if (master_th->th.th_teams_microtask) {
2435 int level = team->t.t_level;
2436 int tlevel = master_th->th.th_teams_level;
2437 if (level == tlevel) {
2441 }
else if (level == tlevel + 1) {
2445 team->t.t_serialized++;
2451 if (ompt_enabled.enabled) {
2452 if (fork_context == fork_context_gnu) {
2453 __ompt_lw_taskteam_unlink(master_th);
2455 __kmp_join_restore_state(master_th, parent_team);
2462 master_active = team->t.t_master_active;
2467 __kmp_internal_join(loc, gtid, team);
2469 if (__itt_stack_caller_create_ptr) {
2470 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2472 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2473 team->t.t_stack_id = NULL;
2477 master_th->th.th_task_state =
2480 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2481 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2485 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2486 parent_team->t.t_stack_id = NULL;
2494 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2495 void *codeptr = team->t.ompt_team_info.master_return_address;
2500 if (team->t.t_active_level == 1 &&
2501 (!master_th->th.th_teams_microtask ||
2502 master_th->th.th_teams_size.nteams == 1)) {
2503 master_th->th.th_ident = loc;
2506 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2507 __kmp_forkjoin_frames_mode == 3)
2508 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2509 master_th->th.th_frame_time, 0, loc,
2510 master_th->th.th_team_nproc, 1);
2511 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2512 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2513 __kmp_itt_region_joined(gtid);
2517#if KMP_AFFINITY_SUPPORTED
2520 master_th->th.th_first_place = team->t.t_first_place;
2521 master_th->th.th_last_place = team->t.t_last_place;
2525 if (master_th->th.th_teams_microtask && !exit_teams &&
2526 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2527 team->t.t_level == master_th->th.th_teams_level + 1) {
2532 ompt_data_t ompt_parallel_data = ompt_data_none;
2533 if (ompt_enabled.enabled) {
2534 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2535 if (ompt_enabled.ompt_callback_implicit_task) {
2536 int ompt_team_size = team->t.t_nproc;
2537 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2538 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2539 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2541 task_info->frame.exit_frame = ompt_data_none;
2542 task_info->task_data = ompt_data_none;
2543 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2544 __ompt_lw_taskteam_unlink(master_th);
2549 team->t.t_active_level--;
2550 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2556 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2557 int old_num = master_th->th.th_team_nproc;
2558 int new_num = master_th->th.th_teams_size.nth;
2559 kmp_info_t **other_threads = team->t.t_threads;
2560 team->t.t_nproc = new_num;
2561 for (
int i = 0; i < old_num; ++i) {
2562 other_threads[i]->th.th_team_nproc = new_num;
2565 for (
int i = old_num; i < new_num; ++i) {
2567 KMP_DEBUG_ASSERT(other_threads[i]);
2568 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2569 for (
int b = 0; b < bs_last_barrier; ++b) {
2570 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2571 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2573 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2576 if (__kmp_tasking_mode != tskm_immediate_exec) {
2578 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2584 if (ompt_enabled.enabled) {
2585 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2586 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2594 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2595 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2597 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2602 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2604 if (!master_th->th.th_teams_microtask ||
2605 team->t.t_level > master_th->th.th_teams_level) {
2607 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2609 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2612 if (ompt_enabled.enabled) {
2613 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2614 if (ompt_enabled.ompt_callback_implicit_task) {
2615 int flags = (team_microtask == (
void *)__kmp_teams_master)
2617 : ompt_task_implicit;
2618 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2619 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2620 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2621 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2623 task_info->frame.exit_frame = ompt_data_none;
2624 task_info->task_data = ompt_data_none;
2628 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2630 __kmp_pop_current_task_from_thread(master_th);
2632 master_th->th.th_def_allocator = team->t.t_def_allocator;
2635 if (ompd_state & OMPD_ENABLE_BP)
2636 ompd_bp_parallel_end();
2638 updateHWFPControl(team);
2640 if (root->r.r_active != master_active)
2641 root->r.r_active = master_active;
2643 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2651 master_th->th.th_team = parent_team;
2652 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2653 master_th->th.th_team_master = parent_team->t.t_threads[0];
2654 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2657 if (parent_team->t.t_serialized &&
2658 parent_team != master_th->th.th_serial_team &&
2659 parent_team != root->r.r_root_team) {
2660 __kmp_free_team(root,
2661 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2662 master_th->th.th_serial_team = parent_team;
2665 if (__kmp_tasking_mode != tskm_immediate_exec) {
2666 if (master_th->th.th_task_state_top >
2668 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2670 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2671 master_th->th.th_task_state;
2672 --master_th->th.th_task_state_top;
2674 master_th->th.th_task_state =
2676 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2677 }
else if (team != root->r.r_hot_team) {
2682 master_th->th.th_task_state = 0;
2685 master_th->th.th_task_team =
2686 parent_team->t.t_task_team[master_th->th.th_task_state];
2688 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2689 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2696 master_th->th.th_current_task->td_flags.executing = 1;
2698 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2700#if KMP_AFFINITY_SUPPORTED
2701 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2702 __kmp_reset_root_init_mask(gtid);
2707 OMPT_INVOKER(fork_context) |
2708 ((team_microtask == (
void *)__kmp_teams_master) ? ompt_parallel_league
2709 : ompt_parallel_team);
2710 if (ompt_enabled.enabled) {
2711 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2717 KA_TRACE(20, (
"__kmp_join_call: exit T#%d\n", gtid));
2722void __kmp_save_internal_controls(kmp_info_t *thread) {
2724 if (thread->th.th_team != thread->th.th_serial_team) {
2727 if (thread->th.th_team->t.t_serialized > 1) {
2730 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2733 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2734 thread->th.th_team->t.t_serialized) {
2739 kmp_internal_control_t *control =
2740 (kmp_internal_control_t *)__kmp_allocate(
2741 sizeof(kmp_internal_control_t));
2743 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2745 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2747 control->next = thread->th.th_team->t.t_control_stack_top;
2748 thread->th.th_team->t.t_control_stack_top = control;
2754void __kmp_set_num_threads(
int new_nth,
int gtid) {
2758 KF_TRACE(10, (
"__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2759 KMP_DEBUG_ASSERT(__kmp_init_serial);
2763 else if (new_nth > __kmp_max_nth)
2764 new_nth = __kmp_max_nth;
2767 thread = __kmp_threads[gtid];
2768 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2771 __kmp_save_internal_controls(thread);
2773 set__nproc(thread, new_nth);
2778 root = thread->th.th_root;
2779 if (__kmp_init_parallel && (!root->r.r_active) &&
2780 (root->r.r_hot_team->t.t_nproc > new_nth)
2781#
if KMP_NESTED_HOT_TEAMS
2782 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2785 kmp_team_t *hot_team = root->r.r_hot_team;
2788 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2790 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2791 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2794 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2795 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2796 if (__kmp_tasking_mode != tskm_immediate_exec) {
2799 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2801 __kmp_free_thread(hot_team->t.t_threads[f]);
2802 hot_team->t.t_threads[f] = NULL;
2804 hot_team->t.t_nproc = new_nth;
2805#if KMP_NESTED_HOT_TEAMS
2806 if (thread->th.th_hot_teams) {
2807 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2808 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2812 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2813 hot_team->t.b->update_num_threads(new_nth);
2814 __kmp_add_threads_to_team(hot_team, new_nth);
2817 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2820 for (f = 0; f < new_nth; f++) {
2821 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2822 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2825 hot_team->t.t_size_changed = -1;
2830void __kmp_set_max_active_levels(
int gtid,
int max_active_levels) {
2833 KF_TRACE(10, (
"__kmp_set_max_active_levels: new max_active_levels for thread "
2835 gtid, max_active_levels));
2836 KMP_DEBUG_ASSERT(__kmp_init_serial);
2839 if (max_active_levels < 0) {
2840 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2845 KF_TRACE(10, (
"__kmp_set_max_active_levels: the call is ignored: new "
2846 "max_active_levels for thread %d = (%d)\n",
2847 gtid, max_active_levels));
2850 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2855 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2856 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2857 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2863 KF_TRACE(10, (
"__kmp_set_max_active_levels: after validation: new "
2864 "max_active_levels for thread %d = (%d)\n",
2865 gtid, max_active_levels));
2867 thread = __kmp_threads[gtid];
2869 __kmp_save_internal_controls(thread);
2871 set__max_active_levels(thread, max_active_levels);
2875int __kmp_get_max_active_levels(
int gtid) {
2878 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d\n", gtid));
2879 KMP_DEBUG_ASSERT(__kmp_init_serial);
2881 thread = __kmp_threads[gtid];
2882 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2883 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, "
2884 "curtask_maxaclevel=%d\n",
2885 gtid, thread->th.th_current_task,
2886 thread->th.th_current_task->td_icvs.max_active_levels));
2887 return thread->th.th_current_task->td_icvs.max_active_levels;
2891void __kmp_set_num_teams(
int num_teams) {
2893 __kmp_nteams = num_teams;
2895int __kmp_get_max_teams(
void) {
return __kmp_nteams; }
2897void __kmp_set_teams_thread_limit(
int limit) {
2899 __kmp_teams_thread_limit = limit;
2901int __kmp_get_teams_thread_limit(
void) {
return __kmp_teams_thread_limit; }
2903KMP_BUILD_ASSERT(
sizeof(kmp_sched_t) ==
sizeof(
int));
2904KMP_BUILD_ASSERT(
sizeof(
enum sched_type) ==
sizeof(
int));
2907void __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk) {
2909 kmp_sched_t orig_kind;
2912 KF_TRACE(10, (
"__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2913 gtid, (
int)kind, chunk));
2914 KMP_DEBUG_ASSERT(__kmp_init_serial);
2921 kind = __kmp_sched_without_mods(kind);
2923 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2924 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2926 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2927 KMP_HNT(DefaultScheduleKindUsed,
"static, no chunk"),
2929 kind = kmp_sched_default;
2933 thread = __kmp_threads[gtid];
2935 __kmp_save_internal_controls(thread);
2937 if (kind < kmp_sched_upper_std) {
2938 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2941 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
2943 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2944 __kmp_sch_map[kind - kmp_sched_lower - 1];
2949 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2950 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2951 kmp_sched_lower - 2];
2953 __kmp_sched_apply_mods_intkind(
2954 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2955 if (kind == kmp_sched_auto || chunk < 1) {
2957 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2959 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2964void __kmp_get_schedule(
int gtid, kmp_sched_t *kind,
int *chunk) {
2968 KF_TRACE(10, (
"__kmp_get_schedule: thread %d\n", gtid));
2969 KMP_DEBUG_ASSERT(__kmp_init_serial);
2971 thread = __kmp_threads[gtid];
2973 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2974 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2976 case kmp_sch_static_greedy:
2977 case kmp_sch_static_balanced:
2978 *kind = kmp_sched_static;
2979 __kmp_sched_apply_mods_stdkind(kind, th_type);
2982 case kmp_sch_static_chunked:
2983 *kind = kmp_sched_static;
2985 case kmp_sch_dynamic_chunked:
2986 *kind = kmp_sched_dynamic;
2989 case kmp_sch_guided_iterative_chunked:
2990 case kmp_sch_guided_analytical_chunked:
2991 *kind = kmp_sched_guided;
2994 *kind = kmp_sched_auto;
2996 case kmp_sch_trapezoidal:
2997 *kind = kmp_sched_trapezoidal;
2999#if KMP_STATIC_STEAL_ENABLED
3000 case kmp_sch_static_steal:
3001 *kind = kmp_sched_static_steal;
3005 KMP_FATAL(UnknownSchedulingType, th_type);
3008 __kmp_sched_apply_mods_stdkind(kind, th_type);
3009 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
3012int __kmp_get_ancestor_thread_num(
int gtid,
int level) {
3018 KF_TRACE(10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3019 KMP_DEBUG_ASSERT(__kmp_init_serial);
3026 thr = __kmp_threads[gtid];
3027 team = thr->th.th_team;
3028 ii = team->t.t_level;
3032 if (thr->th.th_teams_microtask) {
3034 int tlevel = thr->th.th_teams_level;
3037 KMP_DEBUG_ASSERT(ii >= tlevel);
3049 return __kmp_tid_from_gtid(gtid);
3051 dd = team->t.t_serialized;
3053 while (ii > level) {
3054 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3056 if ((team->t.t_serialized) && (!dd)) {
3057 team = team->t.t_parent;
3061 team = team->t.t_parent;
3062 dd = team->t.t_serialized;
3067 return (dd > 1) ? (0) : (team->t.t_master_tid);
3070int __kmp_get_team_size(
int gtid,
int level) {
3076 KF_TRACE(10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level));
3077 KMP_DEBUG_ASSERT(__kmp_init_serial);
3084 thr = __kmp_threads[gtid];
3085 team = thr->th.th_team;
3086 ii = team->t.t_level;
3090 if (thr->th.th_teams_microtask) {
3092 int tlevel = thr->th.th_teams_level;
3095 KMP_DEBUG_ASSERT(ii >= tlevel);
3106 while (ii > level) {
3107 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3109 if (team->t.t_serialized && (!dd)) {
3110 team = team->t.t_parent;
3114 team = team->t.t_parent;
3119 return team->t.t_nproc;
3122kmp_r_sched_t __kmp_get_schedule_global() {
3127 kmp_r_sched_t r_sched;
3133 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3134 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3137 r_sched.r_sched_type = __kmp_static;
3140 r_sched.r_sched_type = __kmp_guided;
3142 r_sched.r_sched_type = __kmp_sched;
3144 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3146 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3148 r_sched.chunk = KMP_DEFAULT_CHUNK;
3150 r_sched.chunk = __kmp_chunk;
3158static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc) {
3160 KMP_DEBUG_ASSERT(team);
3161 if (!realloc || argc > team->t.t_max_argc) {
3163 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3164 "current entries=%d\n",
3165 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3167 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3168 __kmp_free((
void *)team->t.t_argv);
3170 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3172 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3173 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d "
3175 team->t.t_id, team->t.t_max_argc));
3176 team->t.t_argv = &team->t.t_inline_argv[0];
3177 if (__kmp_storage_map) {
3178 __kmp_print_storage_map_gtid(
3179 -1, &team->t.t_inline_argv[0],
3180 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3181 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
"team_%d.t_inline_argv",
3186 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3187 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3189 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3191 team->t.t_id, team->t.t_max_argc));
3193 (
void **)__kmp_page_allocate(
sizeof(
void *) * team->t.t_max_argc);
3194 if (__kmp_storage_map) {
3195 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3196 &team->t.t_argv[team->t.t_max_argc],
3197 sizeof(
void *) * team->t.t_max_argc,
3198 "team_%d.t_argv", team->t.t_id);
3204static void __kmp_allocate_team_arrays(kmp_team_t *team,
int max_nth) {
3206 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3208 (kmp_info_t **)__kmp_allocate(
sizeof(kmp_info_t *) * max_nth);
3209 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3210 sizeof(dispatch_shared_info_t) * num_disp_buff);
3211 team->t.t_dispatch =
3212 (kmp_disp_t *)__kmp_allocate(
sizeof(kmp_disp_t) * max_nth);
3213 team->t.t_implicit_task_taskdata =
3214 (kmp_taskdata_t *)__kmp_allocate(
sizeof(kmp_taskdata_t) * max_nth);
3215 team->t.t_max_nproc = max_nth;
3218 for (i = 0; i < num_disp_buff; ++i) {
3219 team->t.t_disp_buffer[i].buffer_index = i;
3220 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3224static void __kmp_free_team_arrays(kmp_team_t *team) {
3227 for (i = 0; i < team->t.t_max_nproc; ++i) {
3228 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3229 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3230 team->t.t_dispatch[i].th_disp_buffer = NULL;
3233#if KMP_USE_HIER_SCHED
3234 __kmp_dispatch_free_hierarchies(team);
3236 __kmp_free(team->t.t_threads);
3237 __kmp_free(team->t.t_disp_buffer);
3238 __kmp_free(team->t.t_dispatch);
3239 __kmp_free(team->t.t_implicit_task_taskdata);
3240 team->t.t_threads = NULL;
3241 team->t.t_disp_buffer = NULL;
3242 team->t.t_dispatch = NULL;
3243 team->t.t_implicit_task_taskdata = 0;
3246static void __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
3247 kmp_info_t **oldThreads = team->t.t_threads;
3249 __kmp_free(team->t.t_disp_buffer);
3250 __kmp_free(team->t.t_dispatch);
3251 __kmp_free(team->t.t_implicit_task_taskdata);
3252 __kmp_allocate_team_arrays(team, max_nth);
3254 KMP_MEMCPY(team->t.t_threads, oldThreads,
3255 team->t.t_nproc *
sizeof(kmp_info_t *));
3257 __kmp_free(oldThreads);
3260static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3262 kmp_r_sched_t r_sched =
3263 __kmp_get_schedule_global();
3265 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3267 kmp_internal_control_t g_icvs = {
3269 (kmp_int8)__kmp_global.g.g_dynamic,
3271 (kmp_int8)__kmp_env_blocktime,
3273 __kmp_dflt_blocktime,
3278 __kmp_dflt_team_nth,
3282 __kmp_dflt_max_active_levels,
3286 __kmp_nested_proc_bind.bind_types[0],
3287 __kmp_default_device,
3294static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3296 kmp_internal_control_t gx_icvs;
3297 gx_icvs.serial_nesting_level =
3299 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3300 gx_icvs.next = NULL;
3305static void __kmp_initialize_root(kmp_root_t *root) {
3307 kmp_team_t *root_team;
3308 kmp_team_t *hot_team;
3309 int hot_team_max_nth;
3310 kmp_r_sched_t r_sched =
3311 __kmp_get_schedule_global();
3312 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3313 KMP_DEBUG_ASSERT(root);
3314 KMP_ASSERT(!root->r.r_begin);
3317 __kmp_init_lock(&root->r.r_begin_lock);
3318 root->r.r_begin = FALSE;
3319 root->r.r_active = FALSE;
3320 root->r.r_in_parallel = 0;
3321 root->r.r_blocktime = __kmp_dflt_blocktime;
3322#if KMP_AFFINITY_SUPPORTED
3323 root->r.r_affinity_assigned = FALSE;
3328 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3331 __kmp_allocate_team(root,
3337 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3339 USE_NESTED_HOT_ARG(NULL)
3344 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3347 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3349 root->r.r_root_team = root_team;
3350 root_team->t.t_control_stack_top = NULL;
3353 root_team->t.t_threads[0] = NULL;
3354 root_team->t.t_nproc = 1;
3355 root_team->t.t_serialized = 1;
3357 root_team->t.t_sched.sched = r_sched.sched;
3360 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3361 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3365 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3368 __kmp_allocate_team(root,
3370 __kmp_dflt_team_nth_ub * 2,
3374 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3376 USE_NESTED_HOT_ARG(NULL)
3378 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3380 root->r.r_hot_team = hot_team;
3381 root_team->t.t_control_stack_top = NULL;
3384 hot_team->t.t_parent = root_team;
3387 hot_team_max_nth = hot_team->t.t_max_nproc;
3388 for (f = 0; f < hot_team_max_nth; ++f) {
3389 hot_team->t.t_threads[f] = NULL;
3391 hot_team->t.t_nproc = 1;
3393 hot_team->t.t_sched.sched = r_sched.sched;
3394 hot_team->t.t_size_changed = 0;
3399typedef struct kmp_team_list_item {
3400 kmp_team_p
const *entry;
3401 struct kmp_team_list_item *next;
3402} kmp_team_list_item_t;
3403typedef kmp_team_list_item_t *kmp_team_list_t;
3405static void __kmp_print_structure_team_accum(
3406 kmp_team_list_t list,
3407 kmp_team_p
const *team
3417 KMP_DEBUG_ASSERT(list != NULL);
3422 __kmp_print_structure_team_accum(list, team->t.t_parent);
3423 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3427 while (l->next != NULL && l->entry != team) {
3430 if (l->next != NULL) {
3436 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3442 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3443 sizeof(kmp_team_list_item_t));
3450static void __kmp_print_structure_team(
char const *title, kmp_team_p
const *team
3453 __kmp_printf(
"%s", title);
3455 __kmp_printf(
"%2x %p\n", team->t.t_id, team);
3457 __kmp_printf(
" - (nil)\n");
3461static void __kmp_print_structure_thread(
char const *title,
3462 kmp_info_p
const *thread) {
3463 __kmp_printf(
"%s", title);
3464 if (thread != NULL) {
3465 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3467 __kmp_printf(
" - (nil)\n");
3471void __kmp_print_structure(
void) {
3473 kmp_team_list_t list;
3477 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3481 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3482 "Table\n------------------------------\n");
3485 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3486 __kmp_printf(
"%2d", gtid);
3487 if (__kmp_threads != NULL) {
3488 __kmp_printf(
" %p", __kmp_threads[gtid]);
3490 if (__kmp_root != NULL) {
3491 __kmp_printf(
" %p", __kmp_root[gtid]);
3498 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3500 if (__kmp_threads != NULL) {
3502 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3503 kmp_info_t
const *thread = __kmp_threads[gtid];
3504 if (thread != NULL) {
3505 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3506 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3507 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3508 __kmp_print_structure_team(
" Serial Team: ",
3509 thread->th.th_serial_team);
3510 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3511 __kmp_print_structure_thread(
" Primary: ",
3512 thread->th.th_team_master);
3513 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3514 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3515 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3516 __kmp_print_structure_thread(
" Next in pool: ",
3517 thread->th.th_next_pool);
3519 __kmp_print_structure_team_accum(list, thread->th.th_team);
3520 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3524 __kmp_printf(
"Threads array is not allocated.\n");
3528 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3530 if (__kmp_root != NULL) {
3532 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3533 kmp_root_t
const *root = __kmp_root[gtid];
3535 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3536 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3537 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3538 __kmp_print_structure_thread(
" Uber Thread: ",
3539 root->r.r_uber_thread);
3540 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3541 __kmp_printf(
" In Parallel: %2d\n",
3542 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3544 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3545 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3549 __kmp_printf(
"Ubers array is not allocated.\n");
3552 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3554 while (list->next != NULL) {
3555 kmp_team_p
const *team = list->entry;
3557 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3558 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3559 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3560 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3561 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3562 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3563 for (i = 0; i < team->t.t_nproc; ++i) {
3564 __kmp_printf(
" Thread %2d: ", i);
3565 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3567 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3573 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3575 __kmp_print_structure_thread(
"Thread pool: ",
3576 CCAST(kmp_info_t *, __kmp_thread_pool));
3577 __kmp_print_structure_team(
"Team pool: ",
3578 CCAST(kmp_team_t *, __kmp_team_pool));
3582 while (list != NULL) {
3583 kmp_team_list_item_t *item = list;
3585 KMP_INTERNAL_FREE(item);
3594static const unsigned __kmp_primes[] = {
3595 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3596 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3597 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3598 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3599 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3600 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3601 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3602 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3603 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3604 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3605 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3609unsigned short __kmp_get_random(kmp_info_t *thread) {
3610 unsigned x = thread->th.th_x;
3611 unsigned short r = (
unsigned short)(x >> 16);
3613 thread->th.th_x = x * thread->th.th_a + 1;
3615 KA_TRACE(30, (
"__kmp_get_random: THREAD: %d, RETURN: %u\n",
3616 thread->th.th_info.ds.ds_tid, r));
3622void __kmp_init_random(kmp_info_t *thread) {
3623 unsigned seed = thread->th.th_info.ds.ds_tid;
3626 __kmp_primes[seed % (
sizeof(__kmp_primes) /
sizeof(__kmp_primes[0]))];
3627 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3629 (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3635static int __kmp_reclaim_dead_roots(
void) {
3638 for (i = 0; i < __kmp_threads_capacity; ++i) {
3639 if (KMP_UBER_GTID(i) &&
3640 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3643 r += __kmp_unregister_root_other_thread(i);
3668static int __kmp_expand_threads(
int nNeed) {
3670 int minimumRequiredCapacity;
3672 kmp_info_t **newThreads;
3673 kmp_root_t **newRoot;
3679#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3682 added = __kmp_reclaim_dead_roots();
3711 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3714 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3718 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3720 newCapacity = __kmp_threads_capacity;
3722 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3723 : __kmp_sys_max_nth;
3724 }
while (newCapacity < minimumRequiredCapacity);
3725 newThreads = (kmp_info_t **)__kmp_allocate(
3726 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3728 (kmp_root_t **)((
char *)newThreads +
sizeof(kmp_info_t *) * newCapacity);
3729 KMP_MEMCPY(newThreads, __kmp_threads,
3730 __kmp_threads_capacity *
sizeof(kmp_info_t *));
3731 KMP_MEMCPY(newRoot, __kmp_root,
3732 __kmp_threads_capacity *
sizeof(kmp_root_t *));
3735 kmp_old_threads_list_t *node =
3736 (kmp_old_threads_list_t *)__kmp_allocate(
sizeof(kmp_old_threads_list_t));
3737 node->threads = __kmp_threads;
3738 node->next = __kmp_old_threads_list;
3739 __kmp_old_threads_list = node;
3741 *(kmp_info_t * *
volatile *)&__kmp_threads = newThreads;
3742 *(kmp_root_t * *
volatile *)&__kmp_root = newRoot;
3743 added += newCapacity - __kmp_threads_capacity;
3744 *(
volatile int *)&__kmp_threads_capacity = newCapacity;
3746 if (newCapacity > __kmp_tp_capacity) {
3747 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3748 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3749 __kmp_threadprivate_resize_cache(newCapacity);
3751 *(
volatile int *)&__kmp_tp_capacity = newCapacity;
3753 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3762int __kmp_register_root(
int initial_thread) {
3763 kmp_info_t *root_thread;
3767 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3768 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3785 capacity = __kmp_threads_capacity;
3786 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3793 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3794 capacity -= __kmp_hidden_helper_threads_num;
3798 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3799 if (__kmp_tp_cached) {
3800 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3801 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3802 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3804 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3814 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3817 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3818 gtid <= __kmp_hidden_helper_threads_num;
3821 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3822 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3823 "hidden helper thread: T#%d\n",
3829 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3832 for (gtid = __kmp_hidden_helper_threads_num + 1;
3833 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3837 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3838 KMP_ASSERT(gtid < __kmp_threads_capacity);
3843 TCW_4(__kmp_nth, __kmp_nth + 1);
3847 if (__kmp_adjust_gtid_mode) {
3848 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3849 if (TCR_4(__kmp_gtid_mode) != 2) {
3850 TCW_4(__kmp_gtid_mode, 2);
3853 if (TCR_4(__kmp_gtid_mode) != 1) {
3854 TCW_4(__kmp_gtid_mode, 1);
3859#ifdef KMP_ADJUST_BLOCKTIME
3862 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3863 if (__kmp_nth > __kmp_avail_proc) {
3864 __kmp_zero_bt = TRUE;
3870 if (!(root = __kmp_root[gtid])) {
3871 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3872 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3875#if KMP_STATS_ENABLED
3877 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3878 __kmp_stats_thread_ptr->startLife();
3879 KMP_SET_THREAD_STATE(SERIAL_REGION);
3882 __kmp_initialize_root(root);
3885 if (root->r.r_uber_thread) {
3886 root_thread = root->r.r_uber_thread;
3888 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3889 if (__kmp_storage_map) {
3890 __kmp_print_thread_storage_map(root_thread, gtid);
3892 root_thread->th.th_info.ds.ds_gtid = gtid;
3894 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3896 root_thread->th.th_root = root;
3897 if (__kmp_env_consistency_check) {
3898 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3901 __kmp_initialize_fast_memory(root_thread);
3905 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3906 __kmp_initialize_bget(root_thread);
3908 __kmp_init_random(root_thread);
3912 if (!root_thread->th.th_serial_team) {
3913 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3914 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3915 root_thread->th.th_serial_team = __kmp_allocate_team(
3920 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3922 KMP_ASSERT(root_thread->th.th_serial_team);
3923 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3924 root_thread->th.th_serial_team));
3927 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3929 root->r.r_root_team->t.t_threads[0] = root_thread;
3930 root->r.r_hot_team->t.t_threads[0] = root_thread;
3931 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3933 root_thread->th.th_serial_team->t.t_serialized = 0;
3934 root->r.r_uber_thread = root_thread;
3937 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3938 TCW_4(__kmp_init_gtid, TRUE);
3941 __kmp_gtid_set_specific(gtid);
3944 __kmp_itt_thread_name(gtid);
3947#ifdef KMP_TDATA_GTID
3950 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3951 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3953 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3955 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3956 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3957 KMP_INIT_BARRIER_STATE));
3960 for (b = 0; b < bs_last_barrier; ++b) {
3961 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3963 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3967 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3968 KMP_INIT_BARRIER_STATE);
3970#if KMP_AFFINITY_SUPPORTED
3971 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3972 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3973 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3974 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3976 root_thread->th.th_def_allocator = __kmp_def_allocator;
3977 root_thread->th.th_prev_level = 0;
3978 root_thread->th.th_prev_num_threads = 1;
3980 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3981 tmp->cg_root = root_thread;
3982 tmp->cg_thread_limit = __kmp_cg_max_nth;
3983 tmp->cg_nthreads = 1;
3984 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3985 " cg_nthreads init to 1\n",
3988 root_thread->th.th_cg_roots = tmp;
3990 __kmp_root_counter++;
3993 if (!initial_thread && ompt_enabled.enabled) {
3995 kmp_info_t *root_thread = ompt_get_thread();
3997 ompt_set_thread_state(root_thread, ompt_state_overhead);
3999 if (ompt_enabled.ompt_callback_thread_begin) {
4000 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4001 ompt_thread_initial, __ompt_get_thread_data_internal());
4003 ompt_data_t *task_data;
4004 ompt_data_t *parallel_data;
4005 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4007 if (ompt_enabled.ompt_callback_implicit_task) {
4008 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4009 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4012 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4016 if (ompd_state & OMPD_ENABLE_BP)
4017 ompd_bp_thread_begin();
4021 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4026#if KMP_NESTED_HOT_TEAMS
4027static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
4028 const int max_level) {
4030 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4031 if (!hot_teams || !hot_teams[level].hot_team) {
4034 KMP_DEBUG_ASSERT(level < max_level);
4035 kmp_team_t *team = hot_teams[level].hot_team;
4036 nth = hot_teams[level].hot_team_nth;
4038 if (level < max_level - 1) {
4039 for (i = 0; i < nth; ++i) {
4040 kmp_info_t *th = team->t.t_threads[i];
4041 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4042 if (i > 0 && th->th.th_hot_teams) {
4043 __kmp_free(th->th.th_hot_teams);
4044 th->th.th_hot_teams = NULL;
4048 __kmp_free_team(root, team, NULL);
4055static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
4056 kmp_team_t *root_team = root->r.r_root_team;
4057 kmp_team_t *hot_team = root->r.r_hot_team;
4058 int n = hot_team->t.t_nproc;
4061 KMP_DEBUG_ASSERT(!root->r.r_active);
4063 root->r.r_root_team = NULL;
4064 root->r.r_hot_team = NULL;
4067 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4068#if KMP_NESTED_HOT_TEAMS
4069 if (__kmp_hot_teams_max_level >
4071 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4072 kmp_info_t *th = hot_team->t.t_threads[i];
4073 if (__kmp_hot_teams_max_level > 1) {
4074 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4076 if (th->th.th_hot_teams) {
4077 __kmp_free(th->th.th_hot_teams);
4078 th->th.th_hot_teams = NULL;
4083 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4088 if (__kmp_tasking_mode != tskm_immediate_exec) {
4089 __kmp_wait_to_unref_task_teams();
4095 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4097 (LPVOID) & (root->r.r_uber_thread->th),
4098 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4099 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4103 if (ompd_state & OMPD_ENABLE_BP)
4104 ompd_bp_thread_end();
4108 ompt_data_t *task_data;
4109 ompt_data_t *parallel_data;
4110 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4112 if (ompt_enabled.ompt_callback_implicit_task) {
4113 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4114 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4116 if (ompt_enabled.ompt_callback_thread_end) {
4117 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4118 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4124 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4125 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4127 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4128 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4131 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4132 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4133 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4134 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4135 root->r.r_uber_thread->th.th_cg_roots = NULL;
4137 __kmp_reap_thread(root->r.r_uber_thread, 1);
4141 root->r.r_uber_thread = NULL;
4143 root->r.r_begin = FALSE;
4148void __kmp_unregister_root_current_thread(
int gtid) {
4149 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4153 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4154 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4155 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4158 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4161 kmp_root_t *root = __kmp_root[gtid];
4163 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4164 KMP_ASSERT(KMP_UBER_GTID(gtid));
4165 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4166 KMP_ASSERT(root->r.r_active == FALSE);
4170 kmp_info_t *thread = __kmp_threads[gtid];
4171 kmp_team_t *team = thread->th.th_team;
4172 kmp_task_team_t *task_team = thread->th.th_task_team;
4175 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4176 task_team->tt.tt_hidden_helper_task_encountered)) {
4179 thread->th.ompt_thread_info.state = ompt_state_undefined;
4181 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4184 __kmp_reset_root(gtid, root);
4188 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4190 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4197static int __kmp_unregister_root_other_thread(
int gtid) {
4198 kmp_root_t *root = __kmp_root[gtid];
4201 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4202 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4203 KMP_ASSERT(KMP_UBER_GTID(gtid));
4204 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4205 KMP_ASSERT(root->r.r_active == FALSE);
4207 r = __kmp_reset_root(gtid, root);
4209 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4215void __kmp_task_info() {
4217 kmp_int32 gtid = __kmp_entry_gtid();
4218 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4219 kmp_info_t *this_thr = __kmp_threads[gtid];
4220 kmp_team_t *steam = this_thr->th.th_serial_team;
4221 kmp_team_t *team = this_thr->th.th_team;
4224 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4226 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4227 team->t.t_implicit_task_taskdata[tid].td_parent);
4234static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4235 int tid,
int gtid) {
4239 KMP_DEBUG_ASSERT(this_thr != NULL);
4240 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4241 KMP_DEBUG_ASSERT(team);
4242 KMP_DEBUG_ASSERT(team->t.t_threads);
4243 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4244 kmp_info_t *master = team->t.t_threads[0];
4245 KMP_DEBUG_ASSERT(master);
4246 KMP_DEBUG_ASSERT(master->th.th_root);
4250 TCW_SYNC_PTR(this_thr->th.th_team, team);
4252 this_thr->th.th_info.ds.ds_tid = tid;
4253 this_thr->th.th_set_nproc = 0;
4254 if (__kmp_tasking_mode != tskm_immediate_exec)
4257 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4259 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4260 this_thr->th.th_set_proc_bind = proc_bind_default;
4261#if KMP_AFFINITY_SUPPORTED
4262 this_thr->th.th_new_place = this_thr->th.th_current_place;
4264 this_thr->th.th_root = master->th.th_root;
4267 this_thr->th.th_team_nproc = team->t.t_nproc;
4268 this_thr->th.th_team_master = master;
4269 this_thr->th.th_team_serialized = team->t.t_serialized;
4271 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4273 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4274 tid, gtid, this_thr, this_thr->th.th_current_task));
4276 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4279 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4280 tid, gtid, this_thr, this_thr->th.th_current_task));
4285 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4287 this_thr->th.th_local.this_construct = 0;
4289 if (!this_thr->th.th_pri_common) {
4290 this_thr->th.th_pri_common =
4291 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4292 if (__kmp_storage_map) {
4293 __kmp_print_storage_map_gtid(
4294 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4295 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4297 this_thr->th.th_pri_head = NULL;
4300 if (this_thr != master &&
4301 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4303 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4304 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4307 int i = tmp->cg_nthreads--;
4308 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4309 " on node %p of thread %p to %d\n",
4310 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4315 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4317 this_thr->th.th_cg_roots->cg_nthreads++;
4318 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4319 " node %p of thread %p to %d\n",
4320 this_thr, this_thr->th.th_cg_roots,
4321 this_thr->th.th_cg_roots->cg_root,
4322 this_thr->th.th_cg_roots->cg_nthreads));
4323 this_thr->th.th_current_task->td_icvs.thread_limit =
4324 this_thr->th.th_cg_roots->cg_thread_limit;
4329 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4332 sizeof(dispatch_private_info_t) *
4333 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4334 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4335 team->t.t_max_nproc));
4336 KMP_ASSERT(dispatch);
4337 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4338 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4340 dispatch->th_disp_index = 0;
4341 dispatch->th_doacross_buf_idx = 0;
4342 if (!dispatch->th_disp_buffer) {
4343 dispatch->th_disp_buffer =
4344 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4346 if (__kmp_storage_map) {
4347 __kmp_print_storage_map_gtid(
4348 gtid, &dispatch->th_disp_buffer[0],
4349 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4351 : __kmp_dispatch_num_buffers],
4353 "th_%d.th_dispatch.th_disp_buffer "
4354 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4355 gtid, team->t.t_id, gtid);
4358 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4361 dispatch->th_dispatch_pr_current = 0;
4362 dispatch->th_dispatch_sh_current = 0;
4364 dispatch->th_deo_fcn = 0;
4365 dispatch->th_dxo_fcn = 0;
4368 this_thr->th.th_next_pool = NULL;
4370 if (!this_thr->th.th_task_state_memo_stack) {
4372 this_thr->th.th_task_state_memo_stack =
4373 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4374 this_thr->th.th_task_state_top = 0;
4375 this_thr->th.th_task_state_stack_sz = 4;
4376 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4378 this_thr->th.th_task_state_memo_stack[i] = 0;
4381 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4382 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4392kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4394 kmp_team_t *serial_team;
4395 kmp_info_t *new_thr;
4398 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4399 KMP_DEBUG_ASSERT(root && team);
4400#if !KMP_NESTED_HOT_TEAMS
4401 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4406 if (__kmp_thread_pool) {
4407 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4408 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4409 if (new_thr == __kmp_thread_pool_insert_pt) {
4410 __kmp_thread_pool_insert_pt = NULL;
4412 TCW_4(new_thr->th.th_in_pool, FALSE);
4413 __kmp_suspend_initialize_thread(new_thr);
4414 __kmp_lock_suspend_mx(new_thr);
4415 if (new_thr->th.th_active_in_pool == TRUE) {
4416 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4417 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4418 new_thr->th.th_active_in_pool = FALSE;
4420 __kmp_unlock_suspend_mx(new_thr);
4422 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4423 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4424 KMP_ASSERT(!new_thr->th.th_team);
4425 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4428 __kmp_initialize_info(new_thr, team, new_tid,
4429 new_thr->th.th_info.ds.ds_gtid);
4430 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4432 TCW_4(__kmp_nth, __kmp_nth + 1);
4434 new_thr->th.th_task_state = 0;
4435 new_thr->th.th_task_state_top = 0;
4436 new_thr->th.th_task_state_stack_sz = 4;
4438 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4440 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4444#ifdef KMP_ADJUST_BLOCKTIME
4447 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4448 if (__kmp_nth > __kmp_avail_proc) {
4449 __kmp_zero_bt = TRUE;
4458 kmp_balign_t *balign = new_thr->th.th_bar;
4459 for (b = 0; b < bs_last_barrier; ++b)
4460 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4463 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4464 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4471 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4472 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4477 if (!TCR_4(__kmp_init_monitor)) {
4478 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4479 if (!TCR_4(__kmp_init_monitor)) {
4480 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4481 TCW_4(__kmp_init_monitor, 1);
4482 __kmp_create_monitor(&__kmp_monitor);
4483 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4494 while (TCR_4(__kmp_init_monitor) < 2) {
4497 KF_TRACE(10, (
"after monitor thread has started\n"));
4500 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4507 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4509 : __kmp_hidden_helper_threads_num + 1;
4511 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4513 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4516 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4517 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4522 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4524 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4526#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4529 __itt_suppress_mark_range(
4530 __itt_suppress_range, __itt_suppress_threading_errors,
4531 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4532 __itt_suppress_mark_range(
4533 __itt_suppress_range, __itt_suppress_threading_errors,
4534 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4536 __itt_suppress_mark_range(
4537 __itt_suppress_range, __itt_suppress_threading_errors,
4538 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4540 __itt_suppress_mark_range(__itt_suppress_range,
4541 __itt_suppress_threading_errors,
4542 &new_thr->th.th_suspend_init_count,
4543 sizeof(new_thr->th.th_suspend_init_count));
4546 __itt_suppress_mark_range(__itt_suppress_range,
4547 __itt_suppress_threading_errors,
4548 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4549 sizeof(new_thr->th.th_bar[0].bb.b_go));
4550 __itt_suppress_mark_range(__itt_suppress_range,
4551 __itt_suppress_threading_errors,
4552 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4553 sizeof(new_thr->th.th_bar[1].bb.b_go));
4554 __itt_suppress_mark_range(__itt_suppress_range,
4555 __itt_suppress_threading_errors,
4556 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4557 sizeof(new_thr->th.th_bar[2].bb.b_go));
4559 if (__kmp_storage_map) {
4560 __kmp_print_thread_storage_map(new_thr, new_gtid);
4565 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4566 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4567 new_thr->th.th_serial_team = serial_team =
4568 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4572 proc_bind_default, &r_icvs,
4573 0 USE_NESTED_HOT_ARG(NULL));
4575 KMP_ASSERT(serial_team);
4576 serial_team->t.t_serialized = 0;
4578 serial_team->t.t_threads[0] = new_thr;
4580 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4584 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4587 __kmp_initialize_fast_memory(new_thr);
4591 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4592 __kmp_initialize_bget(new_thr);
4595 __kmp_init_random(new_thr);
4599 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4600 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4603 kmp_balign_t *balign = new_thr->th.th_bar;
4604 for (b = 0; b < bs_last_barrier; ++b) {
4605 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4606 balign[b].bb.team = NULL;
4607 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4608 balign[b].bb.use_oncore_barrier = 0;
4611 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4612 new_thr->th.th_sleep_loc_type = flag_unset;
4614 new_thr->th.th_spin_here = FALSE;
4615 new_thr->th.th_next_waiting = 0;
4617 new_thr->th.th_blocking =
false;
4620#if KMP_AFFINITY_SUPPORTED
4621 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4622 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4623 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4624 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4626 new_thr->th.th_def_allocator = __kmp_def_allocator;
4627 new_thr->th.th_prev_level = 0;
4628 new_thr->th.th_prev_num_threads = 1;
4630 TCW_4(new_thr->th.th_in_pool, FALSE);
4631 new_thr->th.th_active_in_pool = FALSE;
4632 TCW_4(new_thr->th.th_active, TRUE);
4640 if (__kmp_adjust_gtid_mode) {
4641 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4642 if (TCR_4(__kmp_gtid_mode) != 2) {
4643 TCW_4(__kmp_gtid_mode, 2);
4646 if (TCR_4(__kmp_gtid_mode) != 1) {
4647 TCW_4(__kmp_gtid_mode, 1);
4652#ifdef KMP_ADJUST_BLOCKTIME
4655 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4656 if (__kmp_nth > __kmp_avail_proc) {
4657 __kmp_zero_bt = TRUE;
4664 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4665 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4667 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4669 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4680static void __kmp_reinitialize_team(kmp_team_t *team,
4681 kmp_internal_control_t *new_icvs,
4683 KF_TRACE(10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4684 team->t.t_threads[0], team));
4685 KMP_DEBUG_ASSERT(team && new_icvs);
4686 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4687 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4689 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4691 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4692 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4694 KF_TRACE(10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4695 team->t.t_threads[0], team));
4701static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
4702 kmp_internal_control_t *new_icvs,
4704 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4707 KMP_DEBUG_ASSERT(team);
4708 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4709 KMP_DEBUG_ASSERT(team->t.t_threads);
4712 team->t.t_master_tid = 0;
4714 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4715 team->t.t_nproc = new_nproc;
4718 team->t.t_next_pool = NULL;
4722 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4723 team->t.t_invoke = NULL;
4726 team->t.t_sched.sched = new_icvs->sched.sched;
4728#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4729 team->t.t_fp_control_saved = FALSE;
4730 team->t.t_x87_fpu_control_word = 0;
4731 team->t.t_mxcsr = 0;
4734 team->t.t_construct = 0;
4736 team->t.t_ordered.dt.t_value = 0;
4737 team->t.t_master_active = FALSE;
4740 team->t.t_copypriv_data = NULL;
4743 team->t.t_copyin_counter = 0;
4746 team->t.t_control_stack_top = NULL;
4748 __kmp_reinitialize_team(team, new_icvs, loc);
4751 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    if (old_mask != NULL) {
      int status = __kmp_get_system_affinity(old_mask, TRUE);
      if (status != 0)
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(errno),
                    __kmp_msg_null);
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
4773#if KMP_AFFINITY_SUPPORTED
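// __kmp_partition_places() computes each worker's place partition (first,
// last, and new place) from the primary thread's partition according to the
// team's proc_bind policy, and flags threads whose place changed so that the
// new bindings and the affinity display can be applied later.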
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Do not partition places for the hidden helper team.
  if (KMP_HIDDEN_HELPER_TEAM(team))
    return;
4784 kmp_info_t *master_th = team->t.t_threads[0];
4785 KMP_DEBUG_ASSERT(master_th != NULL);
4786 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4787 int first_place = master_th->th.th_first_place;
4788 int last_place = master_th->th.th_last_place;
4789 int masters_place = master_th->th.th_current_place;
4790 int num_masks = __kmp_affinity.num_masks;
4791 team->t.t_first_place = first_place;
4792 team->t.t_last_place = last_place;
  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));
  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams can have proc_bind_default; the primary thread is never
    // rebound, so there is nothing to do here.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
4809 int n_th = team->t.t_nproc;
4810 for (f = 1; f < n_th; f++) {
4811 kmp_info_t *th = team->t.t_threads[f];
4812 KMP_DEBUG_ASSERT(th != NULL);
4813 th->th.th_first_place = first_place;
4814 th->th.th_last_place = last_place;
4815 th->th.th_new_place = masters_place;
4816 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4817 team->t.t_display_affinity != 1) {
4818 team->t.t_display_affinity = 1;
4821 KA_TRACE(100, (
"__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4822 "partition = [%d,%d]\n",
4823 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4824 f, masters_place, first_place, last_place));
4828 case proc_bind_close: {
4830 int n_th = team->t.t_nproc;
4832 if (first_place <= last_place) {
4833 n_places = last_place - first_place + 1;
4835 n_places = num_masks - first_place + last_place + 1;
4837 if (n_th <= n_places) {
4838 int place = masters_place;
4839 for (f = 1; f < n_th; f++) {
4840 kmp_info_t *th = team->t.t_threads[f];
4841 KMP_DEBUG_ASSERT(th != NULL);
        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;
4853 if (__kmp_display_affinity && place != th->th.th_current_place &&
4854 team->t.t_display_affinity != 1) {
4855 team->t.t_display_affinity = 1;
4858 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4859 "partition = [%d,%d]\n",
4860 __kmp_gtid_from_thread(team->t.t_threads[f]),
4861 team->t.t_id, f, place, first_place, last_place));
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
4871 for (f = 0; f < n_th; f++) {
4872 kmp_info_t *th = team->t.t_threads[f];
4873 KMP_DEBUG_ASSERT(th != NULL);
4875 th->th.th_first_place = first_place;
4876 th->th.th_last_place = last_place;
4877 th->th.th_new_place = place;
4878 if (__kmp_display_affinity && place != th->th.th_current_place &&
4879 team->t.t_display_affinity != 1) {
4880 team->t.t_display_affinity = 1;
        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add an extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }
        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;

  case proc_bind_spread: {
4922 int n_th = team->t.t_nproc;
4925 if (first_place <= last_place) {
4926 n_places = last_place - first_place + 1;
4928 n_places = num_masks - first_place + last_place + 1;
4930 if (n_th <= n_places) {
      if (n_places != num_masks) {
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
4945 kmp_info_t *th = team->t.t_threads[f];
4946 KMP_DEBUG_ASSERT(th != NULL);
4948 th->th.th_first_place = place;
4949 th->th.th_new_place = place;
4950 if (__kmp_display_affinity && place != th->th.th_current_place &&
4951 team->t.t_display_affinity != 1) {
4952 team->t.t_display_affinity = 1;
          while (s_count < S) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            s_count++;
          }
          if (rem && (gap_ct == gap)) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            rem--;
            gap_ct = 0;
          }
          th->th.th_last_place = place;
          gap_ct++;
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, num_masks));
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
5010 KMP_DEBUG_ASSERT(last >= first);
5011 if (first >= n_places) {
5012 if (masters_place) {
5015 if (first == (masters_place + 1)) {
5016 KMP_DEBUG_ASSERT(f == n_th);
5019 if (last == masters_place) {
5020 KMP_DEBUG_ASSERT(f == (n_th - 1));
5024 KMP_DEBUG_ASSERT(f == n_th);
5029 if (last >= n_places) {
5030 last = (n_places - 1);
5035 KMP_DEBUG_ASSERT(0 <= first);
5036 KMP_DEBUG_ASSERT(n_places > first);
5037 KMP_DEBUG_ASSERT(0 <= last);
5038 KMP_DEBUG_ASSERT(n_places > last);
5039 KMP_DEBUG_ASSERT(last_place >= first_place);
5040 th = team->t.t_threads[f];
5041 KMP_DEBUG_ASSERT(th);
5042 th->th.th_first_place = first;
5043 th->th.th_new_place = place;
5044 th->th.th_last_place = last;
5045 if (__kmp_display_affinity && place != th->th.th_current_place &&
5046 team->t.t_display_affinity != 1) {
5047 team->t.t_display_affinity = 1;
          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], spacing = %.4f\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]),
                    team->t.t_id, f, th->th.th_new_place,
                    th->th.th_first_place, th->th.th_last_place, spacing));
5058 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
5071 kmp_info_t *th = team->t.t_threads[f];
5072 KMP_DEBUG_ASSERT(th != NULL);
5074 th->th.th_first_place = place;
5075 th->th.th_last_place = place;
5076 th->th.th_new_place = place;
5077 if (__kmp_display_affinity && place != th->th.th_current_place &&
5078 team->t.t_display_affinity != 1) {
5079 team->t.t_display_affinity = 1;
        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add an extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }
5109 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5110 "partition = [%d,%d]\n",
5111 __kmp_gtid_from_thread(team->t.t_threads[f]),
5112 team->t.t_id, f, th->th.th_new_place,
5113 th->th.th_first_place, th->th.th_last_place));
5115 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
#endif // KMP_AFFINITY_SUPPORTED
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
  KMP_MB();
5150#if KMP_NESTED_HOT_TEAMS
5151 kmp_hot_team_ptr_t *hot_teams;
5153 team = master->th.th_team;
5154 level = team->t.t_active_level;
5155 if (master->th.th_teams_microtask) {
      if (master->th.th_teams_size.nteams > 1 &&
          (team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
           master->th.th_teams_level < team->t.t_level)) {
        ++level; // not incremented for #teams==1 or the outer fork of teams
      }
5167 if ((master->th.th_teams_size.nteams == 1 &&
5168 master->th.th_teams_level >= team->t.t_level) ||
5169 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5170 do_place_partition = 0;
5172 hot_teams = master->th.th_hot_teams;
5173 if (level < __kmp_hot_teams_max_level && hot_teams &&
5174 hot_teams[level].hot_team) {
5182 KMP_DEBUG_ASSERT(new_nproc == 1);
  // Optimization: reuse the hot team when possible.
  if (use_hot_team && new_nproc > 1) {
    KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
#if KMP_NESTED_HOT_TEAMS
    team = hot_teams[level].hot_team;
#else
    team = root->r.r_hot_team;
#endif
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
5201 if (team->t.t_nproc != new_nproc &&
5202 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5204 int old_nthr = team->t.t_nproc;
5205 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5210 if (do_place_partition == 0)
5211 team->t.t_proc_bind = proc_bind_default;
5215 if (team->t.t_nproc == new_nproc) {
5216 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5219 if (team->t.t_size_changed == -1) {
5220 team->t.t_size_changed = 1;
5222 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5226 kmp_r_sched_t new_sched = new_icvs->sched;
5228 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5230 __kmp_reinitialize_team(team, new_icvs,
5231 root->r.r_uber_thread->th.th_ident);
5233 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5234 team->t.t_threads[0], team));
5235 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5237#if KMP_AFFINITY_SUPPORTED
5238 if ((team->t.t_size_changed == 0) &&
5239 (team->t.t_proc_bind == new_proc_bind)) {
5240 if (new_proc_bind == proc_bind_spread) {
5241 if (do_place_partition) {
5243 __kmp_partition_places(team, 1);
5246 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5247 "proc_bind = %d, partition = [%d,%d]\n",
5248 team->t.t_id, new_proc_bind, team->t.t_first_place,
5249 team->t.t_last_place));
5251 if (do_place_partition) {
5252 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5253 __kmp_partition_places(team);
5257 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
5265 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5268 __kmp_add_threads_to_team(team, new_nproc);
5270#if KMP_NESTED_HOT_TEAMS
5271 if (__kmp_hot_teams_mode == 0) {
5274 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5275 hot_teams[level].hot_team_nth = new_nproc;
5278 for (f = new_nproc; f < team->t.t_nproc; f++) {
5279 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5280 if (__kmp_tasking_mode != tskm_immediate_exec) {
5283 team->t.t_threads[f]->th.th_task_team = NULL;
5285 __kmp_free_thread(team->t.t_threads[f]);
5286 team->t.t_threads[f] = NULL;
5288#if KMP_NESTED_HOT_TEAMS
5293 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5294 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5295 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
5297 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5298 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5300 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5305 team->t.t_nproc = new_nproc;
5307 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5308 __kmp_reinitialize_team(team, new_icvs,
5309 root->r.r_uber_thread->th.th_ident);
5312 for (f = 0; f < new_nproc; ++f) {
5313 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5318 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5319 team->t.t_threads[0], team));
5321 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5324 for (f = 0; f < team->t.t_nproc; f++) {
5325 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5326 team->t.t_threads[f]->th.th_team_nproc ==
5331 if (do_place_partition) {
5332 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5333#if KMP_AFFINITY_SUPPORTED
5334 __kmp_partition_places(team);
5338#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5339 kmp_affin_mask_t *old_mask;
5340 if (KMP_AFFINITY_CAPABLE()) {
5341 KMP_CPU_ALLOC(old_mask);
      }
#endif
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value, used below
5349 team->t.t_size_changed = 1;
5351#if KMP_NESTED_HOT_TEAMS
5352 int avail_threads = hot_teams[level].hot_team_nth;
5353 if (new_nproc < avail_threads)
5354 avail_threads = new_nproc;
5355 kmp_info_t **other_threads = team->t.t_threads;
5356 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5360 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5361 for (b = 0; b < bs_last_barrier; ++b) {
5362 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5363 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5365 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5369 if (hot_teams[level].hot_team_nth >= new_nproc) {
5372 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5373 team->t.t_nproc = new_nproc;
5377 team->t.t_nproc = hot_teams[level].hot_team_nth;
5378 hot_teams[level].hot_team_nth = new_nproc;
5380 if (team->t.t_max_nproc < new_nproc) {
5382 __kmp_reallocate_team_arrays(team, new_nproc);
5383 __kmp_reinitialize_team(team, new_icvs, NULL);
5386#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5392 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5396 for (f = team->t.t_nproc; f < new_nproc; f++) {
5397 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5398 KMP_DEBUG_ASSERT(new_worker);
5399 team->t.t_threads[f] = new_worker;
        KA_TRACE(20,
                 ("__kmp_allocate_team: team %d init T#%d arrived: "
                  "join=%llu, plain=%llu\n",
                  team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
5410 kmp_balign_t *balign = new_worker->th.th_bar;
5411 for (b = 0; b < bs_last_barrier; ++b) {
5412 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5413 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5414 KMP_BARRIER_PARENT_FLAG);
5416 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5422#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5423 if (KMP_AFFINITY_CAPABLE()) {
5425 __kmp_set_system_affinity(old_mask, TRUE);
5426 KMP_CPU_FREE(old_mask);
5429#if KMP_NESTED_HOT_TEAMS
5432 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5435 __kmp_add_threads_to_team(team, new_nproc);
5439 __kmp_initialize_team(team, new_nproc, new_icvs,
5440 root->r.r_uber_thread->th.th_ident);
5443 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5444 for (f = 0; f < team->t.t_nproc; ++f)
5445 __kmp_initialize_info(team->t.t_threads[f], team, f,
5446 __kmp_gtid_from_tid(f, team));
5454 for (f = old_nproc; f < team->t.t_nproc; ++f)
5455 team->t.t_threads[f]->th.th_task_state =
5456 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5459 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5460 for (f = old_nproc; f < team->t.t_nproc; ++f)
5461 team->t.t_threads[f]->th.th_task_state = old_state;
5465 for (f = 0; f < team->t.t_nproc; ++f) {
5466 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5467 team->t.t_threads[f]->th.th_team_nproc ==
5472 if (do_place_partition) {
5473 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5474#if KMP_AFFINITY_SUPPORTED
5475 __kmp_partition_places(team);
5480 kmp_info_t *master = team->t.t_threads[0];
5481 if (master->th.th_teams_microtask) {
5482 for (f = 1; f < new_nproc; ++f) {
5484 kmp_info_t *thr = team->t.t_threads[f];
5485 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5486 thr->th.th_teams_level = master->th.th_teams_level;
5487 thr->th.th_teams_size = master->th.th_teams_size;
5490#if KMP_NESTED_HOT_TEAMS
5494 for (f = 1; f < new_nproc; ++f) {
5495 kmp_info_t *thr = team->t.t_threads[f];
5497 kmp_balign_t *balign = thr->th.th_bar;
5498 for (b = 0; b < bs_last_barrier; ++b) {
5499 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5500 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5502 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5510 __kmp_alloc_argv_entries(argc, team, TRUE);
5511 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5515 KF_TRACE(10, (
" hot_team = %p\n", team));
5518 if (__kmp_tasking_mode != tskm_immediate_exec) {
5519 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5520 "task_team[1] = %p after reinit\n",
5521 team->t.t_task_team[0], team->t.t_task_team[1]));
5526 __ompt_team_assign_id(team, ompt_parallel_data);
5536 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5539 if (team->t.t_max_nproc >= max_nproc) {
5541 __kmp_team_pool = team->t.t_next_pool;
5543 if (max_nproc > 1 &&
5544 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5546 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5551 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5553 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5554 "task_team[1] %p to NULL\n",
5555 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5556 team->t.t_task_team[0] = NULL;
5557 team->t.t_task_team[1] = NULL;
5560 __kmp_alloc_argv_entries(argc, team, TRUE);
5561 KMP_CHECK_UPDATE(team->t.t_argc, argc);
      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5568 for (b = 0; b < bs_last_barrier; ++b) {
5569 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5571 team->t.t_bar[b].b_master_arrived = 0;
5572 team->t.t_bar[b].b_team_arrived = 0;
5577 team->t.t_proc_bind = new_proc_bind;
      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));
      __ompt_team_assign_id(team, ompt_parallel_data);
5595 team = __kmp_reap_team(team);
5596 __kmp_team_pool = team;
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5604 team->t.t_max_nproc = max_nproc;
5605 if (max_nproc > 1 &&
5606 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5608 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5613 __kmp_allocate_team_arrays(team, max_nproc);
5615 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5616 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
5621 team->t.t_task_team[0] = NULL;
5623 team->t.t_task_team[1] = NULL;
5626 if (__kmp_storage_map) {
5627 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5631 __kmp_alloc_argv_entries(argc, team, FALSE);
5632 team->t.t_argc = argc;
  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5639 for (b = 0; b < bs_last_barrier; ++b) {
5640 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5642 team->t.t_bar[b].b_master_arrived = 0;
5643 team->t.t_bar[b].b_team_arrived = 0;
5648 team->t.t_proc_bind = new_proc_bind;
5651 __ompt_team_assign_id(team, ompt_parallel_data);
5652 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));
  return team;
}
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5675 KMP_DEBUG_ASSERT(root);
5676 KMP_DEBUG_ASSERT(team);
5677 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5678 KMP_DEBUG_ASSERT(team->t.t_threads);
5680 int use_hot_team = team == root->r.r_hot_team;
5681#if KMP_NESTED_HOT_TEAMS
5684 level = team->t.t_active_level - 1;
5685 if (master->th.th_teams_microtask) {
5686 if (master->th.th_teams_size.nteams > 1) {
5690 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5691 master->th.th_teams_level == team->t.t_level) {
5697 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5699 if (level < __kmp_hot_teams_max_level) {
5700 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5707 TCW_SYNC_PTR(team->t.t_pkfn,
5710 team->t.t_copyin_counter = 0;
5715 if (!use_hot_team) {
5716 if (__kmp_tasking_mode != tskm_immediate_exec) {
5718 for (f = 1; f < team->t.t_nproc; ++f) {
5719 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5720 kmp_info_t *th = team->t.t_threads[f];
5721 volatile kmp_uint32 *state = &th->th.th_reap_state;
5722 while (*state != KMP_SAFE_TO_REAP) {
5726 if (!__kmp_is_thread_alive(th, &ecode)) {
5727 *state = KMP_SAFE_TO_REAP;
5732 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5733 if (fl.is_sleeping())
5734 fl.resume(__kmp_gtid_from_thread(th));
5741 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5742 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5743 if (task_team != NULL) {
5744 for (f = 0; f < team->t.t_nproc; ++f) {
5745 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5746 team->t.t_threads[f]->th.th_task_team = NULL;
        KA_TRACE(
            20,
            ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
             __kmp_get_gtid(), task_team, team->t.t_id));
5752#if KMP_NESTED_HOT_TEAMS
5753 __kmp_free_task_team(master, task_team);
5755 team->t.t_task_team[tt_idx] = NULL;
5761 team->t.t_parent = NULL;
5762 team->t.t_level = 0;
5763 team->t.t_active_level = 0;
5766 for (f = 1; f < team->t.t_nproc; ++f) {
5767 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5768 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5769 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5772 __kmp_free_thread(team->t.t_threads[f]);
5775 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5778 team->t.b->go_release();
5779 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5780 for (f = 1; f < team->t.t_nproc; ++f) {
5781 if (team->t.b->sleep[f].sleep) {
5782 __kmp_atomic_resume_64(
5783 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5784 (kmp_atomic_flag_64<> *)NULL);
    for (int f = 1; f < team->t.t_nproc; ++f) {
5790 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5796 for (f = 1; f < team->t.t_nproc; ++f) {
5797 team->t.t_threads[f] = NULL;
5800 if (team->t.t_max_nproc > 1 &&
5801 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5802 distributedBarrier::deallocate(team->t.b);
5807 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
5811 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5812 team->t.t_threads[1]->th.th_cg_roots);
5813 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5815 for (f = 1; f < team->t.t_nproc; ++f) {
5816 kmp_info_t *thr = team->t.t_threads[f];
5817 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5818 thr->th.th_cg_roots->cg_root == thr);
5820 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5821 thr->th.th_cg_roots = tmp->up;
5822 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5823 " up to node %p. cg_nthreads was %d\n",
5824 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1)
          __kmp_free(tmp); // free the CG if this was the last thread in it
        // Restore the current task's thread_limit from the CG root.
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
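// Reclaim a team taken from the team pool: free its arrays and the team
// structure itself, returning the next team in the pool.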
5841kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5842 kmp_team_t *next_pool = team->t.t_next_pool;
5844 KMP_DEBUG_ASSERT(team);
5845 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5846 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5847 KMP_DEBUG_ASSERT(team->t.t_threads);
5848 KMP_DEBUG_ASSERT(team->t.t_argv);
5853 __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  return next_pool;
}
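// Free the thread: do not reap it, just place it back on the pool of available
// threads (kept sorted by gtid), and update its contention-group bookkeeping.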
5887void __kmp_free_thread(kmp_info_t *this_th) {
  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5894 KMP_DEBUG_ASSERT(this_th);
5899 kmp_balign_t *balign = this_th->th.th_bar;
5900 for (b = 0; b < bs_last_barrier; ++b) {
5901 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5902 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5903 balign[b].bb.team = NULL;
5904 balign[b].bb.leaf_kids = 0;
5906 this_th->th.th_task_state = 0;
5907 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5910 TCW_PTR(this_th->th.th_team, NULL);
5911 TCW_PTR(this_th->th.th_root, NULL);
5912 TCW_PTR(this_th->th.th_dispatch, NULL);
5914 while (this_th->th.th_cg_roots) {
5915 this_th->th.th_cg_roots->cg_nthreads--;
5916 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5917 " %p of thread %p to %d\n",
5918 this_th, this_th->th.th_cg_roots,
5919 this_th->th.th_cg_roots->cg_root,
5920 this_th->th.th_cg_roots->cg_nthreads));
5921 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5922 if (tmp->cg_root == this_th) {
5923 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5925 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5926 this_th->th.th_cg_roots = tmp->up;
5929 if (tmp->cg_nthreads == 0) {
5932 this_th->th.th_cg_roots = NULL;
5942 __kmp_free_implicit_task(this_th);
5943 this_th->th.th_current_task = NULL;
5947 gtid = this_th->th.th_info.ds.ds_gtid;
5948 if (__kmp_thread_pool_insert_pt != NULL) {
5949 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5950 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5951 __kmp_thread_pool_insert_pt = NULL;
5960 if (__kmp_thread_pool_insert_pt != NULL) {
5961 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5963 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5965 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5966 scan = &((*scan)->th.th_next_pool))
5971 TCW_PTR(this_th->th.th_next_pool, *scan);
5972 __kmp_thread_pool_insert_pt = *scan = this_th;
5973 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5974 (this_th->th.th_info.ds.ds_gtid <
5975 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5976 TCW_4(this_th->th.th_in_pool, TRUE);
5977 __kmp_suspend_initialize_thread(this_th);
5978 __kmp_lock_suspend_mx(this_th);
5979 if (this_th->th.th_active == TRUE) {
5980 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5981 this_th->th.th_active_in_pool = TRUE;
5985 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5988 __kmp_unlock_suspend_mx(this_th);
5990 TCW_4(__kmp_nth, __kmp_nth - 1);
5992#ifdef KMP_ADJUST_BLOCKTIME
5995 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5996 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5997 if (__kmp_nth <= __kmp_avail_proc) {
5998 __kmp_zero_bt = FALSE;
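// Main loop executed by each worker thread: wait at the fork barrier for work,
// invoke the team's microtask, then wait at the join barrier, repeating until
// the runtime is shut down.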
6008void *__kmp_launch_thread(kmp_info_t *this_thr) {
6009#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
6016 int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
6023 if (__kmp_env_consistency_check) {
6024 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
6028 if (ompd_state & OMPD_ENABLE_BP)
6029 ompd_bp_thread_begin();
  ompt_data_t *thread_data = nullptr;
6034 if (ompt_enabled.enabled) {
6035 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6036 *thread_data = ompt_data_none;
6038 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6039 this_thr->th.ompt_thread_info.wait_id = 0;
6040 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6041 this_thr->th.ompt_thread_info.parallel_flags = 0;
6042 if (ompt_enabled.ompt_callback_thread_begin) {
6043 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6044 ompt_thread_worker, thread_data);
6046 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6051 while (!TCR_4(__kmp_global.g.g_done)) {
6052 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6056 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6059 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6062 if (ompt_enabled.enabled) {
6063 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6067 pteam = &this_thr->th.th_team;
6070 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6072 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
6079 updateHWFPControl(*pteam);
6082 if (ompt_enabled.enabled) {
6083 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6087 rc = (*pteam)->t.t_invoke(gtid);
6091 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6092 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6093 (*pteam)->t.t_pkfn));
6096 if (ompt_enabled.enabled) {
6098 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6100 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6104 __kmp_join_barrier(gtid);
6107 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
6110 if (ompd_state & OMPD_ENABLE_BP)
6111 ompd_bp_thread_end();
6115 if (ompt_enabled.ompt_callback_thread_end) {
6116 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6120 this_thr->th.th_task_team = NULL;
6122 __kmp_common_destroy_gtid(gtid);
  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost.
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6144 __kmp_internal_end_thread(gtid);
6147#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6150 __kmp_internal_end_atexit();
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
6182 __kmp_internal_end_library(-1);
6184 __kmp_close_console();
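// Reap a worker (or root) thread: release it from the fork barrier, join the
// underlying OS thread, and free all per-thread resources.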
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed that __kmp_forkjoin_lock is held.
  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);
  gtid = thread->th.th_info.ds.ds_gtid;
6198 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      while (!KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
        KMP_CPU_PAUSE();
      __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
    } else {
      kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
      __kmp_release_64(&flag);
    }
  }
6218 __kmp_reap_worker(thread);
6230 if (thread->th.th_active_in_pool) {
6231 thread->th.th_active_in_pool = FALSE;
6232 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6233 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6237 __kmp_free_implicit_task(thread);
6241 __kmp_free_fast_memory(thread);
6244 __kmp_suspend_uninitialize_thread(thread);
6246 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6247 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6252#ifdef KMP_ADJUST_BLOCKTIME
6255 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6256 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6257 if (__kmp_nth <= __kmp_avail_proc) {
6258 __kmp_zero_bt = FALSE;
6264 if (__kmp_env_consistency_check) {
6265 if (thread->th.th_cons) {
6266 __kmp_free_cons_stack(thread->th.th_cons);
6267 thread->th.th_cons = NULL;
6271 if (thread->th.th_pri_common != NULL) {
6272 __kmp_free(thread->th.th_pri_common);
6273 thread->th.th_pri_common = NULL;
6276 if (thread->th.th_task_state_memo_stack != NULL) {
6277 __kmp_free(thread->th.th_task_state_memo_stack);
6278 thread->th.th_task_state_memo_stack = NULL;
6282 if (thread->th.th_local.bget_data != NULL) {
6283 __kmp_finalize_bget(thread);
6287#if KMP_AFFINITY_SUPPORTED
6288 if (thread->th.th_affin_mask != NULL) {
6289 KMP_CPU_FREE(thread->th.th_affin_mask);
6290 thread->th.th_affin_mask = NULL;
6294#if KMP_USE_HIER_SCHED
6295 if (thread->th.th_hier_bar_data != NULL) {
6296 __kmp_free(thread->th.th_hier_bar_data);
6297 thread->th.th_hier_bar_data = NULL;
6301 __kmp_reap_team(thread->th.th_serial_team);
6302 thread->th.th_serial_team = NULL;
6309static void __kmp_itthash_clean(kmp_info_t *th) {
6311 if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6313 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6315 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6316 __kmp_thread_free(th, bucket);
6321 if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6323 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6325 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6326 __kmp_thread_free(th, bucket);
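// Tear down the runtime: reap the monitor thread (if used), all pooled worker
// threads, pooled teams, and task teams, then mark the library as shut down.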
static void __kmp_internal_end(void) {
6338 __kmp_unregister_library();
6345 __kmp_reclaim_dead_roots();
6349 for (i = 0; i < __kmp_threads_capacity; i++)
6351 if (__kmp_root[i]->r.r_active)
6354 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6356 if (i < __kmp_threads_capacity) {
6368 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6369 if (TCR_4(__kmp_init_monitor)) {
6370 __kmp_reap_monitor(&__kmp_monitor);
6371 TCW_4(__kmp_init_monitor, 0);
6373 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6374 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6380 for (i = 0; i < __kmp_threads_capacity; i++) {
6381 if (__kmp_root[i]) {
6384 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6393 while (__kmp_thread_pool != NULL) {
6395 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6396 __kmp_thread_pool = thread->th.th_next_pool;
6398 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6399 thread->th.th_next_pool = NULL;
6400 thread->th.th_in_pool = FALSE;
6401 __kmp_reap_thread(thread, 0);
6403 __kmp_thread_pool_insert_pt = NULL;
6406 while (__kmp_team_pool != NULL) {
6408 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6409 __kmp_team_pool = team->t.t_next_pool;
6411 team->t.t_next_pool = NULL;
6412 __kmp_reap_team(team);
6415 __kmp_reap_task_teams();
6422 for (i = 0; i < __kmp_threads_capacity; i++) {
6423 kmp_info_t *thr = __kmp_threads[i];
6424 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6429 for (i = 0; i < __kmp_threads_capacity; ++i) {
6436 TCW_SYNC_4(__kmp_init_common, FALSE);
6438 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6446 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6447 if (TCR_4(__kmp_init_monitor)) {
6448 __kmp_reap_monitor(&__kmp_monitor);
6449 TCW_4(__kmp_init_monitor, 0);
6451 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6452 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6455 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }
6482 if (TCR_4(__kmp_init_hidden_helper) &&
6483 !TCR_4(__kmp_hidden_helper_team_done)) {
6484 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6486 __kmp_hidden_helper_main_thread_release();
6488 __kmp_hidden_helper_threads_deinitz_wait();
  int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(10,
           ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
  if (gtid == KMP_GTID_SHUTDOWN) {
    KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                  "already shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                  "shutdown\n"));
    // We don't know who we are, but we may still shut down the library.
  } else if (KMP_UBER_GTID(gtid)) {
6511 if (__kmp_root[gtid]->r.r_active) {
6512 __kmp_global.g.g_abort = -1;
6513 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6514 __kmp_unregister_library();
      KA_TRACE(10,
               ("__kmp_internal_end_library: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      __kmp_itthash_clean(__kmp_threads[gtid]);
      KA_TRACE(
          10,
          ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6524 __kmp_unregister_root_current_thread(gtid);
6531#ifdef DUMP_DEBUG_ON_EXIT
6532 if (__kmp_debug_buf)
6533 __kmp_dump_debug_buffer();
6538 __kmp_unregister_library();
6543 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6546 if (__kmp_global.g.g_abort) {
6547 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6549 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6552 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6553 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6562 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6565 __kmp_internal_end();
6567 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6568 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6570 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6572#ifdef DUMP_DEBUG_ON_EXIT
6573 if (__kmp_debug_buf)
6574 __kmp_dump_debug_buffer();
6578 __kmp_close_console();
6581 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }
6605 if (TCR_4(__kmp_init_hidden_helper) &&
6606 !TCR_4(__kmp_hidden_helper_team_done)) {
6607 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6609 __kmp_hidden_helper_main_thread_release();
6611 __kmp_hidden_helper_threads_deinitz_wait();
  int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(10,
           ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
  if (gtid == KMP_GTID_SHUTDOWN) {
    KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                  "already shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                  "shutdown\n"));
    return;
  } else if (KMP_UBER_GTID(gtid)) {
6636 if (__kmp_root[gtid]->r.r_active) {
6637 __kmp_global.g.g_abort = -1;
      TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
      KA_TRACE(10,
               ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                    gtid));
      __kmp_unregister_root_current_thread(gtid);
    }
  } else {
    /* just a worker thread, let's leave */
    KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6653 __kmp_threads[gtid]->th.th_task_team = NULL;
    KA_TRACE(10,
             ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
              gtid));
    return;
  }
  if (__kmp_pause_status != kmp_hard_paused) {
    // Defer full shutdown to the library destructor.
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
6672 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6675 if (__kmp_global.g.g_abort) {
6676 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6678 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6681 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6682 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6693 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6695 for (i = 0; i < __kmp_threads_capacity; ++i) {
6696 if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6700 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6701 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6708 __kmp_internal_end();
6710 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6711 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6713 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6715#ifdef DUMP_DEBUG_ON_EXIT
6716 if (__kmp_debug_buf)
6717 __kmp_dump_debug_buffer();
6724static long __kmp_registration_flag = 0;
6726static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid()); // second field assumed: the user id
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
}
6743#if defined(KMP_USE_SHM)
char *temp_reg_status_file_name = nullptr;
#endif

void __kmp_register_library_startup(void) {
6750 char *name = __kmp_reg_status_name();
6756#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6757 __kmp_initialize_system_tick();
6759 __kmp_read_system_time(&time.dtime);
6760 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6761 __kmp_registration_str =
6762 __kmp_str_format(
"%p-%lx-%s", &__kmp_registration_flag,
6763 __kmp_registration_flag, KMP_LIBRARY_FILE);
6765 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6766 __kmp_registration_str));
6772#if defined(KMP_USE_SHM)
    char *shm_name = __kmp_str_format("/%s", name);
6774 int shm_preexist = 0;
6776 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6777 if ((fd1 == -1) && (errno == EEXIST)) {
6780 fd1 = shm_open(shm_name, O_RDWR, 0666);
      if (fd1 == -1) { // still could not open
        __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0),
                    __kmp_msg_null);
      } else { // able to open existing SHM segment
        shm_preexist = 1;
      }
    } else if (fd1 == -1) {
      // shm_open failed for a reason other than EEXIST; fall back to a
      // temporary file under /tmp.
      char *temp_file_name = __kmp_str_format("/tmp/%sXXXXXX", name);
      fd1 = mkstemp(temp_file_name);
      if (fd1 == -1) {
        __kmp_fatal(KMP_MSG(FunctionError, "Can't open TEMP"), KMP_ERR(errno),
                    __kmp_msg_null);
      }
      temp_reg_status_file_name = temp_file_name;
    }
6803 if (shm_preexist == 0) {
      if (ftruncate(fd1, SHM_SIZE) == -1) {
        __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"),
                    KMP_ERR(errno), __kmp_msg_null);
      }
    }
    char *data1 =
        (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
    if (data1 == MAP_FAILED) {
      __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno),
                  __kmp_msg_null);
    }
    if (shm_preexist == 0) { // we created the SHM, so write the value into it
      KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
    }
    // Read back either what we just wrote or the pre-existing content.
    value = __kmp_str_format("%s", data1);
    munmap(data1, SHM_SIZE);
#else // Windows, and unix with a static library
    __kmp_env_set(name, __kmp_registration_str, 0);
    value = __kmp_env_get(name);
#endif

    if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6839 char *flag_addr_str = NULL;
6840 char *flag_val_str = NULL;
6841 char const *file_name = NULL;
      __kmp_str_split(tail, '-', &flag_addr_str, &tail);
      __kmp_str_split(tail, '-', &flag_val_str, &tail);
6846 unsigned long *flag_addr = 0;
6847 unsigned long flag_val = 0;
      KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
      KMP_SSCANF(flag_val_str, "%lx", &flag_val);
      if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6854 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6868 file_name =
"unknown library";
6873 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6874 if (!__kmp_str_match_true(duplicate_ok)) {
6876 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6877 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6879 KMP_INTERNAL_FREE(duplicate_ok);
6880 __kmp_duplicate_library_ok = 1;
6885#if defined(KMP_USE_SHM)
6887 shm_unlink(shm_name);
6890 __kmp_env_unset(name);
6894 KMP_DEBUG_ASSERT(0);
  KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
  KMP_INTERNAL_FREE((void *)shm_name);
#endif
  KMP_INTERNAL_FREE((void *)name);
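// Remove the registration marker published by __kmp_register_library_startup()
// (shared-memory segment, temp file, or environment variable), but only if it
// still belongs to this copy of the library.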
void __kmp_unregister_library(void) {
  char *name = __kmp_reg_status_name();
  char *value = NULL;
6912#if defined(KMP_USE_SHM)
  bool use_shm = true;
  char *shm_name = __kmp_str_format("/%s", name);
6915 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
  if (fd1 == -1) { // SHM not found; fall back to the temp file
    use_shm = false;
    KMP_DEBUG_ASSERT(temp_reg_status_file_name);
    FILE *tf = fopen(temp_reg_status_file_name, "r");
    if (!tf)
      return;
    fd1 = fileno(tf);
  }
  char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6928 if (data1 != MAP_FAILED) {
6929 value = __kmp_str_format(
"%s", data1);
6930 munmap(data1, SHM_SIZE);
6934 value = __kmp_env_get(name);
6937 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6938 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6939 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6941#if defined(KMP_USE_SHM)
6943 shm_unlink(shm_name);
6945 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6946 unlink(temp_reg_status_file_name);
6949 __kmp_env_unset(name);
6953#if defined(KMP_USE_SHM)
6954 KMP_INTERNAL_FREE(shm_name);
6956 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6957 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6961 KMP_INTERNAL_FREE(__kmp_registration_str);
6962 KMP_INTERNAL_FREE(value);
6963 KMP_INTERNAL_FREE(name);
6965 __kmp_registration_flag = 0;
6966 __kmp_registration_str = NULL;
6973#if KMP_MIC_SUPPORTED
6975static void __kmp_check_mic_type() {
6976 kmp_cpuid_t cpuid_state = {0};
6977 kmp_cpuid_t *cs_p = &cpuid_state;
6978 __kmp_x86_cpuid(1, 0, cs_p);
6980 if ((cs_p->eax & 0xff0) == 0xB10) {
6981 __kmp_mic_type = mic2;
6982 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6983 __kmp_mic_type = mic3;
6985 __kmp_mic_type = non_mic;
6992static void __kmp_user_level_mwait_init() {
6993 struct kmp_cpuid buf;
6994 __kmp_x86_cpuid(7, 0, &buf);
6995 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6996 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6997 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6998 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6999 __kmp_umwait_enabled));
7002#ifndef AT_INTELPHIUSERMWAIT
7005#define AT_INTELPHIUSERMWAIT 10000
7010unsigned long getauxval(
unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
7011unsigned long getauxval(
unsigned long) {
return 0; }
7013static void __kmp_user_level_mwait_init() {
7018 if (__kmp_mic_type == mic3) {
7019 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7020 if ((res & 0x1) || __kmp_user_level_mwait) {
7021 __kmp_mwait_enabled = TRUE;
7022 if (__kmp_user_level_mwait) {
7023 KMP_INFORM(EnvMwaitWarn);
7026 __kmp_mwait_enabled = FALSE;
7029 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7030 "__kmp_mwait_enabled = %d\n",
7031 __kmp_mic_type, __kmp_mwait_enabled));
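// One-time serial initialization of the runtime: locks, platform queries,
// default ICVs and barrier settings, the thread/root arrays, and registration
// of the initial (root) thread.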
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7055 __kmp_validate_locks();
7058 __kmp_init_allocator();
7064 if (__kmp_need_register_serial)
7065 __kmp_register_library_startup();
7068 if (TCR_4(__kmp_global.g.g_done)) {
7069 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7072 __kmp_global.g.g_abort = 0;
7073 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7076#if KMP_USE_ADAPTIVE_LOCKS
7077#if KMP_DEBUG_ADAPTIVE_LOCKS
7078 __kmp_init_speculative_stats();
7081#if KMP_STATS_ENABLED
7084 __kmp_init_lock(&__kmp_global_lock);
7085 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7086 __kmp_init_lock(&__kmp_debug_lock);
7087 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7088 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7089 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7090 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7091 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7092 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7093 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7094 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7095 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7096 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7097 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7098 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7099 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7100 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7101 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7103 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7105 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7109 __kmp_runtime_initialize();
7111#if KMP_MIC_SUPPORTED
7112 __kmp_check_mic_type();
7119 __kmp_abort_delay = 0;
7123 __kmp_dflt_team_nth_ub = __kmp_xproc;
7124 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7125 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7127 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7128 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7130 __kmp_max_nth = __kmp_sys_max_nth;
7131 __kmp_cg_max_nth = __kmp_sys_max_nth;
7132 __kmp_teams_max_nth = __kmp_xproc;
7133 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7134 __kmp_teams_max_nth = __kmp_sys_max_nth;
7139 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7141 __kmp_monitor_wakeups =
7142 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7143 __kmp_bt_intervals =
7144 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7147 __kmp_library = library_throughput;
7149 __kmp_static = kmp_sch_static_balanced;
7156#if KMP_FAST_REDUCTION_BARRIER
7157#define kmp_reduction_barrier_gather_bb ((int)1)
7158#define kmp_reduction_barrier_release_bb ((int)1)
7159#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7160#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7162 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7163 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7164 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7165 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7166 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7167#if KMP_FAST_REDUCTION_BARRIER
7168 if (i == bs_reduction_barrier) {
7170 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7171 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7172 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7173 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7177#if KMP_FAST_REDUCTION_BARRIER
7178#undef kmp_reduction_barrier_release_pat
7179#undef kmp_reduction_barrier_gather_pat
7180#undef kmp_reduction_barrier_release_bb
7181#undef kmp_reduction_barrier_gather_bb
7183#if KMP_MIC_SUPPORTED
7184 if (__kmp_mic_type == mic2) {
7186 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7187 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7189 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7190 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7192#if KMP_FAST_REDUCTION_BARRIER
7193 if (__kmp_mic_type == mic2) {
7194 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7195 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7202 __kmp_env_checks = TRUE;
7204 __kmp_env_checks = FALSE;
7208 __kmp_foreign_tp = TRUE;
7210 __kmp_global.g.g_dynamic = FALSE;
7211 __kmp_global.g.g_dynamic_mode = dynamic_default;
7213 __kmp_init_nesting_mode();
7215 __kmp_env_initialize(NULL);
7217#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7218 __kmp_user_level_mwait_init();
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
7233 __kmp_threads_capacity =
7234 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7236 __kmp_tp_capacity = __kmp_default_tp_capacity(
7237 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7242 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7243 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7244 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7245 __kmp_thread_pool = NULL;
7246 __kmp_thread_pool_insert_pt = NULL;
7247 __kmp_team_pool = NULL;
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth == 0);
  KMP_DEBUG_ASSERT(__kmp_nth == 0);
7268 gtid = __kmp_register_root(TRUE);
7269 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7270 KMP_ASSERT(KMP_UBER_GTID(gtid));
7271 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7275 __kmp_common_initialize();
7279 __kmp_register_atfork();
7282#if !KMP_DYNAMIC_LIB || \
7283 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7288 int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
7296#if KMP_HANDLE_SIGNALS
7302 __kmp_install_signals(FALSE);
7305 __kmp_install_signals(TRUE);
7310 __kmp_init_counter++;
7312 __kmp_init_serial = TRUE;
7314 if (__kmp_settings) {
7318 if (__kmp_display_env || __kmp_display_env_verbose) {
7319 __kmp_env_print_2();
7328 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
7332 if (__kmp_init_serial) {
7335 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7336 if (__kmp_init_serial) {
7337 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7340 __kmp_do_serial_initialize();
7341 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
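// Middle initialization: runs after serial init and before the first parallel
// region; initializes affinity and derives the default team size from the
// number of available processors.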
static void __kmp_do_middle_initialize(void) {
7346 int prev_dflt_team_nth;
7348 if (!__kmp_init_serial) {
7349 __kmp_do_serial_initialize();
7352 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7354 if (UNLIKELY(!__kmp_need_register_serial)) {
7357 __kmp_register_library_startup();
7362 prev_dflt_team_nth = __kmp_dflt_team_nth;
7364#if KMP_AFFINITY_SUPPORTED
7367 __kmp_affinity_initialize(__kmp_affinity);
7371 KMP_ASSERT(__kmp_xproc > 0);
7372 if (__kmp_avail_proc == 0) {
7373 __kmp_avail_proc = __kmp_xproc;
7379 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7380 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7385 if (__kmp_dflt_team_nth == 0) {
7386#ifdef KMP_DFLT_NTH_CORES
7388 __kmp_dflt_team_nth = __kmp_ncores;
7389 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7390 "__kmp_ncores (%d)\n",
7391 __kmp_dflt_team_nth));
7394 __kmp_dflt_team_nth = __kmp_avail_proc;
7395 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7396 "__kmp_avail_proc(%d)\n",
7397 __kmp_dflt_team_nth));
7401 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7402 __kmp_dflt_team_nth = KMP_MIN_NTH;
7404 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7405 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7408 if (__kmp_nesting_mode > 0)
7409 __kmp_set_nesting_mode_threads();
7413 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7415 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7420 for (i = 0; i < __kmp_threads_capacity; i++) {
7421 kmp_info_t *thread = __kmp_threads[i];
7424 if (thread->th.th_current_task->td_icvs.nproc != 0)
7427 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));
7435#ifdef KMP_ADJUST_BLOCKTIME
7437 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7438 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7439 if (__kmp_nth > __kmp_avail_proc) {
7440 __kmp_zero_bt = TRUE;
7446 TCW_SYNC_4(__kmp_init_middle, TRUE);
7448 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
7452 if (__kmp_init_middle) {
7455 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7456 if (__kmp_init_middle) {
7457 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7460 __kmp_do_middle_initialize();
7461 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_parallel_initialize(void) {
7465 int gtid = __kmp_entry_gtid();
7468 if (TCR_4(__kmp_init_parallel))
7470 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7471 if (TCR_4(__kmp_init_parallel)) {
7472 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7477 if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
7487 if (!__kmp_init_middle) {
7488 __kmp_do_middle_initialize();
7490 __kmp_assign_root_init_mask();
7491 __kmp_resume_if_hard_paused();
7494 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7495 KMP_ASSERT(KMP_UBER_GTID(gtid));
7497#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7500 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7501 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7502 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7506#if KMP_HANDLE_SIGNALS
7508 __kmp_install_signals(TRUE);
7512 __kmp_suspend_initialize();
7514#if defined(USE_LOAD_BALANCE)
7515 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7516 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7519 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7520 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7524 if (__kmp_version) {
7525 __kmp_print_version_2();
7529 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7532 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7534 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7537void __kmp_hidden_helper_initialize() {
7538 if (TCR_4(__kmp_init_hidden_helper))
7542 if (!TCR_4(__kmp_init_parallel))
7543 __kmp_parallel_initialize();
7547 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7548 if (TCR_4(__kmp_init_hidden_helper)) {
7549 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7553#if KMP_AFFINITY_SUPPORTED
7557 if (!__kmp_hh_affinity.flags.initialized)
7558 __kmp_affinity_initialize(__kmp_hh_affinity);
7562 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7566 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7569 __kmp_do_initialize_hidden_helper_threads();
7572 __kmp_hidden_helper_threads_initz_wait();
7575 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7577 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
/* ------------------------------------------------------------------------ */

void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0;
  dispatch->th_doacross_buf_idx = 0;
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}

int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in a teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root.  Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to thread limit stored when teams masters were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch the league of teams now, but do not let workers execute
// (they hang on the fork barrier until the next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The last parameter "1" eliminates the join barrier, which won't work
  // because worker threads are in a fork barrier waiting for more parallel
  // regions.
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}

int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* This sets the requested number of threads for the next parallel region
   encountered by this team. Since this should be enclosed in the forkjoin
   critical section, it should avoid race conditions with asymmetrical nested
   parallelism. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // Adjust num_threads without a warning as it is not a user setting:
    // num_threads = min(num_threads, nthreads-var, thread-limit-var).
    // No thread_limit clause specified - do not change thread-limit-var ICV.
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size from exceeding thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads.
    // Store the new thread limit; the old limit is saved in th_cg_roots list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}

/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // The OpenMP specification requires requested values to be positive,
    // but users can send us any value, so we'd better check.
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}

/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered
   (OpenMP 5.1 variant with lower/upper bounds on num_teams). */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
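// Illustrative sketch (not part of this file): a user-level teams construct
// that ends up in the entry points above. Under typical compiler lowering the
// num_teams/thread_limit clauses are forwarded through
// __kmpc_push_num_teams_51()/__kmpc_push_num_teams() into the routines above
// before the league is forked; the exact lowering is compiler-dependent.
//
//   #include <omp.h>
//   #include <stdio.h>
//   int main(void) {
//   #pragma omp teams num_teams(2 : 4) thread_limit(8)
//     printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
//     return 0;
//   }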
// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}

/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid,
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism.  Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count the primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this particular root (if we are at the outer parallel level), and the
  // currently executing thread (to become the primary thread) are available
  // to add to the new team, but are currently contributing to the system
  // load, and must be accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set
    // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, we shouldn't wind
    // up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the #active omp threads that are available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}
#endif /* USE_LOAD_BALANCE */
/* ------------------------------------------------------------------------ */

/* NOTE: this is called with the __kmp_init_lock held */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
/* ------------------------------------------------------------------------ */

int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}

void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
/* ------------------------------------------------------------------------ */

void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */

  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* must be called in the serial section of the
                                  top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}

void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* Getting team information common for all team API */
// Returns NULL if not in a teams construct
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (1 team of 1 thread)
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
/* ------------------------------------------------------------------------ */
// OMP_AFFINITY_FORMAT / affinity-format-var field descriptors

typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L, N, A, etc.
  const char *long_name; // from spec e.g., nesting_level, num_threads, etc.
  char field_format; // data type for snprintf ('d' for integer, 's' for string)
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
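// Illustrative sketch (not part of this file): the short/long names above are
// the fields accepted by OMP_AFFINITY_FORMAT and omp_display_affinity(); only
// fields listed in the table are assumed here.
//
//   #include <omp.h>
//   int main(void) {
//   #pragma omp parallel
//     omp_display_affinity("pid=%P tid=%{thread_num}/%N lvl=%L aff=%A");
//     return 0;
//   }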
// Capture one %-field of the affinity format string into field_buffer.
// Returns the number of characters written.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit number widths; this also prevents overflowing the
    // format variable
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to the spec, if an implementation does not have info for a
    // field type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
/* Return the number of characters needed to hold the affinity string (not
   including the null byte). The resulting string is printed to buffer, which
   the caller can then handle afterwards. */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, then use the
  // affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put the field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put a literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}

// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
/* ------------------------------------------------------------------------ */

void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg;
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime has been set explicitly */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
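// Illustrative note (assumption, not part of this file): this routine is the
// worker behind the kmp_set_blocktime() entry point, and the same block-time
// control is what the KMP_BLOCKTIME environment variable configures; e.g.
// KMP_BLOCKTIME=0 puts workers to sleep right after a region instead of
// spin-waiting, while KMP_BLOCKTIME=infinite keeps them spinning.
//
//   // kmp_set_blocktime(0);  // per-thread equivalent of KMP_BLOCKTIME=0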
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
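// ----------------------------------------------------------------------------
// Fast reduction method selection (next routine): a serialized team
// (team_size == 1) gets empty_reduce_block; otherwise the choice between
// atomic_reduce_block, a tree reduction and the default critical_reduce_block
// depends on what the compiler generated (loc->flags, reduce_data/reduce_func),
// the architecture and OS, the team size, num_vars and reduce_size. A method
// forced through __kmp_force_reduction_method (KMP_FORCE_REDUCTION) overrides
// the heuristic when the requested method is available.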
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct (lck != NULL).
  // If (reduce_data != NULL && reduce_func != NULL), a tree-reduction method
  // can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by the RTL.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION
  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // unsupported method specified
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
// This function is for testing the set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}

// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely. Resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// This function is called via __kmpc_pause_resource. Returns 0 if successful.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      // runtime is not paused, so it cannot be resumed
      return 1;
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      // runtime is already paused
      return 1;
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      // runtime is already paused
      return 1;
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    // invalid pause level requested
    return 1;
  }
}
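// Illustrative sketch (not part of this file): __kmp_pause_resource() backs
// the OpenMP 5.0 pause API, e.g.
//
//   #include <omp.h>
//   int main(void) {
//   #pragma omp parallel
//     { /* warm up the runtime */ }
//     return omp_pause_resource_all(omp_pause_soft); // 0 on success
//   }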
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// The team size is changing, so the distributed barrier must be modified.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust
  // the size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If a thread is still transitioning to the in-use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in the team now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to the unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  KMP_MFENCE();

  // Workers should see transition status 2 and move to 0, but may need to be
  // woken up first
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
// Add the threads back to the team
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during a
  // resize of the team. We're going to set th_used_in_team to 3 to indicate to
  // the threads that they should transition themselves back into the team.
  // Then, if blocktime isn't infinite, the thread could be sleeping, so we
  // send a resume to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // The threads should be transitioning to the team; when they are done, they
  // should have set th_used_in_team to 1. This loop forces the primary thread
  // to wait until all of them have moved into the team.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}

// Globals and functions for hidden helper tasks
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads in case
  // a regular thread pushes a hidden helper task to one of them before it has
  // been initialized. We wait here until all initialization is finished.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If this is the main thread of the hidden helper team, unset the initial
  // state, release the initial thread, then wait for a signal and wake up the
  // worker threads.
  if (__kmpc_master(nullptr, *gtid)) {
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
} // namespace

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
/* Nesting Mode:
   Set via KMP_NESTING_MODE, which takes an integer.
   KMP_NESTING_MODE=0 is the default and does not use nesting mode.
   KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
   in the topology, sizing each level from the topology.
   KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels. */
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}

// Set # threads for top levels of nesting; must be called after topology set
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--; // skip levels with only one entity
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) {
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable estimate
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set nesting mode levels to the same value
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
/* ------------------------------------------------------------------------ */
// Empty symbols to export when a feature is disabled at build time.

#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif

int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;

#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif