15 #include "kmp_affinity.h" 16 #include "kmp_atomic.h" 17 #include "kmp_environment.h" 18 #include "kmp_error.h" 22 #include "kmp_settings.h" 23 #include "kmp_stats.h" 25 #include "kmp_wait_release.h" 26 #include "kmp_wrapper_getpid.h" 27 #include "kmp_dispatch.h" 28 #if KMP_USE_HIER_SCHED 29 #include "kmp_dispatch_hier.h" 33 #include "ompt-specific.h" 37 #define KMP_USE_PRCTL 0 43 #include "tsan_annotations.h" 45 #if defined(KMP_GOMP_COMPAT) 46 char const __kmp_version_alt_comp[] =
47 KMP_VERSION_PREFIX
"alternative compiler support: yes";
50 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX
"API version: " 62 char const __kmp_version_lock[] =
63 KMP_VERSION_PREFIX
"lock type: run time selectable";
66 #define KMP_MIN(x, y) ((x) < (y) ? (x) : (y)) 71 kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
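
/* Determine the global thread id (gtid) of the calling thread.  Depending on
   __kmp_gtid_mode this uses thread-local data (TDATA), keyed TLS, or a scan
   of the registered threads' stack bounds. */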
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* Scan the stacks of all registered threads to find the one the caller is
     executing on.  The stack grows down. */
  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;
      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* The internal algorithm failed to find the thread; fall back to TLS. */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();
  if (i < 0)
    return i;

  /* Refine the recorded stack bounds for this uber thread so later lookups
     succeed without the TLS call. */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for the uber thread since they have been refined. */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* We haven't been assigned a gtid yet: register this thread as a new root. */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
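
/* Sanity check that the stack of the given thread does not overlap the stack
   of any other registered thread; aborts with a StackOverlap error if it
   does.  The extensive check is only performed when __kmp_env_checks is
   enabled and the thread is not an uber thread. */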
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking uber threads since they use refinement and cannot
     overlap. */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
/* Park the calling thread forever. */
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}
#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = (char *)p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              p1 = (char *)p1 + page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
  va_end(ap);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
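
/* Process- and thread-level abort helpers.  __kmp_abort_process terminates
   the whole process (dumping the debug buffer first, if enabled);
   __kmp_abort_thread parks just the calling thread. */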
void __kmp_abort_process() {
  /* Terminate the whole process on abort. */
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    /* On Windows, abort() pops up an error box; record the abort request and
       let the process terminate without the dialog. */
    __kmp_global.g.g_abort = SIGABRT;
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
}

void __kmp_abort_thread(void) {
  /* TODO: Eliminate g_abort global variable and this function.
     In case of abort just call abort(), it will kill all the threads. */
  __kmp_infinite_loop();
}
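
/* Storage-map dumps: when KMP_STORAGE_MAP is enabled these helpers print the
   address ranges of the major per-thread and per-team data structures. */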
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
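
/* Dynamic library support.  On Windows the runtime hooks DllMain so it can
   shut itself down on process/thread detach and reset bootstrap locks that
   terminated threads may still have been holding. */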
#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // Wait until no other OpenMP thread registered with the library is alive;
  // the OS terminates them at process detach, possibly while holding locks.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue; // skip the thread executing PROCESS_DETACH itself
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive)
        ++thread_count;
    }
    if (thread_count == 0)
      break; // success
  }

  // Now it is safe to reset the locks the terminated threads may have held.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved != NULL means the process is terminating (rather than a
      // FreeLibrary() call); other threads may already have been killed by
      // the OS while holding runtime locks.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */
/* Change the library type to "status" and return the old type. */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // previous setting
}
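
/* Enter/exit hooks for the "ordered" construct when parallel ordered is
   built in: __kmp_parallel_deo waits until it is this thread's turn,
   __kmp_parallel_dxo passes the turn to the next thread in the team. */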
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
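
/* __kmp_enter_single returns nonzero in the thread that wins the SINGLE
   construct (via an atomic compare-and-store on the team's construct
   counter); __kmp_exit_single closes the construct for that thread. */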
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
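
/* Work out how many threads the next parallel region can actually get:
   apply the dynamic adjustment mode, KMP_DEVICE_THREAD_LIMIT,
   OMP_THREAD_LIMIT and the capacity of the threads array, emitting one-time
   warnings when the request has to be trimmed.  Called with
   __kmp_forkjoin_lock held. */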
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (__kmp_cg_max_nth).
  if (root->r.r_cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_cg_max_nth) {
    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Propagate any changes to the floating point control registers out to the
   team.  Avoid unnecessary writes to the team cache line by only updating
   values that actually changed. */
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime wants to know whether it should restore
    // these values, so make sure the flag is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Don't write to this cache line in the team structure unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

/* Do the opposite: restore the hardware registers to the values saved in the
   team, if they differ from the current ones. */
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
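
/* Serialized parallel: run the region on the encountering thread only, using
   (or allocating) the thread's serial team and pushing a nesting level,
   dispatch buffer and ICV copy so enquiry functions still behave correctly. */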
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

#if OMP_40_ENABLED
  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value of
    // proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data;
  ompt_parallel_data.ptr = NULL;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                     ompt_parallel_data,
#endif
#if OMP_40_ENABLED
                                     proc_bind,
#endif
                                     &this_thr->th.th_current_task->td_icvs,
                                     0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

#if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();

  } else {
    /* this serialized team is already being used; just add another nested
       level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
#if OMP_40_ENABLED
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
#endif

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking. content was swapped

    /* OMPT implicit task begin */
    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
      OMPT_CUR_TASK_INFO(this_thr)
          ->thread_num = __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
  }
#endif
}
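
/* Fork support.  __kmp_fork_call does most of the work of launching a
   parallel region; it returns TRUE if the region really went parallel (the
   caller is then expected to call __kmp_join_call after invoking the
   microtask) and FALSE if the region was serialized and already executed.
   A compiler entry point uses it roughly like this (simplified sketch, not
   the exact upstream code):

     // va_start(ap, microtask);
     // if (__kmp_fork_call(loc, gtid, fork_context_intel, argc, microtask,
     //                     __kmp_invoke_task_func, ap)) {
     //   __kmp_join_call(loc, gtid);
     // }
     // va_end(ap);
*/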
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    va_list *ap
#else
                    va_list ap
#endif
                    ) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
#if OMP_40_ENABLED
  int active_level;
  int teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with
       some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data;
  ompt_parallel_data.ptr = NULL;
  ompt_data_t *parent_task_data;
  omp_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
#if OMP_40_ENABLED
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
          OMPT_INVOKER(call_context), return_address);
    }
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  master_th->th.th_ident = loc;
#if OMP_40_ENABLED
  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // This is the start of a parallel region nested inside a teams construct.
    // The team is actual (hot); all workers are ready at the fork barrier.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      *argv++ = va_arg(*ap, void *);
#else
      *argv++ = va_arg(ap, void *);
#endif
    // Increment our nested depth levels, but not increase the serialization
    if (parent_team == master_th->th.th_serial_team) {
      // we are in serialized parallel
      __kmpc_serialized_parallel(loc, gtid);
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
      // this is needed so that enquiry functions work correctly; it is
      // restored at join time
      parent_team->t.t_serialized--;
#if OMPT_SUPPORT
      void *dummy;
      void **exit_runtime_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking. content was swapped

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
          OMPT_CUR_TASK_INFO(master_th)
              ->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = omp_state_work_parallel;
      } else {
        exit_runtime_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_runtime_p
#endif
                               );
      }

#if OMPT_SUPPORT
      *exit_runtime_p = NULL;
      if (ompt_enabled.enabled) {
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num);
        }
        __ompt_lw_taskteam_unlink(master_th);

        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context), return_address);
        }
        master_th->th.ompt_thread_info.state = omp_state_overhead;
      }
#endif
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // only can reduce number of threads dynamically, can't increase
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels
      }
      master_th->th.th_set_nproc = 0;
    }

#if USE_DEBUGGER
    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means debugger doesn't want to change num threads
        master_set_numthreads = nth;
      }
    }
#endif

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    /* Invoke microtask for MASTER thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));
    KMP_MB(); /* Flush all pending memory write invalidates. */

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct
#endif /* OMP_40_ENABLED */
#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  /* determine how many new threads we can use */
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
#if OMP_40_ENABLED
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
#endif
    nthreads =
        master_set_numthreads
            ? master_set_numthreads
            : get__nproc_2(
                  parent_team,
                  master_tid); // TODO: get nproc directly from current task

    // Check if we need to take forkjoin lock (no need for serialized
    // parallel out of teams construct).
    if (nthreads > 1) {
      if ((!get__nested(master_th) && (root->r.r_in_parallel
#if OMP_40_ENABLED
                                       && !enter_teams
#endif /* OMP_40_ENABLED */
                                       )) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads,
                                       enter_teams);
      if (nthreads == 1) {
        // Free lock for single thread execution here; for multi-thread
        // execution it will be freed later after team of threads created
        // and initialized
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;
  /* create a serialized parallel region? */
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    if (call_context == fork_context_intel) {
      /* TODO this sucks, use the compiler itself to pass args! :) */
      master_th->th.th_serial_team->t.t_ident = loc;
#if OMP_40_ENABLED
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;
        // Get args from parent team for teams construct

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking. content was swapped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
            OMPT_CUR_TASK_INFO(master_th)
                ->thread_num = __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num);
          }

          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = omp_state_overhead;
        }
#endif
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        // team->t.t_pkfn = microtask;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg(*ap, void *);
#else
            *argv++ = va_arg(ap, void *);
#endif
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct
            argv[i] = parent_team->t.t_argv[i];
        }
        // revert change made in __kmpc_serialized_parallel() because initial
        // code in teams should have level=0
        team->t.t_level--;
        // call special invoker for outer "parallel" of teams construct
        invoker(gtid);
      } else {
#endif /* OMP_40_ENABLED */
        argv = args;
        for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
          *argv++ = va_arg(*ap, void *);
#else
          *argv++ = va_arg(ap, void *);
#endif
        KMP_MB();

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking. content was swapped
          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
            OMPT_CUR_TASK_INFO(master_th)
                ->thread_num = __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          *exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = omp_state_overhead;
        }
#endif
#if OMP_40_ENABLED
      }
#endif /* OMP_40_ENABLED */
    } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = NULL;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
// don't use lw_taskteam after linking. content was swapped
#endif

      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    KMP_MB();
    return FALSE;
  } // if (nthreads == 1)

  // GEH: only modify the executing flag in the case when not serialized
  //      serialized case is handled in kmpc_serialized_parallel
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;
#if OMP_40_ENABLED
  if (!master_th->th.th_teams_microtask || level > teams_level)
#endif /* OMP_40_ENABLED */
  {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

#if OMP_40_ENABLED
  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv =
      proc_bind_default; // proc_bind_default means don't update
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use current proc-bind-var.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure out the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

  if ((nthreads_icv > 0)
#if OMP_40_ENABLED
      || (proc_bind_icv != proc_bind_default)
#endif /* OMP_40_ENABLED */
          ) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }

#if OMP_40_ENABLED
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }
#endif /* OMP_40_ENABLED */

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
#if OMP_40_ENABLED
                               proc_bind,
#endif
                               &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
#if OMP_40_ENABLED
                               proc_bind,
#endif
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
#if OMP_40_ENABLED
  if (!master_th->th.th_teams_microtask || level > teams_level) {
#endif /* OMP_40_ENABLED */
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
#if OMP_40_ENABLED
  } else {
    // Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
#endif /* OMP_40_ENABLED */
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set master's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

#if OMP_40_ENABLED
  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
#endif

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set master's task team to team's task team.  Unless this is the hot
    // team, it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of master's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store master's task_state on stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore master's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
#if OMP_40_ENABLED
  if (ap) {
#endif /* OMP_40_ENABLED */
    for (i = argc - 1; i >= 0; --i) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      void *new_argv = va_arg(*ap, void *);
#else
      void *new_argv = va_arg(ap, void *);
#endif
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
#if OMP_40_ENABLED
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }
#endif /* OMP_40_ENABLED */

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = omp_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
#if OMP_40_ENABLED
      && !master_th->th.th_teams_microtask // not in teams construct
#endif /* OMP_40_ENABLED */
      ) {
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
               __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id =
        __kmp_itt_stack_caller_create(); // create new stack stitching id
    // before entering fork barrier
  }
#endif /* USE_ITT_BUILD */

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for MASTER thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
  }
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  return TRUE;
}
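
/* Join support.  __kmp_join_call undoes __kmp_fork_call: it joins the team at
   the fork/join barrier, reports OMPT/ITT events, restores the master's
   team, task team, ICVs and FP control state, and frees (or caches) the
   worker team. */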
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? omp_state_work_serial
                              : omp_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   fork_context_e fork_context, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
        codeptr);
  }

  task_info->frame.enter_frame = NULL;
  __kmp_join_restore_state(thread, team);
}
#endif
2293 void __kmp_join_call(
ident_t *loc,
int gtid
2296 enum fork_context_e fork_context
2303 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2305 kmp_team_t *parent_team;
2306 kmp_info_t *master_th;
2311 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2314 master_th = __kmp_threads[gtid];
2315 root = master_th->th.th_root;
2316 team = master_th->th.th_team;
2317 parent_team = team->t.t_parent;
2319 master_th->th.th_ident = loc;
2322 if (ompt_enabled.enabled) {
2323 master_th->th.ompt_thread_info.state = omp_state_overhead;
2328 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2329 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, " 2330 "th_task_team = %p\n",
2331 __kmp_gtid_from_thread(master_th), team,
2332 team->t.t_task_team[master_th->th.th_task_state],
2333 master_th->th.th_task_team));
2334 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2335 team->t.t_task_team[master_th->th.th_task_state]);
2339 if (team->t.t_serialized) {
2341 if (master_th->th.th_teams_microtask) {
2343 int level = team->t.t_level;
2344 int tlevel = master_th->th.th_teams_level;
2345 if (level == tlevel) {
2349 }
else if (level == tlevel + 1) {
2353 team->t.t_serialized++;
2360 if (ompt_enabled.enabled) {
2361 __kmp_join_restore_state(master_th, parent_team);
2368 master_active = team->t.t_master_active;
2376 __kmp_internal_join(loc, gtid, team);
2380 master_th->th.th_task_state =
2388 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2389 void *codeptr = team->t.ompt_team_info.master_return_address;
2393 if (__itt_stack_caller_create_ptr) {
2394 __kmp_itt_stack_caller_destroy(
2395 (__itt_caller)team->t
2400 if (team->t.t_active_level == 1
2402 && !master_th->th.th_teams_microtask
2405 master_th->th.th_ident = loc;
2408 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2409 __kmp_forkjoin_frames_mode == 3)
2410 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2411 master_th->th.th_frame_time, 0, loc,
2412 master_th->th.th_team_nproc, 1);
2413 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2414 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2415 __kmp_itt_region_joined(gtid);
2420 if (master_th->th.th_teams_microtask && !exit_teams &&
2421 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2422 team->t.t_level == master_th->th.th_teams_level + 1) {
2429 team->t.t_active_level--;
2430 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2433 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2434 int old_num = master_th->th.th_team_nproc;
2435 int new_num = master_th->th.th_teams_size.nth;
2436 kmp_info_t **other_threads = team->t.t_threads;
2437 team->t.t_nproc = new_num;
2438 for (i = 0; i < old_num; ++i) {
2439 other_threads[i]->th.th_team_nproc = new_num;
2442 for (i = old_num; i < new_num; ++i) {
2445 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2446 for (b = 0; b < bs_last_barrier; ++b) {
2447 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2448 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2450 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2453 if (__kmp_tasking_mode != tskm_immediate_exec) {
2455 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2461 if (ompt_enabled.enabled) {
2462 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2472 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2473 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2475 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2480 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2483 if (!master_th->th.th_teams_microtask ||
2484 team->t.t_level > master_th->th.th_teams_level)
2488 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2490 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2493 if (ompt_enabled.enabled) {
2494 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2495 if (ompt_enabled.ompt_callback_implicit_task) {
2496 int ompt_team_size = team->t.t_nproc;
2497 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2498 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2499 OMPT_CUR_TASK_INFO(master_th)->thread_num);
2502 task_info->frame.exit_frame = NULL;
2503 task_info->task_data = ompt_data_none;
2507 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2509 __kmp_pop_current_task_from_thread(master_th);
2511 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 2513 master_th->th.th_first_place = team->t.t_first_place;
2514 master_th->th.th_last_place = team->t.t_last_place;
2517 updateHWFPControl(team);
2519 if (root->r.r_active != master_active)
2520 root->r.r_active = master_active;
2522 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2530 master_th->th.th_team = parent_team;
2531 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2532 master_th->th.th_team_master = parent_team->t.t_threads[0];
2533 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2536 if (parent_team->t.t_serialized &&
2537 parent_team != master_th->th.th_serial_team &&
2538 parent_team != root->r.r_root_team) {
2539 __kmp_free_team(root,
2540 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2541 master_th->th.th_serial_team = parent_team;
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember master's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the master thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
                    codeptr);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
/* Check whether we should push an internal control record onto the
   serial team stack.  If so, do it.  */
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
      ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
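// Note (sketch, assuming the usual entry layer): user-level calls such as
// omp_set_num_threads() typically funnel into __kmp_set_num_threads() with the
// caller's global thread id, e.g.
//   int gtid = __kmp_entry_gtid();
//   __kmp_set_num_threads(4, gtid); // trims the hot team to 4 right away
// Trimming the hot team here, rather than at the next fork, lets the released
// workers return to the thread pool instead of idling.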
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed; the last valid setting is kept.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // it's OK, the max_active_levels is within the valid range:
    // [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]. We allow a zero value.
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // If the input exceeds the upper limit, correct it to the upper limit.
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
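// Both the setter and the getter above operate on the max_active_levels ICV
// stored in the current task's td_icvs, so the value is per-thread and is
// inherited by implicit tasks of nested regions rather than being a single
// process-wide global.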
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  // Valid parameters should fit in one of two intervals - standard or extended.
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

  switch (th_type) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
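// Example of the round trip (sketch): a call such as
//   __kmp_set_schedule(gtid, kmp_sched_dynamic, 4);
// is stored in the ICV as kmp_sch_dynamic_chunked with chunk 4, and
// __kmp_get_schedule() maps it back to (kmp_sched_dynamic, 4) through the
// switch above.  A chunk of 0 reported for the plain static kinds means the
// user never set an explicit chunk size.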
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

#if OMP_40_ENABLED
  if (thr->th.th_teams_microtask) {
    // we are in a teams region where multiple nested teams have the same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // As we need to pass through the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }
#endif

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

#if OMP_40_ENABLED
  if (thr->th.th_teams_microtask) {
    // we are in a teams region where multiple nested teams have the same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // As we need to pass through the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }
#endif

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
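// Both ancestor queries walk the t_parent chain, counting serialized nestings
// via t_serialized so that nested serialized regions sharing one t_level are
// still treated as distinct ancestor levels.  Inside a teams construct, ii is
// bumped artificially because the league teams all share the same t_level.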
kmp_r_sched_t __kmp_get_schedule_global() {
  // Create a schedule snapshot from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided.  __kmp_sched keeps its original value so
  // the user can set KMP_SCHEDULE multiple times and thus have different
  // run-time schedules in different roots.
  kmp_r_sched_t r_sched;

  if (__kmp_sched == kmp_sch_static) {
    // replace STATIC with a more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (__kmp_sched == kmp_sch_guided_chunked) {
    // replace GUIDED with a more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was not ever set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
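// A snapshot is returned by value because the schedule-related globals can be
// changed independently of each other, so callers get a consistent
// {r_sched_type, chunk} pair instead of reading two globals that might be
// mid-update.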
/* Allocate a new argv[] vector in the team, or reuse the existing one if it
   is large enough. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
  }
}
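// All per-thread arrays above are sized by max_nth, while the shared dispatch
// buffers are sized by __kmp_dispatch_num_buffers (or 2 for a team that can
// only ever be serial), since loop dispatch rotates through those buffers
// independently of the team size.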
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
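// Only the thread-pointer array contents survive a reallocation; the dispatch
// buffers and implicit task data are recreated from scratch by
// __kmp_allocate_team_arrays(), so callers are expected to reinitialize the
// team afterwards (see __kmp_reinitialize_team / __kmp_initialize_team below).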
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
#endif /* OMP_40_ENABLED */

  kmp_internal_control_t g_icvs = {
    0, // int serial_nesting_level; corresponds to value of th_team_serialized
    (kmp_int8)__kmp_dflt_nested, // nested; internal control for nested parallelism
    (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic adjustment of threads
    (kmp_int8)__kmp_env_blocktime, // bt_set; whether blocktime was explicitly set
    __kmp_dflt_blocktime, // int blocktime; internal control for blocktime
#if KMP_USE_MONITOR
    __kmp_bt_intervals, // int bt_intervals; internal control for blocktime intervals
#endif
    __kmp_dflt_team_nth, // int nproc; # of threads for the next parallel region
    __kmp_dflt_max_active_levels, // int max_active_levels
    r_sched, // kmp_r_sched_t sched; runtime schedule {sched,chunk} pair
#if OMP_40_ENABLED
    __kmp_nested_proc_bind.bind_types[0],
    __kmp_default_device,
#endif /* OMP_40_ENABLED */
    NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}

static kmp_internal_control_t
__kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
3144 static void __kmp_initialize_root(kmp_root_t *root) {
3146 kmp_team_t *root_team;
3147 kmp_team_t *hot_team;
3148 int hot_team_max_nth;
3149 kmp_r_sched_t r_sched =
3150 __kmp_get_schedule_global();
3151 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3152 KMP_DEBUG_ASSERT(root);
3153 KMP_ASSERT(!root->r.r_begin);
3156 __kmp_init_lock(&root->r.r_begin_lock);
3157 root->r.r_begin = FALSE;
3158 root->r.r_active = FALSE;
3159 root->r.r_in_parallel = 0;
3160 root->r.r_blocktime = __kmp_dflt_blocktime;
3161 root->r.r_nested = __kmp_dflt_nested;
3162 root->r.r_cg_nthreads = 1;
3166 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3169 __kmp_allocate_team(root,
3176 __kmp_nested_proc_bind.bind_types[0],
3180 USE_NESTED_HOT_ARG(NULL)
3185 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3188 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3190 root->r.r_root_team = root_team;
3191 root_team->t.t_control_stack_top = NULL;
3194 root_team->t.t_threads[0] = NULL;
3195 root_team->t.t_nproc = 1;
3196 root_team->t.t_serialized = 1;
3198 root_team->t.t_sched.sched = r_sched.sched;
3201 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3202 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3206 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3209 __kmp_allocate_team(root,
3211 __kmp_dflt_team_nth_ub * 2,
3216 __kmp_nested_proc_bind.bind_types[0],
3220 USE_NESTED_HOT_ARG(NULL)
3222 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3224 root->r.r_hot_team = hot_team;
3225 root_team->t.t_control_stack_top = NULL;
3228 hot_team->t.t_parent = root_team;
3231 hot_team_max_nth = hot_team->t.t_max_nproc;
3232 for (f = 0; f < hot_team_max_nth; ++f) {
3233 hot_team->t.t_threads[f] = NULL;
3235 hot_team->t.t_nproc = 1;
3237 hot_team->t.t_sched.sched = r_sched.sched;
3238 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
    ) {

  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.
  // Team id is *not* a key.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }

  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3315 void __kmp_print_structure(
void) {
3317 kmp_team_list_t list;
3321 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3325 __kmp_printf(
"\n------------------------------\nGlobal Thread " 3326 "Table\n------------------------------\n");
3329 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3330 __kmp_printf(
"%2d", gtid);
3331 if (__kmp_threads != NULL) {
3332 __kmp_printf(
" %p", __kmp_threads[gtid]);
3334 if (__kmp_root != NULL) {
3335 __kmp_printf(
" %p", __kmp_root[gtid]);
3342 __kmp_printf(
"\n------------------------------\nThreads\n--------------------" 3344 if (__kmp_threads != NULL) {
3346 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3347 kmp_info_t
const *thread = __kmp_threads[gtid];
3348 if (thread != NULL) {
3349 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3350 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3351 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3352 __kmp_print_structure_team(
" Serial Team: ",
3353 thread->th.th_serial_team);
3354 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3355 __kmp_print_structure_thread(
" Master: ",
3356 thread->th.th_team_master);
3357 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3358 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3360 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3362 __kmp_print_structure_thread(
" Next in pool: ",
3363 thread->th.th_next_pool);
3365 __kmp_print_structure_team_accum(list, thread->th.th_team);
3366 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3370 __kmp_printf(
"Threads array is not allocated.\n");
3374 __kmp_printf(
"\n------------------------------\nUbers\n----------------------" 3376 if (__kmp_root != NULL) {
3378 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3379 kmp_root_t
const *root = __kmp_root[gtid];
3381 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3382 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3383 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3384 __kmp_print_structure_thread(
" Uber Thread: ",
3385 root->r.r_uber_thread);
3386 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3387 __kmp_printf(
" Nested?: %2d\n", root->r.r_nested);
3388 __kmp_printf(
" In Parallel: %2d\n",
3389 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3391 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3392 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3396 __kmp_printf(
"Ubers array is not allocated.\n");
3399 __kmp_printf(
"\n------------------------------\nTeams\n----------------------" 3401 while (list->next != NULL) {
3402 kmp_team_p
const *team = list->entry;
3404 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3405 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3406 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid);
3407 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3408 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3409 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3410 for (i = 0; i < team->t.t_nproc; ++i) {
3411 __kmp_printf(
" Thread %2d: ", i);
3412 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3414 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3420 __kmp_printf(
"\n------------------------------\nPools\n----------------------" 3422 __kmp_print_structure_thread(
"Thread pool: ",
3423 CCAST(kmp_info_t *, __kmp_thread_pool));
3424 __kmp_print_structure_team(
"Team pool: ",
3425 CCAST(kmp_team_t *, __kmp_team_pool));
3429 while (list != NULL) {
3430 kmp_team_list_item_t *item = list;
3432 KMP_INTERNAL_FREE(item);
/* Prime multipliers for the per-thread linear congruential generators. */
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};

/* __kmp_get_random: Get a random number using a linear congruential method. */
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

/* __kmp_init_random: Initialize a random number generator */
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; returns the
   number reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // reclaim only roots that died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
/* Try to create free entries in __kmp_threads and __kmp_root, and return the
   number of free entries generated. */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
// resizing __kmp_threads does not need additional protection if foreign
// threads are present.

#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
  /* only for Windows static library */
  /* reclaim array entries for root threads that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
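// Capacity grows by doubling until it covers the request, capped at
// __kmp_sys_max_nth.  __kmp_threads and __kmp_root share a single allocation
// (newRoot points just past the thread-pointer block), so both arrays are
// swapped in together before the old block is freed.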
3604 int __kmp_register_root(
int initial_thread) {
3605 kmp_info_t *root_thread;
3609 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3610 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3627 capacity = __kmp_threads_capacity;
3628 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3633 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3634 if (__kmp_tp_cached) {
3635 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3636 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3637 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3639 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3647 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3651 (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3652 KMP_ASSERT(gtid < __kmp_threads_capacity);
3656 TCW_4(__kmp_nth, __kmp_nth + 1);
3660 if (__kmp_adjust_gtid_mode) {
3661 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3662 if (TCR_4(__kmp_gtid_mode) != 2) {
3663 TCW_4(__kmp_gtid_mode, 2);
3666 if (TCR_4(__kmp_gtid_mode) != 1) {
3667 TCW_4(__kmp_gtid_mode, 1);
3672 #ifdef KMP_ADJUST_BLOCKTIME 3675 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3676 if (__kmp_nth > __kmp_avail_proc) {
3677 __kmp_zero_bt = TRUE;
3683 if (!(root = __kmp_root[gtid])) {
3684 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3685 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3688 #if KMP_STATS_ENABLED 3690 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3691 __kmp_stats_thread_ptr->startLife();
3692 KMP_SET_THREAD_STATE(SERIAL_REGION);
3695 __kmp_initialize_root(root);
3698 if (root->r.r_uber_thread) {
3699 root_thread = root->r.r_uber_thread;
3701 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3702 if (__kmp_storage_map) {
3703 __kmp_print_thread_storage_map(root_thread, gtid);
3705 root_thread->th.th_info.ds.ds_gtid = gtid;
3707 root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
3709 root_thread->th.th_root = root;
3710 if (__kmp_env_consistency_check) {
3711 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3714 __kmp_initialize_fast_memory(root_thread);
3718 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3719 __kmp_initialize_bget(root_thread);
3721 __kmp_init_random(root_thread);
3725 if (!root_thread->th.th_serial_team) {
3726 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3727 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3728 root_thread->th.th_serial_team =
3729 __kmp_allocate_team(root, 1, 1,
3736 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3738 KMP_ASSERT(root_thread->th.th_serial_team);
3739 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3740 root_thread->th.th_serial_team));
3743 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3745 root->r.r_root_team->t.t_threads[0] = root_thread;
3746 root->r.r_hot_team->t.t_threads[0] = root_thread;
3747 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3749 root_thread->th.th_serial_team->t.t_serialized = 0;
3750 root->r.r_uber_thread = root_thread;
3753 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3754 TCW_4(__kmp_init_gtid, TRUE);
3757 __kmp_gtid_set_specific(gtid);
3760 __kmp_itt_thread_name(gtid);
3763 #ifdef KMP_TDATA_GTID 3766 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3767 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3769 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 3771 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3772 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3773 KMP_INIT_BARRIER_STATE));
3776 for (b = 0; b < bs_last_barrier; ++b) {
3777 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3779 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3783 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3784 KMP_INIT_BARRIER_STATE);
3786 #if KMP_AFFINITY_SUPPORTED 3788 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3789 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3790 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3791 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3794 if (TCR_4(__kmp_init_middle)) {
3795 __kmp_affinity_set_init_mask(gtid, TRUE);
3799 __kmp_root_counter++;
3802 if (!initial_thread && ompt_enabled.enabled) {
3804 ompt_thread_t *root_thread = ompt_get_thread();
3806 ompt_set_thread_state(root_thread, omp_state_overhead);
3808 if (ompt_enabled.ompt_callback_thread_begin) {
3809 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3810 ompt_thread_initial, __ompt_get_thread_data_internal());
3812 ompt_data_t *task_data;
3813 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3814 if (ompt_enabled.ompt_callback_task_create) {
3815 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3816 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3820 ompt_set_thread_state(root_thread, omp_state_work_serial);
3825 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if KMP_NESTED_HOT_TEAMS
// Free the hot teams of the given thread at the given nesting level and
// below; returns the number of threads released.
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // master is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
// Reset the root structure, releasing its teams and the uber thread.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before the call to __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying
  // to steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close the handle of the root duplicated in __kmp_create_worker. */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }
#endif

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  root->r.r_cg_nthreads--;

  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put the root thread into __kmp_thread_pool, so we have to reap
  // it instead of freeing it.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}
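// __kmp_reset_root() is shared by both unregister paths below: it frees the
// root and hot teams (including nested hot teams when enabled), waits for the
// task teams to be unreferenced, reaps the uber thread, and returns the number
// of threads released, which the foreign-root cleanup path sums up.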
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));

  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

#if OMP_45_ENABLED
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = omp_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }
#endif

  __kmp_reset_root(gtid, root);

  /* free up this thread slot */
  __kmp_gtid_set_specific(KMP_GTID_DNE);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_DNE;
#endif

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
/* __kmp_forkjoin_lock must already be held.
   Unregisters a root thread that is not the current thread.  Returns the
   number of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;
  (void)steam; // debugging aid; not printed in this trace

  __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
               "ptask=%p\n",
               gtid, tid, this_thr, team, this_thr->th.th_current_task,
               team->t.t_implicit_task_taskdata[tid].td_parent);
}
4020 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4021 int tid,
int gtid) {
4025 kmp_info_t *master = team->t.t_threads[0];
4026 KMP_DEBUG_ASSERT(this_thr != NULL);
4027 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4028 KMP_DEBUG_ASSERT(team);
4029 KMP_DEBUG_ASSERT(team->t.t_threads);
4030 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4031 KMP_DEBUG_ASSERT(master);
4032 KMP_DEBUG_ASSERT(master->th.th_root);
4036 TCW_SYNC_PTR(this_thr->th.th_team, team);
4038 this_thr->th.th_info.ds.ds_tid = tid;
4039 this_thr->th.th_set_nproc = 0;
4040 if (__kmp_tasking_mode != tskm_immediate_exec)
4043 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4045 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4047 this_thr->th.th_set_proc_bind = proc_bind_default;
4048 #if KMP_AFFINITY_SUPPORTED 4049 this_thr->th.th_new_place = this_thr->th.th_current_place;
4052 this_thr->th.th_root = master->th.th_root;
4055 this_thr->th.th_team_nproc = team->t.t_nproc;
4056 this_thr->th.th_team_master = master;
4057 this_thr->th.th_team_serialized = team->t.t_serialized;
4058 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4060 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4062 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4063 tid, gtid, this_thr, this_thr->th.th_current_task));
4065 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4068 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4069 tid, gtid, this_thr, this_thr->th.th_current_task));
4074 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4076 this_thr->th.th_local.this_construct = 0;
4078 if (!this_thr->th.th_pri_common) {
4079 this_thr->th.th_pri_common =
4080 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4081 if (__kmp_storage_map) {
4082 __kmp_print_storage_map_gtid(
4083 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4084 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4086 this_thr->th.th_pri_head = NULL;
4091 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4094 sizeof(dispatch_private_info_t) *
4095 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4096 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4097 team->t.t_max_nproc));
4098 KMP_ASSERT(dispatch);
4099 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4100 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4102 dispatch->th_disp_index = 0;
4104 dispatch->th_doacross_buf_idx = 0;
4106 if (!dispatch->th_disp_buffer) {
4107 dispatch->th_disp_buffer =
4108 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4110 if (__kmp_storage_map) {
4111 __kmp_print_storage_map_gtid(
4112 gtid, &dispatch->th_disp_buffer[0],
4113 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4115 : __kmp_dispatch_num_buffers],
4116 disp_size,
"th_%d.th_dispatch.th_disp_buffer " 4117 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4118 gtid, team->t.t_id, gtid);
4121 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4124 dispatch->th_dispatch_pr_current = 0;
4125 dispatch->th_dispatch_sh_current = 0;
4127 dispatch->th_deo_fcn = 0;
4128 dispatch->th_dxo_fcn = 0;
4131 this_thr->th.th_next_pool = NULL;
4133 if (!this_thr->th.th_task_state_memo_stack) {
4135 this_thr->th.th_task_state_memo_stack =
4136 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4137 this_thr->th.th_task_state_top = 0;
4138 this_thr->th.th_task_state_stack_sz = 4;
4139 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4141 this_thr->th.th_task_state_memo_stack[i] = 0;
4144 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4145 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4155 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4157 kmp_team_t *serial_team;
4158 kmp_info_t *new_thr;
4161 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4162 KMP_DEBUG_ASSERT(root && team);
4163 #if !KMP_NESTED_HOT_TEAMS 4164 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4169 if (__kmp_thread_pool) {
4171 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4172 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4173 if (new_thr == __kmp_thread_pool_insert_pt) {
4174 __kmp_thread_pool_insert_pt = NULL;
4176 TCW_4(new_thr->th.th_in_pool, FALSE);
4179 __kmp_thread_pool_nth--;
4181 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4182 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4183 KMP_ASSERT(!new_thr->th.th_team);
4184 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4185 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
4188 __kmp_initialize_info(new_thr, team, new_tid,
4189 new_thr->th.th_info.ds.ds_gtid);
4190 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4192 TCW_4(__kmp_nth, __kmp_nth + 1);
4193 root->r.r_cg_nthreads++;
4195 new_thr->th.th_task_state = 0;
4196 new_thr->th.th_task_state_top = 0;
4197 new_thr->th.th_task_state_stack_sz = 4;
4199 #ifdef KMP_ADJUST_BLOCKTIME 4202 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4203 if (__kmp_nth > __kmp_avail_proc) {
4204 __kmp_zero_bt = TRUE;
4213 kmp_balign_t *balign = new_thr->th.th_bar;
4214 for (b = 0; b < bs_last_barrier; ++b)
4215 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4218 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4219 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4226 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4227 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4232 if (!TCR_4(__kmp_init_monitor)) {
4233 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4234 if (!TCR_4(__kmp_init_monitor)) {
4235 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4236 TCW_4(__kmp_init_monitor, 1);
4237 __kmp_create_monitor(&__kmp_monitor);
4238 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4249 while (TCR_4(__kmp_init_monitor) < 2) {
4252 KF_TRACE(10, (
"after monitor thread has started\n"));
4255 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4260 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4261 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4265 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4267 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4269 if (__kmp_storage_map) {
4270 __kmp_print_thread_storage_map(new_thr, new_gtid);
4275 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4276 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4277 new_thr->th.th_serial_team = serial_team =
4278 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4285 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4287 KMP_ASSERT(serial_team);
4288 serial_team->t.t_serialized = 0;
4290 serial_team->t.t_threads[0] = new_thr;
4292 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4296 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4299 __kmp_initialize_fast_memory(new_thr);
4303 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4304 __kmp_initialize_bget(new_thr);
4307 __kmp_init_random(new_thr);
4311 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4312 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4315 kmp_balign_t *balign = new_thr->th.th_bar;
4316 for (b = 0; b < bs_last_barrier; ++b) {
4317 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4318 balign[b].bb.team = NULL;
4319 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4320 balign[b].bb.use_oncore_barrier = 0;
4323 new_thr->th.th_spin_here = FALSE;
4324 new_thr->th.th_next_waiting = 0;
4326 new_thr->th.th_blocking =
false;
4329 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4330 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4331 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4332 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4333 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4336 TCW_4(new_thr->th.th_in_pool, FALSE);
4337 new_thr->th.th_active_in_pool = FALSE;
4338 TCW_4(new_thr->th.th_active, TRUE);
4344 root->r.r_cg_nthreads++;
4348 if (__kmp_adjust_gtid_mode) {
4349 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4350 if (TCR_4(__kmp_gtid_mode) != 2) {
4351 TCW_4(__kmp_gtid_mode, 2);
4354 if (TCR_4(__kmp_gtid_mode) != 1) {
4355 TCW_4(__kmp_gtid_mode, 1);
4360 #ifdef KMP_ADJUST_BLOCKTIME 4363 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4364 if (__kmp_nth > __kmp_avail_proc) {
4365 __kmp_zero_bt = TRUE;
4372 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4373 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4375 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4377 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the master thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
/* Initialize the team data structure.
   This assumes the t_threads and t_max_nproc are already set.
   Also, we don't touch the arguments. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent is left alone: changing it would mess up the hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
/* Sets the thread's affinity to the full mask and returns the old mask via
   old_mask; makes no changes to the affinity data structures. */
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0) {
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
      }
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
4483 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4489 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4491 kmp_info_t *master_th = team->t.t_threads[0];
4492 KMP_DEBUG_ASSERT(master_th != NULL);
4493 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4494 int first_place = master_th->th.th_first_place;
4495 int last_place = master_th->th.th_last_place;
4496 int masters_place = master_th->th.th_current_place;
4497 team->t.t_first_place = first_place;
4498 team->t.t_last_place = last_place;
4500 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 4501 "bound to place %d partition = [%d,%d]\n",
4502 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4503 team->t.t_id, masters_place, first_place, last_place));
4505 switch (proc_bind) {
4507 case proc_bind_default:
4510 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4513 case proc_bind_master: {
4515 int n_th = team->t.t_nproc;
4516 for (f = 1; f < n_th; f++) {
4517 kmp_info_t *th = team->t.t_threads[f];
4518 KMP_DEBUG_ASSERT(th != NULL);
4519 th->th.th_first_place = first_place;
4520 th->th.th_last_place = last_place;
4521 th->th.th_new_place = masters_place;
4523 KA_TRACE(100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d " 4524 "partition = [%d,%d]\n",
4525 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4526 f, masters_place, first_place, last_place));
4530 case proc_bind_close: {
4532 int n_th = team->t.t_nproc;
4534 if (first_place <= last_place) {
4535 n_places = last_place - first_place + 1;
4537 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4539 if (n_th <= n_places) {
4540 int place = masters_place;
4541 for (f = 1; f < n_th; f++) {
4542 kmp_info_t *th = team->t.t_threads[f];
4543 KMP_DEBUG_ASSERT(th != NULL);
4545 if (place == last_place) {
4546 place = first_place;
4547 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4552 th->th.th_first_place = first_place;
4553 th->th.th_last_place = last_place;
4554 th->th.th_new_place = place;
4556 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4557 "partition = [%d,%d]\n",
4558 __kmp_gtid_from_thread(team->t.t_threads[f]),
4559 team->t.t_id, f, place, first_place, last_place));
4562 int S, rem, gap, s_count;
4563 S = n_th / n_places;
4565 rem = n_th - (S * n_places);
4566 gap = rem > 0 ? n_places / rem : n_places;
4567 int place = masters_place;
4569 for (f = 0; f < n_th; f++) {
4570 kmp_info_t *th = team->t.t_threads[f];
4571 KMP_DEBUG_ASSERT(th != NULL);
4573 th->th.th_first_place = first_place;
4574 th->th.th_last_place = last_place;
4575 th->th.th_new_place = place;
4578 if ((s_count == S) && rem && (gap_ct == gap)) {
4580 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4582 if (place == last_place) {
4583 place = first_place;
4584 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4592 }
else if (s_count == S) {
4593 if (place == last_place) {
4594 place = first_place;
4595 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4605 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4606 "partition = [%d,%d]\n",
4607 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4608 th->th.th_new_place, first_place, last_place));
4610 KMP_DEBUG_ASSERT(place == masters_place);
4614 case proc_bind_spread: {
4616 int n_th = team->t.t_nproc;
4619 if (first_place <= last_place) {
4620 n_places = last_place - first_place + 1;
4622 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4624 if (n_th <= n_places) {
4627 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4628 int S = n_places / n_th;
4629 int s_count, rem, gap, gap_ct;
4631 place = masters_place;
4632 rem = n_places - n_th * S;
4633 gap = rem ? n_th / rem : 1;
4636 if (update_master_only == 1)
4638 for (f = 0; f < thidx; f++) {
4639 kmp_info_t *th = team->t.t_threads[f];
4640 KMP_DEBUG_ASSERT(th != NULL);
4642 th->th.th_first_place = place;
4643 th->th.th_new_place = place;
4645 while (s_count < S) {
4646 if (place == last_place) {
4647 place = first_place;
4648 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4655 if (rem && (gap_ct == gap)) {
4656 if (place == last_place) {
4657 place = first_place;
4658 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4666 th->th.th_last_place = place;
4669 if (place == last_place) {
4670 place = first_place;
4671 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4678 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4679 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4680 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4681 f, th->th.th_new_place, th->th.th_first_place,
4682 th->th.th_last_place, __kmp_affinity_num_masks));
4688 double current =
static_cast<double>(masters_place);
4690 (
static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4695 if (update_master_only == 1)
4697 for (f = 0; f < thidx; f++) {
4698 first =
static_cast<int>(current);
4699 last =
static_cast<int>(current + spacing) - 1;
4700 KMP_DEBUG_ASSERT(last >= first);
4701 if (first >= n_places) {
4702 if (masters_place) {
4705 if (first == (masters_place + 1)) {
4706 KMP_DEBUG_ASSERT(f == n_th);
4709 if (last == masters_place) {
4710 KMP_DEBUG_ASSERT(f == (n_th - 1));
4714 KMP_DEBUG_ASSERT(f == n_th);
4719 if (last >= n_places) {
4720 last = (n_places - 1);
4725 KMP_DEBUG_ASSERT(0 <= first);
4726 KMP_DEBUG_ASSERT(n_places > first);
4727 KMP_DEBUG_ASSERT(0 <= last);
4728 KMP_DEBUG_ASSERT(n_places > last);
4729 KMP_DEBUG_ASSERT(last_place >= first_place);
4730 th = team->t.t_threads[f];
4731 KMP_DEBUG_ASSERT(th);
4732 th->th.th_first_place = first;
4733 th->th.th_new_place = place;
4734 th->th.th_last_place = last;
4737 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4738 "partition = [%d,%d], spacing = %.4f\n",
4739 __kmp_gtid_from_thread(team->t.t_threads[f]),
4740 team->t.t_id, f, th->th.th_new_place,
4741 th->th.th_first_place, th->th.th_last_place, spacing));
4745 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4747 int S, rem, gap, s_count;
4748 S = n_th / n_places;
4750 rem = n_th - (S * n_places);
4751 gap = rem > 0 ? n_places / rem : n_places;
4752 int place = masters_place;
4755 if (update_master_only == 1)
4757 for (f = 0; f < thidx; f++) {
4758 kmp_info_t *th = team->t.t_threads[f];
4759 KMP_DEBUG_ASSERT(th != NULL);
4761 th->th.th_first_place = place;
4762 th->th.th_last_place = place;
4763 th->th.th_new_place = place;
4766 if ((s_count == S) && rem && (gap_ct == gap)) {
4768 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4770 if (place == last_place) {
4771 place = first_place;
4772 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4780 }
else if (s_count == S) {
4781 if (place == last_place) {
4782 place = first_place;
4783 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4792 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4793 "partition = [%d,%d]\n",
4794 __kmp_gtid_from_thread(team->t.t_threads[f]),
4795 team->t.t_id, f, th->th.th_new_place,
4796 th->th.th_first_place, th->th.th_last_place));
4798 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4806 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
4814 __kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
4816 ompt_data_t ompt_parallel_data,
4819 kmp_proc_bind_t new_proc_bind,
4821 kmp_internal_control_t *new_icvs,
4822 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4823 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4826 int use_hot_team = !root->r.r_active;
4829 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
4830 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4831 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4834 #if KMP_NESTED_HOT_TEAMS 4835 kmp_hot_team_ptr_t *hot_teams;
4837 team = master->th.th_team;
4838 level = team->t.t_active_level;
4839 if (master->th.th_teams_microtask) {
4840 if (master->th.th_teams_size.nteams > 1 &&
4843 (microtask_t)__kmp_teams_master ||
4844 master->th.th_teams_level <
4850 hot_teams = master->th.th_hot_teams;
4851 if (level < __kmp_hot_teams_max_level && hot_teams &&
4861 if (use_hot_team && new_nproc > 1) {
4862 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
4863 #if KMP_NESTED_HOT_TEAMS 4864 team = hot_teams[level].hot_team;
4866 team = root->r.r_hot_team;
4869 if (__kmp_tasking_mode != tskm_immediate_exec) {
4870 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 4871 "task_team[1] = %p before reinit\n",
4872 team->t.t_task_team[0], team->t.t_task_team[1]));
4879 if (team->t.t_nproc == new_nproc) {
4880 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
4883 if (team->t.t_size_changed == -1) {
4884 team->t.t_size_changed = 1;
4886 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4890 kmp_r_sched_t new_sched = new_icvs->sched;
4892 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
4894 __kmp_reinitialize_team(team, new_icvs,
4895 root->r.r_uber_thread->th.th_ident);
4897 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4898 team->t.t_threads[0], team));
4899 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4902 #if KMP_AFFINITY_SUPPORTED 4903 if ((team->t.t_size_changed == 0) &&
4904 (team->t.t_proc_bind == new_proc_bind)) {
4905 if (new_proc_bind == proc_bind_spread) {
4906 __kmp_partition_places(
4909 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: " 4910 "proc_bind = %d, partition = [%d,%d]\n",
4911 team->t.t_id, new_proc_bind, team->t.t_first_place,
4912 team->t.t_last_place));
4914 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4915 __kmp_partition_places(team);
4918 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4921 }
else if (team->t.t_nproc > new_nproc) {
4923 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
4926 team->t.t_size_changed = 1;
4927 #if KMP_NESTED_HOT_TEAMS 4928 if (__kmp_hot_teams_mode == 0) {
4931 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4932 hot_teams[level].hot_team_nth = new_nproc;
4933 #endif // KMP_NESTED_HOT_TEAMS 4935 for (f = new_nproc; f < team->t.t_nproc; f++) {
4936 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4937 if (__kmp_tasking_mode != tskm_immediate_exec) {
4940 team->t.t_threads[f]->th.th_task_team = NULL;
4942 __kmp_free_thread(team->t.t_threads[f]);
4943 team->t.t_threads[f] = NULL;
4945 #if KMP_NESTED_HOT_TEAMS 4950 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4951 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4952 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4953 for (
int b = 0; b < bs_last_barrier; ++b) {
4954 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4955 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4957 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4961 #endif // KMP_NESTED_HOT_TEAMS 4962 team->t.t_nproc = new_nproc;
4964 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
4965 __kmp_reinitialize_team(team, new_icvs,
4966 root->r.r_uber_thread->th.th_ident);
4969 for (f = 0; f < new_nproc; ++f) {
4970 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4974 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
4975 team->t.t_threads[0], team));
4977 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4980 for (f = 0; f < team->t.t_nproc; f++) {
4981 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
4982 team->t.t_threads[f]->th.th_team_nproc ==
4988 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4989 #if KMP_AFFINITY_SUPPORTED 4990 __kmp_partition_places(team);
4994 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4995 kmp_affin_mask_t *old_mask;
4996 if (KMP_AFFINITY_CAPABLE()) {
4997 KMP_CPU_ALLOC(old_mask);
5002 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5005 team->t.t_size_changed = 1;
5007 #if KMP_NESTED_HOT_TEAMS 5008 int avail_threads = hot_teams[level].hot_team_nth;
5009 if (new_nproc < avail_threads)
5010 avail_threads = new_nproc;
5011 kmp_info_t **other_threads = team->t.t_threads;
5012 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5016 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5017 for (b = 0; b < bs_last_barrier; ++b) {
5018 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5019 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5021 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5025 if (hot_teams[level].hot_team_nth >= new_nproc) {
5028 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5029 team->t.t_nproc = new_nproc;
5035 hot_teams[level].hot_team_nth = new_nproc;
5036 #endif // KMP_NESTED_HOT_TEAMS 5037 if (team->t.t_max_nproc < new_nproc) {
5039 __kmp_reallocate_team_arrays(team, new_nproc);
5040 __kmp_reinitialize_team(team, new_icvs, NULL);
5043 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5048 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5052 for (f = team->t.t_nproc; f < new_nproc; f++) {
5053 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5054 KMP_DEBUG_ASSERT(new_worker);
5055 team->t.t_threads[f] = new_worker;
5058 (
"__kmp_allocate_team: team %d init T#%d arrived: " 5059 "join=%llu, plain=%llu\n",
5060 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5061 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5062 team->t.t_bar[bs_plain_barrier].b_arrived));
5066 kmp_balign_t *balign = new_worker->th.th_bar;
5067 for (b = 0; b < bs_last_barrier; ++b) {
5068 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5069 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5070 KMP_BARRIER_PARENT_FLAG);
5072 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5078 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5079 if (KMP_AFFINITY_CAPABLE()) {
5081 __kmp_set_system_affinity(old_mask, TRUE);
5082 KMP_CPU_FREE(old_mask);
5085 #if KMP_NESTED_HOT_TEAMS 5087 #endif // KMP_NESTED_HOT_TEAMS 5089 int old_nproc = team->t.t_nproc;
5091 __kmp_initialize_team(team, new_nproc, new_icvs,
5092 root->r.r_uber_thread->th.th_ident);
5095 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5096 for (f = 0; f < team->t.t_nproc; ++f)
5097 __kmp_initialize_info(team->t.t_threads[f], team, f,
5098 __kmp_gtid_from_tid(f, team));
5105 for (f = old_nproc; f < team->t.t_nproc; ++f)
5106 team->t.t_threads[f]->th.th_task_state =
5107 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5110 team->t.t_threads[0]->th.th_task_state;
5111 for (f = old_nproc; f < team->t.t_nproc; ++f)
5112 team->t.t_threads[f]->th.th_task_state = old_state;
5116 for (f = 0; f < team->t.t_nproc; ++f) {
5117 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5118 team->t.t_threads[f]->th.th_team_nproc ==
5124 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5125 #if KMP_AFFINITY_SUPPORTED
5126 __kmp_partition_places(team);
5132 kmp_info_t *master = team->t.t_threads[0];
5133 if (master->th.th_teams_microtask) {
5134 for (f = 1; f < new_nproc; ++f) {
5136 kmp_info_t *thr = team->t.t_threads[f];
5137 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5138 thr->th.th_teams_level = master->th.th_teams_level;
5139 thr->th.th_teams_size = master->th.th_teams_size;
5143 #if KMP_NESTED_HOT_TEAMS
5147 for (f = 1; f < new_nproc; ++f) {
5148 kmp_info_t *thr = team->t.t_threads[f];
5150 kmp_balign_t *balign = thr->th.th_bar;
5151 for (b = 0; b < bs_last_barrier; ++b) {
5152 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5153 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5155 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5160 #endif // KMP_NESTED_HOT_TEAMS
5163 __kmp_alloc_argv_entries(argc, team, TRUE);
5164 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5168 KF_TRACE(10, (" hot_team = %p\n", team));
5171 if (__kmp_tasking_mode != tskm_immediate_exec) {
5172 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5173 "task_team[1] = %p after reinit\n",
5174 team->t.t_task_team[0], team->t.t_task_team[1]));
5179 __ompt_team_assign_id(team, ompt_parallel_data);
5189 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5192 if (team->t.t_max_nproc >= max_nproc) {
5194 __kmp_team_pool = team->t.t_next_pool;
5197 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5199 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5200 "task_team[1] %p to NULL\n",
5201 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5202 team->t.t_task_team[0] = NULL;
5203 team->t.t_task_team[1] = NULL;
5206 __kmp_alloc_argv_entries(argc, team, TRUE);
5207 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5210 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5211 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5214 for (b = 0; b < bs_last_barrier; ++b) {
5215 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5217 team->t.t_bar[b].b_master_arrived = 0;
5218 team->t.t_bar[b].b_team_arrived = 0;
5224 team->t.t_proc_bind = new_proc_bind;
5227 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5231 __ompt_team_assign_id(team, ompt_parallel_data);
5243 team = __kmp_reap_team(team);
5244 __kmp_team_pool = team;
5249 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5252 team->t.t_max_nproc = max_nproc;
5255 __kmp_allocate_team_arrays(team, max_nproc);
5257 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5258 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5260 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
"%p to NULL\n",
5262 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5263 team->t.t_task_team[0] = NULL;
5265 team->t.t_task_team[1] = NULL;
5268 if (__kmp_storage_map) {
5269 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5273 __kmp_alloc_argv_entries(argc, team, FALSE);
5274 team->t.t_argc = argc;
5277 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5278 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5281 for (b = 0; b < bs_last_barrier; ++b) {
5282 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5284 team->t.t_bar[b].b_master_arrived = 0;
5285 team->t.t_bar[b].b_team_arrived = 0;
5291 team->t.t_proc_bind = new_proc_bind;
5295 __ompt_team_assign_id(team, ompt_parallel_data);
5296 team->t.ompt_serialized_team_info = NULL;
5301 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5312 void __kmp_free_team(kmp_root_t *root,
5313 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5315 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5319 KMP_DEBUG_ASSERT(root);
5320 KMP_DEBUG_ASSERT(team);
5321 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5322 KMP_DEBUG_ASSERT(team->t.t_threads);
5324 int use_hot_team = team == root->r.r_hot_team;
5325 #if KMP_NESTED_HOT_TEAMS
5327 kmp_hot_team_ptr_t *hot_teams;
5329 level = team->t.t_active_level - 1;
5330 if (master->th.th_teams_microtask) {
5331 if (master->th.th_teams_size.nteams > 1) {
5335 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5336 master->th.th_teams_level == team->t.t_level) {
5341 hot_teams = master->th.th_hot_teams;
5342 if (level < __kmp_hot_teams_max_level) {
5343 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5347 #endif // KMP_NESTED_HOT_TEAMS
5350 TCW_SYNC_PTR(team->t.t_pkfn,
5353 team->t.t_copyin_counter = 0;
5358 if (!use_hot_team) {
5359 if (__kmp_tasking_mode != tskm_immediate_exec) {
5361 for (f = 1; f < team->t.t_nproc; ++f) {
5362 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5363 kmp_info_t *th = team->t.t_threads[f];
5364 volatile kmp_uint32 *state = &th->th.th_reap_state;
5365 while (*state != KMP_SAFE_TO_REAP) {
5369 if (!__kmp_is_thread_alive(th, &ecode)) {
5370 *state = KMP_SAFE_TO_REAP;
5375 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5376 if (fl.is_sleeping())
5377 fl.resume(__kmp_gtid_from_thread(th));
5384 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5385 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5386 if (task_team != NULL) {
5387 for (f = 0; f < team->t.t_nproc;
5389 team->t.t_threads[f]->th.th_task_team = NULL;
5393 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5394 __kmp_get_gtid(), task_team, team->t.t_id));
5395 #if KMP_NESTED_HOT_TEAMS
5396 __kmp_free_task_team(master, task_team);
5398 team->t.t_task_team[tt_idx] = NULL;
5404 team->t.t_parent = NULL;
5405 team->t.t_level = 0;
5406 team->t.t_active_level = 0;
5409 for (f = 1; f < team->t.t_nproc; ++f) {
5410 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5411 __kmp_free_thread(team->t.t_threads[f]);
5412 team->t.t_threads[f] = NULL;
5417 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5418 __kmp_team_pool = (volatile kmp_team_t *)team;
5425 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5426 kmp_team_t *next_pool = team->t.t_next_pool;
5428 KMP_DEBUG_ASSERT(team);
5429 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5430 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5431 KMP_DEBUG_ASSERT(team->t.t_threads);
5432 KMP_DEBUG_ASSERT(team->t.t_argv);
5437 __kmp_free_team_arrays(team);
5438 if (team->t.t_argv != &team->t.t_inline_argv[0])
5439 __kmp_free((void *)team->t.t_argv);
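// Overview (inferred from the code below): __kmp_free_thread does not destroy
// the worker. It scrubs barrier and team state, then links the thread into the
// global __kmp_thread_pool, which is kept sorted by ascending gtid; the cached
// insert point __kmp_thread_pool_insert_pt avoids rescanning from the head.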
5471 void __kmp_free_thread(kmp_info_t *this_th) {
5474 kmp_root_t *root = this_th->th.th_root;
5476 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5477 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5479 KMP_DEBUG_ASSERT(this_th);
5484 kmp_balign_t *balign = this_th->th.th_bar;
5485 for (b = 0; b < bs_last_barrier; ++b) {
5486 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5487 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5488 balign[b].bb.team = NULL;
5489 balign[b].bb.leaf_kids = 0;
5491 this_th->th.th_task_state = 0;
5492 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5495 TCW_PTR(this_th->th.th_team, NULL);
5496 TCW_PTR(this_th->th.th_root, NULL);
5497 TCW_PTR(this_th->th.th_dispatch, NULL);
5504 __kmp_free_implicit_task(this_th);
5505 this_th->th.th_current_task = NULL;
5509 gtid = this_th->th.th_info.ds.ds_gtid;
5510 if (__kmp_thread_pool_insert_pt != NULL) {
5511 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5512 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5513 __kmp_thread_pool_insert_pt = NULL;
5522 if (__kmp_thread_pool_insert_pt != NULL) {
5523 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5525 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5527 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5528 scan = &((*scan)->th.th_next_pool))
5533 TCW_PTR(this_th->th.th_next_pool, *scan);
5534 __kmp_thread_pool_insert_pt = *scan = this_th;
5535 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5536 (this_th->th.th_info.ds.ds_gtid <
5537 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5538 TCW_4(this_th->th.th_in_pool, TRUE);
5539 __kmp_thread_pool_nth++;
5541 TCW_4(__kmp_nth, __kmp_nth - 1);
5542 root->r.r_cg_nthreads--;
5544 #ifdef KMP_ADJUST_BLOCKTIME
5547 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5548 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5549 if (__kmp_nth <= __kmp_avail_proc) {
5550 __kmp_zero_bt = FALSE;
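// Overview (inferred from the code below): __kmp_launch_thread is the
// top-level loop of every worker: wait at the fork barrier for work, run the
// team's microtask through t_invoke, wait at the join barrier, and repeat
// until __kmp_global.g.g_done is set. When OMPT is enabled, thread_begin /
// thread_end callbacks and state transitions are emitted around this loop.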
5560 void *__kmp_launch_thread(kmp_info_t *this_thr) {
5561 int gtid = this_thr->th.th_info.ds.ds_gtid;
5563 kmp_team_t *(*volatile pteam);
5566 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5568 if (__kmp_env_consistency_check) {
5569 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5573 ompt_data_t *thread_data;
5574 if (ompt_enabled.enabled) {
5575 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5576 thread_data->ptr = NULL;
5578 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5579 this_thr->th.ompt_thread_info.wait_id = 0;
5580 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5581 if (ompt_enabled.ompt_callback_thread_begin) {
5582 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5583 ompt_thread_worker, thread_data);
5589 if (ompt_enabled.enabled) {
5590 this_thr->th.ompt_thread_info.state = omp_state_idle;
5594 while (!TCR_4(__kmp_global.g.g_done)) {
5595 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5599 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
5602 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5605 if (ompt_enabled.enabled) {
5606 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5610 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5613 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5615 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5618 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5619 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5620 (*pteam)->t.t_pkfn));
5622 updateHWFPControl(*pteam);
5625 if (ompt_enabled.enabled) {
5626 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
5630 rc = (*pteam)->t.t_invoke(gtid);
5634 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5635 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5636 (*pteam)->t.t_pkfn));
5639 if (ompt_enabled.enabled) {
5641 __ompt_get_task_info_object(0)->frame.exit_frame = NULL;
5643 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5647 __kmp_join_barrier(gtid);
5650 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5653 if (ompt_enabled.ompt_callback_thread_end) {
5654 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
5658 this_thr->th.th_task_team = NULL;
5660 __kmp_common_destroy_gtid(gtid);
5662 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5669 void __kmp_internal_end_dest(void *specific_gtid) {
5670 #if KMP_COMPILER_ICC
5671 #pragma warning(push)
5672 #pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5676 int gtid = (kmp_intptr_t)specific_gtid - 1;
5677 #if KMP_COMPILER_ICC
5678 #pragma warning(pop)
5681 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5694 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5695 __kmp_gtid_set_specific(gtid);
5696 #ifdef KMP_TDATA_GTID
5699 __kmp_internal_end_thread(gtid);
5702 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5708 __attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5709 __kmp_internal_end_atexit();
5712 void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
5718 void __kmp_internal_end_atexit(void) {
5719 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5743 __kmp_internal_end_library(-1);
5745 __kmp_close_console();
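// Overview (inferred from the code below): __kmp_reap_thread is the teardown
// path for one worker: release it from the fork barrier so it can be reaped,
// fix up the thread-pool counters, and free its per-thread resources
// (implicit task, fast memory, consistency stack, affinity mask,
// hierarchical-barrier data, serial team).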
5749 static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5754 KMP_DEBUG_ASSERT(thread != NULL);
5756 gtid = thread->th.th_info.ds.ds_gtid;
5760 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5763 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5767 ANNOTATE_HAPPENS_BEFORE(thread);
5768 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5769 __kmp_release_64(&flag);
5773 __kmp_reap_worker(thread);
5785 if (thread->th.th_active_in_pool) {
5786 thread->th.th_active_in_pool = FALSE;
5787 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5788 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
5792 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5793 --__kmp_thread_pool_nth;
5796 __kmp_free_implicit_task(thread);
5800 __kmp_free_fast_memory(thread);
5803 __kmp_suspend_uninitialize_thread(thread);
5805 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5806 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5811 #ifdef KMP_ADJUST_BLOCKTIME
5814 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5815 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5816 if (__kmp_nth <= __kmp_avail_proc) {
5817 __kmp_zero_bt = FALSE;
5823 if (__kmp_env_consistency_check) {
5824 if (thread->th.th_cons) {
5825 __kmp_free_cons_stack(thread->th.th_cons);
5826 thread->th.th_cons = NULL;
5830 if (thread->th.th_pri_common != NULL) {
5831 __kmp_free(thread->th.th_pri_common);
5832 thread->th.th_pri_common = NULL;
5835 if (thread->th.th_task_state_memo_stack != NULL) {
5836 __kmp_free(thread->th.th_task_state_memo_stack);
5837 thread->th.th_task_state_memo_stack = NULL;
5841 if (thread->th.th_local.bget_data != NULL) {
5842 __kmp_finalize_bget(thread);
5846 #if KMP_AFFINITY_SUPPORTED
5847 if (thread->th.th_affin_mask != NULL) {
5848 KMP_CPU_FREE(thread->th.th_affin_mask);
5849 thread->th.th_affin_mask = NULL;
5853 #if KMP_USE_HIER_SCHED
5854 if (thread->th.th_hier_bar_data != NULL) {
5855 __kmp_free(thread->th.th_hier_bar_data);
5856 thread->th.th_hier_bar_data = NULL;
5860 __kmp_reap_team(thread->th.th_serial_team);
5861 thread->th.th_serial_team = NULL;
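// Overview (inferred from the code below): __kmp_internal_end is the common
// shutdown worker behind both the library-level and thread-level end paths.
// It unregisters the library, reclaims dead roots, and, once no root is
// active, reaps the monitor thread, every pooled worker, every pooled team
// and the task teams before clearing the global init flags.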
5868 static void __kmp_internal_end(void) {
5872 __kmp_unregister_library();
5879 __kmp_reclaim_dead_roots();
5883 for (i = 0; i < __kmp_threads_capacity; i++)
5885 if (__kmp_root[i]->r.r_active)
5888 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5890 if (i < __kmp_threads_capacity) {
5902 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5903 if (TCR_4(__kmp_init_monitor)) {
5904 __kmp_reap_monitor(&__kmp_monitor);
5905 TCW_4(__kmp_init_monitor, 0);
5907 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5908 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5909 #endif // KMP_USE_MONITOR
5914 for (i = 0; i < __kmp_threads_capacity; i++) {
5915 if (__kmp_root[i]) {
5918 KMP_ASSERT(!__kmp_root[i]->r.r_active);
5927 while (__kmp_thread_pool != NULL) {
5929 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
5930 __kmp_thread_pool = thread->th.th_next_pool;
5932 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5933 thread->th.th_next_pool = NULL;
5934 thread->th.th_in_pool = FALSE;
5935 __kmp_reap_thread(thread, 0);
5937 __kmp_thread_pool_insert_pt = NULL;
5940 while (__kmp_team_pool != NULL) {
5942 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
5943 __kmp_team_pool = team->t.t_next_pool;
5945 team->t.t_next_pool = NULL;
5946 __kmp_reap_team(team);
5949 __kmp_reap_task_teams();
5956 for (i = 0; i < __kmp_threads_capacity; i++) {
5957 kmp_info_t *thr = __kmp_threads[i];
5958 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
5963 for (i = 0; i < __kmp_threads_capacity; ++i) {
5970 TCW_SYNC_4(__kmp_init_common, FALSE);
5972 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
5980 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5981 if (TCR_4(__kmp_init_monitor)) {
5982 __kmp_reap_monitor(&__kmp_monitor);
5983 TCW_4(__kmp_init_monitor, 0);
5985 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5986 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5989 TCW_4(__kmp_init_gtid, FALSE);
5998 void __kmp_internal_end_library(int gtid_req) {
6005 if (__kmp_global.g.g_abort) {
6006 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6010 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6011 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6019 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6021 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6022 if (gtid == KMP_GTID_SHUTDOWN) {
6023 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6024 "already shutdown\n"));
6026 } else if (gtid == KMP_GTID_MONITOR) {
6027 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6028 "registered, or system shutdown\n"));
6030 } else if (gtid == KMP_GTID_DNE) {
6031 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6034 } else if (KMP_UBER_GTID(gtid)) {
6036 if (__kmp_root[gtid]->r.r_active) {
6037 __kmp_global.g.g_abort = -1;
6038 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6040 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6046 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6047 __kmp_unregister_root_current_thread(gtid);
6054 #ifdef DUMP_DEBUG_ON_EXIT
6055 if (__kmp_debug_buf)
6056 __kmp_dump_debug_buffer();
6062 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6065 if (__kmp_global.g.g_abort) {
6066 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6068 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6071 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6072 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6081 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6084 __kmp_internal_end();
6086 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6087 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6089 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
6091 #ifdef DUMP_DEBUG_ON_EXIT
6092 if (__kmp_debug_buf)
6093 __kmp_dump_debug_buffer();
6097 __kmp_close_console();
6100 __kmp_fini_allocator();
6104 void __kmp_internal_end_thread(int gtid_req) {
6113 if (__kmp_global.g.g_abort) {
6114 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6118 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6119 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6127 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6129 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6130 if (gtid == KMP_GTID_SHUTDOWN) {
6131 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6132 "already shutdown\n"));
6134 } else if (gtid == KMP_GTID_MONITOR) {
6135 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6136 "registered, or system shutdown\n"));
6138 } else if (gtid == KMP_GTID_DNE) {
6139 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6143 } else if (KMP_UBER_GTID(gtid)) {
6145 if (__kmp_root[gtid]->r.r_active) {
6146 __kmp_global.g.g_abort = -1;
6147 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6149 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6153 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6155 __kmp_unregister_root_current_thread(gtid);
6159 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6162 __kmp_threads[gtid]->th.th_task_team = NULL;
6166 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6171 #if defined KMP_DYNAMIC_LIB
6180 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6184 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6187 if (__kmp_global.g.g_abort) {
6188 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6190 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6193 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6194 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6205 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6207 for (i = 0; i < __kmp_threads_capacity; ++i) {
6208 if (KMP_UBER_GTID(i)) {
6211 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6212 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6213 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6220 __kmp_internal_end();
6222 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6223 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6225 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6227 #ifdef DUMP_DEBUG_ON_EXIT
6228 if (__kmp_debug_buf)
6229 __kmp_dump_debug_buffer();
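// Overview (inferred from the code below): duplicate-runtime detection. Each
// copy of the library publishes an environment variable (name built by
// __kmp_reg_status_name) holding "<flag address>-<flag value>-<library file>".
// A second copy that finds the variable set and pointing at a live mapping
// reports DuplicateLibrary, unless KMP_DUPLICATE_LIB_OK is set to true.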
6236 static long __kmp_registration_flag = 0;
6238 static char *__kmp_registration_str = NULL;
6241 static inline char *__kmp_reg_status_name() {
6246 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6249 void __kmp_register_library_startup(void) {
6251 char *name = __kmp_reg_status_name();
6257 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6258 __kmp_initialize_system_tick();
6260 __kmp_read_system_time(&time.dtime);
6261 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6262 __kmp_registration_str =
6263 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6264 __kmp_registration_flag, KMP_LIBRARY_FILE);
6266 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6267 __kmp_registration_str));
6274 __kmp_env_set(name, __kmp_registration_str, 0);
6276 value = __kmp_env_get(name);
6277 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6287 char *flag_addr_str = NULL;
6288 char *flag_val_str = NULL;
6289 char const *file_name = NULL;
6290 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6291 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6294 long *flag_addr = 0;
6296 KMP_SSCANF(flag_addr_str, "%p", &flag_addr);
6297 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6298 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6302 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6316 file_name = "unknown library";
6320 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6321 if (!__kmp_str_match_true(duplicate_ok)) {
6323 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6324 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6326 KMP_INTERNAL_FREE(duplicate_ok);
6327 __kmp_duplicate_library_ok = 1;
6332 __kmp_env_unset(name);
6334 default: { KMP_DEBUG_ASSERT(0); } break;
6337 KMP_INTERNAL_FREE((void *)value);
6339 KMP_INTERNAL_FREE((void *)name);
6343 void __kmp_unregister_library(void) {
6345 char *name = __kmp_reg_status_name();
6346 char *value = __kmp_env_get(name);
6348 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6349 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6350 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6352 __kmp_env_unset(name);
6355 KMP_INTERNAL_FREE(__kmp_registration_str);
6356 KMP_INTERNAL_FREE(value);
6357 KMP_INTERNAL_FREE(name);
6359 __kmp_registration_flag = 0;
6360 __kmp_registration_str = NULL;
6367 #if KMP_MIC_SUPPORTED
6369 static void __kmp_check_mic_type() {
6370 kmp_cpuid_t cpuid_state = {0};
6371 kmp_cpuid_t *cs_p = &cpuid_state;
6372 __kmp_x86_cpuid(1, 0, cs_p);
6374 if ((cs_p->eax & 0xff0) == 0xB10) {
6375 __kmp_mic_type = mic2;
6376 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6377 __kmp_mic_type = mic3;
6379 __kmp_mic_type = non_mic;
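// Overview (inferred from the code below): __kmp_do_serial_initialize is the
// one-time, process-wide setup: size checks on the fixed-width types, lock and
// allocator initialization, library registration, default blocktime / team
// size / barrier settings, KMP_* environment processing, allocation of the
// __kmp_threads and __kmp_root arrays, and registration of the initial root.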
6385 static void __kmp_do_serial_initialize(void) {
6389 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
6391 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6392 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6393 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6394 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6395 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6401 __kmp_validate_locks();
6404 __kmp_init_allocator();
6409 __kmp_register_library_startup();
6412 if (TCR_4(__kmp_global.g.g_done)) {
6413 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6416 __kmp_global.g.g_abort = 0;
6417 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6420 #if KMP_USE_ADAPTIVE_LOCKS
6421 #if KMP_DEBUG_ADAPTIVE_LOCKS
6422 __kmp_init_speculative_stats();
6425 #if KMP_STATS_ENABLED
6428 __kmp_init_lock(&__kmp_global_lock);
6429 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6430 __kmp_init_lock(&__kmp_debug_lock);
6431 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6432 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6433 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6434 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6435 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6436 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6437 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6438 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6439 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6440 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6441 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6442 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6443 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6444 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6445 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6447 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6449 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6453 __kmp_runtime_initialize();
6455 #if KMP_MIC_SUPPORTED
6456 __kmp_check_mic_type();
6463 __kmp_abort_delay = 0;
6467 __kmp_dflt_team_nth_ub = __kmp_xproc;
6468 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6469 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6471 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6472 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6474 __kmp_max_nth = __kmp_sys_max_nth;
6475 __kmp_cg_max_nth = __kmp_sys_max_nth;
6476 __kmp_teams_max_nth = __kmp_xproc;
6477 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6478 __kmp_teams_max_nth = __kmp_sys_max_nth;
6483 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6485 __kmp_monitor_wakeups =
6486 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6487 __kmp_bt_intervals =
6488 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6491 __kmp_library = library_throughput;
6493 __kmp_static = kmp_sch_static_balanced;
6500 #if KMP_FAST_REDUCTION_BARRIER
6501 #define kmp_reduction_barrier_gather_bb ((int)1)
6502 #define kmp_reduction_barrier_release_bb ((int)1)
6503 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6504 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6505 #endif // KMP_FAST_REDUCTION_BARRIER
6506 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6507 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6508 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6509 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6510 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6511 #if KMP_FAST_REDUCTION_BARRIER
6512 if (i == bs_reduction_barrier) {
6514 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6515 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6516 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6517 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6519 #endif // KMP_FAST_REDUCTION_BARRIER
6521 #if KMP_FAST_REDUCTION_BARRIER
6522 #undef kmp_reduction_barrier_release_pat
6523 #undef kmp_reduction_barrier_gather_pat
6524 #undef kmp_reduction_barrier_release_bb
6525 #undef kmp_reduction_barrier_gather_bb
6526 #endif // KMP_FAST_REDUCTION_BARRIER
6527 #if KMP_MIC_SUPPORTED
6528 if (__kmp_mic_type == mic2) {
6530 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6531 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6533 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6534 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6536 #if KMP_FAST_REDUCTION_BARRIER
6537 if (__kmp_mic_type == mic2) {
6538 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6539 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6541 #endif // KMP_FAST_REDUCTION_BARRIER
6542 #endif // KMP_MIC_SUPPORTED
6546 __kmp_env_checks = TRUE;
6548 __kmp_env_checks = FALSE;
6552 __kmp_foreign_tp = TRUE;
6554 __kmp_global.g.g_dynamic = FALSE;
6555 __kmp_global.g.g_dynamic_mode = dynamic_default;
6557 __kmp_env_initialize(NULL);
6561 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6562 if (__kmp_str_match_true(val)) {
6563 kmp_str_buf_t buffer;
6564 __kmp_str_buf_init(&buffer);
6565 __kmp_i18n_dump_catalog(&buffer);
6566 __kmp_printf("%s", buffer.str);
6567 __kmp_str_buf_free(&buffer);
6569 __kmp_env_free(&val);
6572 __kmp_threads_capacity =
6573 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6575 __kmp_tp_capacity = __kmp_default_tp_capacity(
6576 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6581 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6582 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6583 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6584 __kmp_thread_pool = NULL;
6585 __kmp_thread_pool_insert_pt = NULL;
6586 __kmp_team_pool = NULL;
6593 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6595 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6596 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6597 sizeof(kmp_info_t *) * __kmp_threads_capacity);
6600 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6602 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6607 gtid = __kmp_register_root(TRUE);
6608 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6609 KMP_ASSERT(KMP_UBER_GTID(gtid));
6610 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6614 __kmp_common_initialize();
6618 __kmp_register_atfork();
6621 #if !defined KMP_DYNAMIC_LIB
6625 int rc = atexit(__kmp_internal_end_atexit);
6627 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6633 #if KMP_HANDLE_SIGNALS
6639 __kmp_install_signals(FALSE);
6642 __kmp_install_signals(TRUE);
6647 __kmp_init_counter++;
6649 __kmp_init_serial = TRUE;
6651 if (__kmp_settings) {
6656 if (__kmp_display_env || __kmp_display_env_verbose) {
6657 __kmp_env_print_2();
6659 #endif // OMP_40_ENABLED
6667 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
6670 void __kmp_serial_initialize(void) {
6671 if (__kmp_init_serial) {
6674 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6675 if (__kmp_init_serial) {
6676 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6679 __kmp_do_serial_initialize();
6680 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
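// Overview (inferred from the code below): middle initialization runs once
// before the first parallel region that needs it. Affinity is initialized and
// __kmp_dflt_team_nth is derived from KMP_DFLT_NTH_CORES or __kmp_avail_proc,
// clamped to [KMP_MIN_NTH, __kmp_sys_max_nth], and propagated to threads that
// have not set nproc explicitly.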
6683 static void __kmp_do_middle_initialize(void) {
6685 int prev_dflt_team_nth;
6687 if (!__kmp_init_serial) {
6688 __kmp_do_serial_initialize();
6691 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
6695 prev_dflt_team_nth = __kmp_dflt_team_nth;
6697 #if KMP_AFFINITY_SUPPORTED
6700 __kmp_affinity_initialize();
6704 for (i = 0; i < __kmp_threads_capacity; i++) {
6705 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6706 __kmp_affinity_set_init_mask(i, TRUE);
6711 KMP_ASSERT(__kmp_xproc > 0);
6712 if (__kmp_avail_proc == 0) {
6713 __kmp_avail_proc = __kmp_xproc;
6719 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6720 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6725 if (__kmp_dflt_team_nth == 0) {
6726 #ifdef KMP_DFLT_NTH_CORES
6728 __kmp_dflt_team_nth = __kmp_ncores;
6729 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6730 "__kmp_ncores (%d)\n",
6731 __kmp_dflt_team_nth));
6734 __kmp_dflt_team_nth = __kmp_avail_proc;
6735 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6736 "__kmp_avail_proc(%d)\n",
6737 __kmp_dflt_team_nth));
6741 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6742 __kmp_dflt_team_nth = KMP_MIN_NTH;
6744 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6745 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6750 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
6752 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6757 for (i = 0; i < __kmp_threads_capacity; i++) {
6758 kmp_info_t *thread = __kmp_threads[i];
6761 if (thread->th.th_current_task->td_icvs.nproc != 0)
6764 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
6769 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6770 __kmp_dflt_team_nth));
6772 #ifdef KMP_ADJUST_BLOCKTIME
6774 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6775 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6776 if (__kmp_nth > __kmp_avail_proc) {
6777 __kmp_zero_bt = TRUE;
6783 TCW_SYNC_4(__kmp_init_middle, TRUE);
6785 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
6788 void __kmp_middle_initialize(void) {
6789 if (__kmp_init_middle) {
6792 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6793 if (__kmp_init_middle) {
6794 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6797 __kmp_do_middle_initialize();
6798 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6801 void __kmp_parallel_initialize(void) {
6802 int gtid = __kmp_entry_gtid();
6805 if (TCR_4(__kmp_init_parallel))
6807 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6808 if (TCR_4(__kmp_init_parallel)) {
6809 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6814 if (TCR_4(__kmp_global.g.g_done)) {
6817 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
6818 __kmp_infinite_loop();
6824 if (!__kmp_init_middle) {
6825 __kmp_do_middle_initialize();
6829 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
6830 KMP_ASSERT(KMP_UBER_GTID(gtid));
6832 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6835 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6836 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6837 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6841 #if KMP_HANDLE_SIGNALS
6843 __kmp_install_signals(TRUE);
6847 __kmp_suspend_initialize();
6849 #if defined(USE_LOAD_BALANCE)
6850 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6851 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6854 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6855 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6859 if (__kmp_version) {
6860 __kmp_print_version_2();
6864 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6867 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
6869 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6874 void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
kmp_team_t *team) {
6876 kmp_disp_t *dispatch;
6881 this_thr->th.th_local.this_construct = 0;
6882 #if KMP_CACHE_MANAGE
6883 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
6885 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6886 KMP_DEBUG_ASSERT(dispatch);
6887 KMP_DEBUG_ASSERT(team->t.t_dispatch);
6891 dispatch->th_disp_index = 0;
6893 dispatch->th_doacross_buf_idx =
6896 if (__kmp_env_consistency_check)
6897 __kmp_push_parallel(gtid, team->t.t_ident);
6902 void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
kmp_team_t *team) {
6904 if (__kmp_env_consistency_check)
6905 __kmp_pop_parallel(gtid, team->t.t_ident);
6907 __kmp_finish_implicit_task(this_thr);
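// Overview (inferred from the code below): __kmp_invoke_task_func is what each
// team member runs for a parallel region: bracket the call with
// __kmp_run_before/after_invoked_task, notify ITT and OMPT (implicit_task
// scope begin), and hand the team's pkfn, argc and argv to
// __kmp_invoke_microtask.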
6910 int __kmp_invoke_task_func(int gtid) {
6912 int tid = __kmp_tid_from_gtid(gtid);
6913 kmp_info_t *this_thr = __kmp_threads[gtid];
6914 kmp_team_t *team = this_thr->th.th_team;
6916 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
6918 if (__itt_stack_caller_create_ptr) {
6919 __kmp_itt_stack_callee_enter(
6921 team->t.t_stack_id);
6924 #if INCLUDE_SSC_MARKS
6925 SSC_MARK_INVOKING();
6930 void **exit_runtime_p;
6931 ompt_data_t *my_task_data;
6932 ompt_data_t *my_parallel_data;
6935 if (ompt_enabled.enabled) {
6937 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame);
6939 exit_runtime_p = &dummy;
6943 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
6944 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
6945 if (ompt_enabled.ompt_callback_implicit_task) {
6946 ompt_team_size = team->t.t_nproc;
6947 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
6948 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
6949 __kmp_tid_from_gtid(gtid));
6950 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
6955 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6956 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6958 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6959 tid, (int)team->t.t_argc, (void **)team->t.t_argv
6966 *exit_runtime_p = NULL;
6971 if (__itt_stack_caller_create_ptr) {
6972 __kmp_itt_stack_callee_leave(
6974 team->t.t_stack_id);
6977 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
6983 void __kmp_teams_master(int gtid) {
6985 kmp_info_t *thr = __kmp_threads[gtid];
6986 kmp_team_t *team = thr->th.th_team;
6987 ident_t *loc = team->t.t_ident;
6988 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6989 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
6990 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
6991 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
6992 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
6995 #if INCLUDE_SSC_MARKS
6998 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
6999 (microtask_t)thr->th.th_teams_microtask,
7000 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7001 #if INCLUDE_SSC_MARKS
7007 __kmp_join_call(loc, gtid
7016 int __kmp_invoke_teams_master(int gtid) {
7017 kmp_info_t *this_thr = __kmp_threads[gtid];
7018 kmp_team_t *team = this_thr->th.th_team;
7020 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7021 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7022 (void *)__kmp_teams_master);
7024 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7025 __kmp_teams_master(gtid);
7026 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
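// Overview (inferred from the code below): the __kmp_push_* helpers stash
// clause values on the calling thread so the next fork picks them up --
// presumably from the corresponding num_threads, num_teams/thread_limit and
// proc_bind clauses. For teams, a missing thread limit is derived as
// __kmp_avail_proc / num_teams, and both values are clamped against
// __kmp_teams_max_nth with a CantFormThrTeam warning.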
7036 void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7037 kmp_info_t *thr = __kmp_threads[gtid];
7039 if (num_threads > 0)
7040 thr->th.th_set_nproc = num_threads;
7047 void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
int num_threads) {
7049 kmp_info_t *thr = __kmp_threads[gtid];
7050 KMP_DEBUG_ASSERT(num_teams >= 0);
7051 KMP_DEBUG_ASSERT(num_threads >= 0);
7055 if (num_teams > __kmp_teams_max_nth) {
7056 if (!__kmp_reserve_warn) {
7057 __kmp_reserve_warn = 1;
7058 __kmp_msg(kmp_ms_warning,
7059 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7060 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7062 num_teams = __kmp_teams_max_nth;
7066 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7069 if (num_threads == 0) {
7070 if (!TCR_4(__kmp_init_middle))
7071 __kmp_middle_initialize();
7072 num_threads = __kmp_avail_proc / num_teams;
7073 if (num_teams * num_threads > __kmp_teams_max_nth) {
7075 num_threads = __kmp_teams_max_nth / num_teams;
7078 if (num_teams * num_threads > __kmp_teams_max_nth) {
7079 int new_threads = __kmp_teams_max_nth / num_teams;
7080 if (!__kmp_reserve_warn) {
7081 __kmp_reserve_warn = 1;
7082 __kmp_msg(kmp_ms_warning,
7083 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7084 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7086 num_threads = new_threads;
7089 thr->th.th_teams_size.nth = num_threads;
7093 void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7094 kmp_info_t *thr = __kmp_threads[gtid];
7095 thr->th.th_set_proc_bind = proc_bind;
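// Overview (inferred from the code below): __kmp_internal_fork and
// __kmp_internal_join are the master-side halves of a parallel region. The
// fork path resets the team's construct counter and dispatch buffers and
// releases the workers through the fork barrier; the join path waits at the
// join barrier and, with OMPT enabled, emits the sync_region and
// implicit_task end callbacks.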
7102 void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7103 kmp_info_t *this_thr = __kmp_threads[gtid];
7109 KMP_DEBUG_ASSERT(team);
7110 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7111 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7114 team->t.t_construct = 0;
7115 team->t.t_ordered.dt.t_value =
7119 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7120 if (team->t.t_max_nproc > 1) {
7122 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7123 team->t.t_disp_buffer[i].buffer_index = i;
7125 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7129 team->t.t_disp_buffer[0].buffer_index = 0;
7131 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7136 KMP_ASSERT(this_thr->th.th_team == team);
7139 for (f = 0; f < team->t.t_nproc; f++) {
7140 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7141 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7146 __kmp_fork_barrier(gtid, 0);
7149 void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7150 kmp_info_t *this_thr = __kmp_threads[gtid];
7152 KMP_DEBUG_ASSERT(team);
7153 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7154 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7160 if (__kmp_threads[gtid] &&
7161 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7162 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7163 __kmp_threads[gtid]);
7164 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7165 "team->t.t_nproc=%d\n",
7166 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7168 __kmp_print_structure();
7170 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7171 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7174 __kmp_join_barrier(gtid);
7176 if (ompt_enabled.enabled &&
7177 this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
7178 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7179 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
7180 this_thr->th.ompt_thread_info.state = omp_state_overhead;
7182 void *codeptr = NULL;
7183 if (KMP_MASTER_TID(ds_tid) &&
7184 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7185 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7186 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7188 if (ompt_enabled.ompt_callback_sync_region_wait) {
7189 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
7190 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
7192 if (ompt_enabled.ompt_callback_sync_region) {
7193 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
7194 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
7197 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7198 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7199 ompt_scope_end, NULL, task_data, 0, ds_tid);
7205 KMP_ASSERT(this_thr->th.th_team == team);
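// Overview (inferred from the code below): KMP_DYNAMIC_MODE=load_balance
// support. __kmp_active_hot_team_nproc counts the busy threads of the root's
// hot team, and __kmp_load_balance_nproc sizes the next team from
// __kmp_avail_proc minus the measured system load (falling back to the
// thread-limit heuristic when the load query fails), clamped to
// [KMP_MIN_NTH, set_nproc].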
7210 #ifdef USE_LOAD_BALANCE
7214 static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7217 kmp_team_t *hot_team;
7219 if (root->r.r_active) {
7222 hot_team = root->r.r_hot_team;
7223 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7224 return hot_team->t.t_nproc - 1;
7229 for (i = 1; i < hot_team->t.t_nproc; i++) {
7230 if (hot_team->t.t_threads[i]->th.th_active) {
7239 static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
7242 int hot_team_active;
7243 int team_curr_active;
7246 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7248 KMP_DEBUG_ASSERT(root);
7249 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7250 ->th.th_current_task->td_icvs.dynamic == TRUE);
7251 KMP_DEBUG_ASSERT(set_nproc > 1);
7253 if (set_nproc == 1) {
7254 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
7263 pool_active = __kmp_thread_pool_active_nth;
7264 hot_team_active = __kmp_active_hot_team_nproc(root);
7265 team_curr_active = pool_active + hot_team_active + 1;
7268 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7269 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
7270 "hot team active = %d\n",
7271 system_active, pool_active, hot_team_active));
7273 if (system_active < 0) {
7277 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7278 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
7281 retval = __kmp_avail_proc - __kmp_nth +
7282 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7283 if (retval > set_nproc) {
7286 if (retval < KMP_MIN_NTH) {
7287 retval = KMP_MIN_NTH;
7290 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7298 if (system_active < team_curr_active) {
7299 system_active = team_curr_active;
7301 retval = __kmp_avail_proc - system_active + team_curr_active;
7302 if (retval > set_nproc) {
7305 if (retval < KMP_MIN_NTH) {
7306 retval = KMP_MIN_NTH;
7309 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
7318 void __kmp_cleanup(void) {
7321 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
7323 if (TCR_4(__kmp_init_parallel)) {
7324 #if KMP_HANDLE_SIGNALS
7325 __kmp_remove_signals();
7327 TCW_4(__kmp_init_parallel, FALSE);
7330 if (TCR_4(__kmp_init_middle)) {
7331 #if KMP_AFFINITY_SUPPORTED
7332 __kmp_affinity_uninitialize();
7334 __kmp_cleanup_hierarchy();
7335 TCW_4(__kmp_init_middle, FALSE);
7338 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
7340 if (__kmp_init_serial) {
7341 __kmp_runtime_destroy();
7342 __kmp_init_serial = FALSE;
7345 __kmp_cleanup_threadprivate_caches();
7347 for (f = 0; f < __kmp_threads_capacity; f++) {
7348 if (__kmp_root[f] != NULL) {
7349 __kmp_free(__kmp_root[f]);
7350 __kmp_root[f] = NULL;
7353 __kmp_free(__kmp_threads);
7356 __kmp_threads = NULL;
7358 __kmp_threads_capacity = 0;
7360 #if KMP_USE_DYNAMIC_LOCK
7361 __kmp_cleanup_indirect_user_locks();
7363 __kmp_cleanup_user_locks();
7366 #if KMP_AFFINITY_SUPPORTED
7367 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
7368 __kmp_cpuinfo_file = NULL;
7371 #if KMP_USE_ADAPTIVE_LOCKS
7372 #if KMP_DEBUG_ADAPTIVE_LOCKS
7373 __kmp_print_speculative_stats();
7376 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7377 __kmp_nested_nth.nth = NULL;
7378 __kmp_nested_nth.size = 0;
7379 __kmp_nested_nth.used = 0;
7380 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7381 __kmp_nested_proc_bind.bind_types = NULL;
7382 __kmp_nested_proc_bind.size = 0;
7383 __kmp_nested_proc_bind.used = 0;
7385 __kmp_i18n_catclose();
7387 #if KMP_USE_HIER_SCHED
7388 __kmp_hier_scheds.deallocate();
7391 #if KMP_STATS_ENABLED
7395 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
7400 int __kmp_ignore_mppbeg(void) {
7403 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
7404 if (__kmp_str_match_false(env))
7411 int __kmp_ignore_mppend(void) {
7414 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
7415 if (__kmp_str_match_false(env))
7422 void __kmp_internal_begin(void) {
7428 gtid = __kmp_entry_gtid();
7429 root = __kmp_threads[gtid]->th.th_root;
7430 KMP_ASSERT(KMP_UBER_GTID(gtid));
7432 if (root->r.r_begin)
7434 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7435 if (root->r.r_begin) {
7436 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7440 root->r.r_begin = TRUE;
7442 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7447 void __kmp_user_set_library(enum library_type arg) {
7454 gtid = __kmp_entry_gtid();
7455 thread = __kmp_threads[gtid];
7457 root = thread->th.th_root;
7459 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7461 if (root->r.r_in_parallel) {
7463 KMP_WARNING(SetLibraryIncorrectCall);
7468 case library_serial:
7469 thread->th.th_set_nproc = 0;
7470 set__nproc(thread, 1);
7472 case library_turnaround:
7473 thread->th.th_set_nproc = 0;
7474 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7475 : __kmp_dflt_team_nth_ub);
7477 case library_throughput:
7478 thread->th.th_set_nproc = 0;
7479 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7480 : __kmp_dflt_team_nth_ub);
7483 KMP_FATAL(UnknownLibraryType, arg);
7486 __kmp_aux_set_library(arg);
7489 void __kmp_aux_set_stacksize(size_t arg) {
7490 if (!__kmp_init_serial)
7491 __kmp_serial_initialize();
7494 if (arg & (0x1000 - 1)) {
7495 arg &= ~(0x1000 - 1);
7500 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7503 if (!TCR_4(__kmp_init_parallel)) {
7506 if (value < __kmp_sys_min_stksize)
7507 value = __kmp_sys_min_stksize;
7508 else if (value > KMP_MAX_STKSIZE)
7509 value = KMP_MAX_STKSIZE;
7511 __kmp_stksize = value;
7513 __kmp_env_stksize = TRUE;
7516 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7521 void __kmp_aux_set_library(enum library_type arg) {
7522 __kmp_library = arg;
7524 switch (__kmp_library) {
7525 case library_serial: {
7526 KMP_INFORM(LibraryIsSerial);
7527 (void)__kmp_change_library(TRUE);
7529 case library_turnaround:
7530 (void)__kmp_change_library(TRUE);
7532 case library_throughput:
7533 (void)__kmp_change_library(FALSE);
7536 KMP_FATAL(UnknownLibraryType, arg);
7542 void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
7543 int blocktime = arg;
7549 __kmp_save_internal_controls(thread);
7552 if (blocktime < KMP_MIN_BLOCKTIME)
7553 blocktime = KMP_MIN_BLOCKTIME;
7554 else if (blocktime > KMP_MAX_BLOCKTIME)
7555 blocktime = KMP_MAX_BLOCKTIME;
7557 set__blocktime_team(thread->th.th_team, tid, blocktime);
7558 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
7562 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7564 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
7565 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
7571 set__bt_set_team(thread->th.th_team, tid, bt_set);
7572 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
7574 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
7575 "bt_intervals=%d, monitor_updates=%d\n",
7576 __kmp_gtid_from_tid(tid, thread->th.th_team),
7577 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7578 __kmp_monitor_wakeups));
7580 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7581 __kmp_gtid_from_tid(tid, thread->th.th_team),
7582 thread->th.th_team->t.t_id, tid, blocktime));
7586 void __kmp_aux_set_defaults(char const *str, int len) {
7587 if (!__kmp_init_serial) {
7588 __kmp_serial_initialize();
7590 __kmp_env_initialize(str);
7594 || __kmp_display_env || __kmp_display_env_verbose
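// Overview (inferred from the code below): __kmp_determine_reduction_method
// picks how a reduction finishes -- an empty block for a team of one, an
// atomic block, a tree reduction (with a plain or a reduction barrier), or
// the critical-section fallback -- based on architecture/OS and on the number
// and size of the reduction variables; __kmp_force_reduction_method can
// override the choice when the generated fast path supports it.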
7604 PACKED_REDUCTION_METHOD_T
7605 __kmp_determine_reduction_method(
7606 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
7607 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7608 kmp_critical_name *lck) {
7619 PACKED_REDUCTION_METHOD_T retval;
7623 KMP_DEBUG_ASSERT(loc);
7624 KMP_DEBUG_ASSERT(lck);
7626 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
7627 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
7628 #define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
7630 retval = critical_reduce_block;
7633 team_size = __kmp_get_team_num_threads(global_tid);
7634 if (team_size == 1) {
7636 retval = empty_reduce_block;
7640 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7641 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7643 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
7645 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \
7646 KMP_OS_DARWIN || KMP_OS_HURD
7648 int teamsize_cutoff = 4;
7650 #if KMP_MIC_SUPPORTED
7651 if (__kmp_mic_type != non_mic) {
7652 teamsize_cutoff = 8;
7655 if (tree_available) {
7656 if (team_size <= teamsize_cutoff) {
7657 if (atomic_available) {
7658 retval = atomic_reduce_block;
7661 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7663 } else if (atomic_available) {
7664 retval = atomic_reduce_block;
7667 #error "Unknown or unsupported OS"
7668 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
7671 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7673 #if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_HURD
7677 if (atomic_available) {
7678 if (num_vars <= 2) {
7679 retval = atomic_reduce_block;
7685 if (atomic_available && (num_vars <= 3)) {
7686 retval = atomic_reduce_block;
7687 } else if (tree_available) {
7688 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
7689 (reduce_size < (2000 * sizeof(kmp_real64)))) {
7690 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7695 #error "Unknown or unsupported OS"
7699 #error "Unknown or unsupported architecture"
7707 if (__kmp_force_reduction_method != reduction_method_not_defined &&
7710 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
7712 int atomic_available, tree_available;
7714 switch ((forced_retval = __kmp_force_reduction_method)) {
7715 case critical_reduce_block:
7719 case atomic_reduce_block:
7720 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7721 if (!atomic_available) {
7722 KMP_WARNING(RedMethodNotSupported,
"atomic");
7723 forced_retval = critical_reduce_block;
7727 case tree_reduce_block:
7728 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7729 if (!tree_available) {
7730 KMP_WARNING(RedMethodNotSupported, "tree");
7731 forced_retval = critical_reduce_block;
7733 #if KMP_FAST_REDUCTION_BARRIER
7734 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7743 retval = forced_retval;
7746 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
7748 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7749 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7755 kmp_int32 __kmp_get_reduce_method(void) {
7756 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);