15 #include "kmp_affinity.h" 16 #include "kmp_atomic.h" 17 #include "kmp_environment.h" 18 #include "kmp_error.h" 22 #include "kmp_settings.h" 23 #include "kmp_stats.h" 25 #include "kmp_wait_release.h" 26 #include "kmp_wrapper_getpid.h" 27 #include "kmp_dispatch.h" 28 #if KMP_USE_HIER_SCHED 29 #include "kmp_dispatch_hier.h" 33 #include "ompt-specific.h" 37 #define KMP_USE_PRCTL 0 43 #include "tsan_annotations.h" 45 #if defined(KMP_GOMP_COMPAT) 46 char const __kmp_version_alt_comp[] =
47 KMP_VERSION_PREFIX
"alternative compiler support: yes";
50 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX
"API version: " 62 char const __kmp_version_lock[] =
63 KMP_VERSION_PREFIX
"lock type: run time selectable";
66 #define KMP_MIN(x, y) ((x) < (y) ? (x) : (y)) 71 kmp_info_t __kmp_monitor;
76 void __kmp_cleanup(
void);
78 static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *,
int tid,
80 static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
81 kmp_internal_control_t *new_icvs,
83 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 84 static void __kmp_partition_places(kmp_team_t *team,
85 int update_master_only = 0);
87 static void __kmp_do_serial_initialize(
void);
88 void __kmp_fork_barrier(
int gtid,
int tid);
89 void __kmp_join_barrier(
int gtid);
90 void __kmp_setup_icv_copy(kmp_team_t *team,
int new_nproc,
91 kmp_internal_control_t *new_icvs,
ident_t *loc);
93 #ifdef USE_LOAD_BALANCE 94 static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc);
97 static int __kmp_expand_threads(
int nNeed);
99 static int __kmp_unregister_root_other_thread(
int gtid);
101 static void __kmp_unregister_library(
void);
102 static void __kmp_reap_thread(kmp_info_t *thread,
int is_root);
103 kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
108 int __kmp_get_global_thread_id() {
110 kmp_info_t **other_threads;
118 (
"*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
119 __kmp_nth, __kmp_all_nth));
126 if (!TCR_4(__kmp_init_gtid))
129 #ifdef KMP_TDATA_GTID 130 if (TCR_4(__kmp_gtid_mode) >= 3) {
131 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using TDATA\n"));
135 if (TCR_4(__kmp_gtid_mode) >= 2) {
136 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using keyed TLS\n"));
137 return __kmp_gtid_get_specific();
139 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using internal alg.\n"));
141 stack_addr = (
char *)&stack_data;
142 other_threads = __kmp_threads;
155 for (i = 0; i < __kmp_threads_capacity; i++) {
157 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
161 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
162 stack_base = (
char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
166 if (stack_addr <= stack_base) {
167 size_t stack_diff = stack_base - stack_addr;
169 if (stack_diff <= stack_size) {
172 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
180 (
"*** __kmp_get_global_thread_id: internal alg. failed to find " 181 "thread, using TLS\n"));
182 i = __kmp_gtid_get_specific();
192 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
193 KMP_FATAL(StackOverflow, i);
196 stack_base = (
char *)other_threads[i]->th.th_info.ds.ds_stackbase;
197 if (stack_addr > stack_base) {
198 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
199 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
200 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
203 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
204 stack_base - stack_addr);
208 if (__kmp_storage_map) {
209 char *stack_end = (
char *)other_threads[i]->th.th_info.ds.ds_stackbase;
210 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
211 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
212 other_threads[i]->th.th_info.ds.ds_stacksize,
213 "th_%d stack (refinement)", i);
218 int __kmp_get_global_thread_id_reg() {
221 if (!__kmp_init_serial) {
224 #ifdef KMP_TDATA_GTID 225 if (TCR_4(__kmp_gtid_mode) >= 3) {
226 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id_reg: using TDATA\n"));
230 if (TCR_4(__kmp_gtid_mode) >= 2) {
231 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
232 gtid = __kmp_gtid_get_specific();
235 (
"*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
236 gtid = __kmp_get_global_thread_id();
240 if (gtid == KMP_GTID_DNE) {
242 (
"__kmp_get_global_thread_id_reg: Encountered new root thread. " 243 "Registering a new gtid.\n"));
244 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
245 if (!__kmp_init_serial) {
246 __kmp_do_serial_initialize();
247 gtid = __kmp_gtid_get_specific();
249 gtid = __kmp_register_root(FALSE);
251 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
255 KMP_DEBUG_ASSERT(gtid >= 0);
261 void __kmp_check_stack_overlap(kmp_info_t *th) {
263 char *stack_beg = NULL;
264 char *stack_end = NULL;
267 KA_TRACE(10, (
"__kmp_check_stack_overlap: called\n"));
268 if (__kmp_storage_map) {
269 stack_end = (
char *)th->th.th_info.ds.ds_stackbase;
270 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
272 gtid = __kmp_gtid_from_thread(th);
274 if (gtid == KMP_GTID_MONITOR) {
275 __kmp_print_storage_map_gtid(
276 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
277 "th_%s stack (%s)",
"mon",
278 (th->th.th_info.ds.ds_stackgrow) ?
"initial" :
"actual");
280 __kmp_print_storage_map_gtid(
281 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
282 "th_%d stack (%s)", gtid,
283 (th->th.th_info.ds.ds_stackgrow) ?
"initial" :
"actual");
289 gtid = __kmp_gtid_from_thread(th);
290 if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
292 (
"__kmp_check_stack_overlap: performing extensive checking\n"));
293 if (stack_beg == NULL) {
294 stack_end = (
char *)th->th.th_info.ds.ds_stackbase;
295 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
298 for (f = 0; f < __kmp_threads_capacity; f++) {
299 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
301 if (f_th && f_th != th) {
302 char *other_stack_end =
303 (
char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
304 char *other_stack_beg =
305 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
306 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
307 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
310 if (__kmp_storage_map)
311 __kmp_print_storage_map_gtid(
312 -1, other_stack_beg, other_stack_end,
313 (
size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
314 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
316 __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
322 KA_TRACE(10, (
"__kmp_check_stack_overlap: returning\n"));
327 void __kmp_infinite_loop(
void) {
328 static int done = FALSE;
335 #define MAX_MESSAGE 512 337 void __kmp_print_storage_map_gtid(
int gtid,
void *p1,
void *p2,
size_t size,
338 char const *format, ...) {
339 char buffer[MAX_MESSAGE];
342 va_start(ap, format);
343 KMP_SNPRINTF(buffer,
sizeof(buffer),
"OMP storage map: %p %p%8lu %s\n", p1,
344 p2, (
unsigned long)size, format);
345 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
346 __kmp_vprintf(kmp_err, buffer, ap);
347 #if KMP_PRINT_DATA_PLACEMENT 350 if (p1 <= p2 && (
char *)p2 - (
char *)p1 == size) {
351 if (__kmp_storage_map_verbose) {
352 node = __kmp_get_host_node(p1);
354 __kmp_storage_map_verbose = FALSE;
358 int localProc = __kmp_get_cpu_from_gtid(gtid);
360 const int page_size = KMP_GET_PAGE_SIZE();
362 p1 = (
void *)((
size_t)p1 & ~((size_t)page_size - 1));
363 p2 = (
void *)(((
size_t)p2 - 1) & ~((
size_t)page_size - 1));
365 __kmp_printf_no_lock(
" GTID %d localNode %d\n", gtid,
368 __kmp_printf_no_lock(
" GTID %d\n", gtid);
377 (
char *)p1 += page_size;
378 }
while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
379 __kmp_printf_no_lock(
" %p-%p memNode %d\n", last, (
char *)p1 - 1,
383 __kmp_printf_no_lock(
" %p-%p memNode %d\n", p1,
384 (
char *)p1 + (page_size - 1),
385 __kmp_get_host_node(p1));
387 __kmp_printf_no_lock(
" %p-%p memNode %d\n", p2,
388 (
char *)p2 + (page_size - 1),
389 __kmp_get_host_node(p2));
395 __kmp_printf_no_lock(
" %s\n", KMP_I18N_STR(StorageMapWarning));
398 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
401 void __kmp_warn(
char const *format, ...) {
402 char buffer[MAX_MESSAGE];
405 if (__kmp_generate_warnings == kmp_warnings_off) {
409 va_start(ap, format);
411 KMP_SNPRINTF(buffer,
sizeof(buffer),
"OMP warning: %s\n", format);
412 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
413 __kmp_vprintf(kmp_err, buffer, ap);
414 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
419 void __kmp_abort_process() {
421 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
423 if (__kmp_debug_buf) {
424 __kmp_dump_debug_buffer();
427 if (KMP_OS_WINDOWS) {
430 __kmp_global.g.g_abort = SIGABRT;
447 __kmp_infinite_loop();
448 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
452 void __kmp_abort_thread(
void) {
455 __kmp_infinite_loop();
461 static void __kmp_print_thread_storage_map(kmp_info_t *thr,
int gtid) {
462 __kmp_print_storage_map_gtid(gtid, thr, thr + 1,
sizeof(kmp_info_t),
"th_%d",
465 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
466 sizeof(kmp_desc_t),
"th_%d.th_info", gtid);
468 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
469 sizeof(kmp_local_t),
"th_%d.th_local", gtid);
471 __kmp_print_storage_map_gtid(
472 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
473 sizeof(kmp_balign_t) * bs_last_barrier,
"th_%d.th_bar", gtid);
475 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
476 &thr->th.th_bar[bs_plain_barrier + 1],
477 sizeof(kmp_balign_t),
"th_%d.th_bar[plain]",
480 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
481 &thr->th.th_bar[bs_forkjoin_barrier + 1],
482 sizeof(kmp_balign_t),
"th_%d.th_bar[forkjoin]",
485 #if KMP_FAST_REDUCTION_BARRIER 486 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
487 &thr->th.th_bar[bs_reduction_barrier + 1],
488 sizeof(kmp_balign_t),
"th_%d.th_bar[reduction]",
490 #endif // KMP_FAST_REDUCTION_BARRIER 496 static void __kmp_print_team_storage_map(
const char *header, kmp_team_t *team,
497 int team_id,
int num_thr) {
498 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
499 __kmp_print_storage_map_gtid(-1, team, team + 1,
sizeof(kmp_team_t),
"%s_%d",
502 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
503 &team->t.t_bar[bs_last_barrier],
504 sizeof(kmp_balign_team_t) * bs_last_barrier,
505 "%s_%d.t_bar", header, team_id);
507 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
508 &team->t.t_bar[bs_plain_barrier + 1],
509 sizeof(kmp_balign_team_t),
"%s_%d.t_bar[plain]",
512 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
513 &team->t.t_bar[bs_forkjoin_barrier + 1],
514 sizeof(kmp_balign_team_t),
515 "%s_%d.t_bar[forkjoin]", header, team_id);
517 #if KMP_FAST_REDUCTION_BARRIER 518 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
519 &team->t.t_bar[bs_reduction_barrier + 1],
520 sizeof(kmp_balign_team_t),
521 "%s_%d.t_bar[reduction]", header, team_id);
522 #endif // KMP_FAST_REDUCTION_BARRIER 524 __kmp_print_storage_map_gtid(
525 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
526 sizeof(kmp_disp_t) * num_thr,
"%s_%d.t_dispatch", header, team_id);
528 __kmp_print_storage_map_gtid(
529 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
530 sizeof(kmp_info_t *) * num_thr,
"%s_%d.t_threads", header, team_id);
532 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
533 &team->t.t_disp_buffer[num_disp_buff],
534 sizeof(dispatch_shared_info_t) * num_disp_buff,
535 "%s_%d.t_disp_buffer", header, team_id);
537 __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
538 sizeof(kmp_taskq_t),
"%s_%d.t_taskq", header,
542 static void __kmp_init_allocator() {
544 __kmp_init_memkind();
547 static void __kmp_fini_allocator() {
549 __kmp_fini_memkind();
555 #ifdef KMP_DYNAMIC_LIB 558 static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
560 __kmp_init_bootstrap_lock(lck);
563 static void __kmp_reset_locks_on_process_detach(
int gtid_req) {
581 for (i = 0; i < __kmp_threads_capacity; ++i) {
584 kmp_info_t *th = __kmp_threads[i];
587 int gtid = th->th.th_info.ds.ds_gtid;
588 if (gtid == gtid_req)
593 int alive = __kmp_is_thread_alive(th, &exit_val);
598 if (thread_count == 0)
604 __kmp_reset_lock(&__kmp_forkjoin_lock);
606 __kmp_reset_lock(&__kmp_stdio_lock);
610 BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
615 case DLL_PROCESS_ATTACH:
616 KA_TRACE(10, (
"DllMain: PROCESS_ATTACH\n"));
620 case DLL_PROCESS_DETACH:
621 KA_TRACE(10, (
"DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
623 if (lpReserved != NULL) {
649 __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
652 __kmp_internal_end_library(__kmp_gtid_get_specific());
656 case DLL_THREAD_ATTACH:
657 KA_TRACE(10, (
"DllMain: THREAD_ATTACH\n"));
663 case DLL_THREAD_DETACH:
664 KA_TRACE(10, (
"DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
666 __kmp_internal_end_thread(__kmp_gtid_get_specific());
678 int __kmp_change_library(
int status) {
681 old_status = __kmp_yield_init &
685 __kmp_yield_init |= 1;
687 __kmp_yield_init &= ~1;
695 void __kmp_parallel_deo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
696 int gtid = *gtid_ref;
697 #ifdef BUILD_PARALLEL_ORDERED 698 kmp_team_t *team = __kmp_team_from_gtid(gtid);
701 if (__kmp_env_consistency_check) {
702 if (__kmp_threads[gtid]->th.th_root->r.r_active)
703 #if KMP_USE_DYNAMIC_LOCK 704 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
706 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
709 #ifdef BUILD_PARALLEL_ORDERED 710 if (!team->t.t_serialized) {
712 KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
720 void __kmp_parallel_dxo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
721 int gtid = *gtid_ref;
722 #ifdef BUILD_PARALLEL_ORDERED 723 int tid = __kmp_tid_from_gtid(gtid);
724 kmp_team_t *team = __kmp_team_from_gtid(gtid);
727 if (__kmp_env_consistency_check) {
728 if (__kmp_threads[gtid]->th.th_root->r.r_active)
729 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
731 #ifdef BUILD_PARALLEL_ORDERED 732 if (!team->t.t_serialized) {
737 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
747 int __kmp_enter_single(
int gtid,
ident_t *id_ref,
int push_ws) {
752 if (!TCR_4(__kmp_init_parallel))
753 __kmp_parallel_initialize();
755 th = __kmp_threads[gtid];
756 team = th->th.th_team;
759 th->th.th_ident = id_ref;
761 if (team->t.t_serialized) {
764 kmp_int32 old_this = th->th.th_local.this_construct;
766 ++th->th.th_local.this_construct;
770 if (team->t.t_construct == old_this) {
771 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
772 th->th.th_local.this_construct);
775 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
776 KMP_MASTER_GTID(gtid) &&
778 th->th.th_teams_microtask == NULL &&
780 team->t.t_active_level ==
782 __kmp_itt_metadata_single(id_ref);
787 if (__kmp_env_consistency_check) {
788 if (status && push_ws) {
789 __kmp_push_workshare(gtid, ct_psingle, id_ref);
791 __kmp_check_workshare(gtid, ct_psingle, id_ref);
796 __kmp_itt_single_start(gtid);
802 void __kmp_exit_single(
int gtid) {
804 __kmp_itt_single_end(gtid);
806 if (__kmp_env_consistency_check)
807 __kmp_pop_workshare(gtid, ct_psingle, NULL);
816 static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
817 int master_tid,
int set_nthreads
825 KMP_DEBUG_ASSERT(__kmp_init_serial);
826 KMP_DEBUG_ASSERT(root && parent_team);
830 new_nthreads = set_nthreads;
831 if (!get__dynamic_2(parent_team, master_tid)) {
834 #ifdef USE_LOAD_BALANCE 835 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
836 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
837 if (new_nthreads == 1) {
838 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d load balance reduced " 839 "reservation to 1 thread\n",
843 if (new_nthreads < set_nthreads) {
844 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d load balance reduced " 845 "reservation to %d threads\n",
846 master_tid, new_nthreads));
850 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
851 new_nthreads = __kmp_avail_proc - __kmp_nth +
852 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
853 if (new_nthreads <= 1) {
854 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d thread limit reduced " 855 "reservation to 1 thread\n",
859 if (new_nthreads < set_nthreads) {
860 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d thread limit reduced " 861 "reservation to %d threads\n",
862 master_tid, new_nthreads));
864 new_nthreads = set_nthreads;
866 }
else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
867 if (set_nthreads > 2) {
868 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
869 new_nthreads = (new_nthreads % set_nthreads) + 1;
870 if (new_nthreads == 1) {
871 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d dynamic random reduced " 872 "reservation to 1 thread\n",
876 if (new_nthreads < set_nthreads) {
877 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d dynamic random reduced " 878 "reservation to %d threads\n",
879 master_tid, new_nthreads));
887 if (__kmp_nth + new_nthreads -
888 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
890 int tl_nthreads = __kmp_max_nth - __kmp_nth +
891 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
892 if (tl_nthreads <= 0) {
897 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
898 __kmp_reserve_warn = 1;
899 __kmp_msg(kmp_ms_warning,
900 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
901 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
903 if (tl_nthreads == 1) {
904 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT " 905 "reduced reservation to 1 thread\n",
909 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced " 910 "reservation to %d threads\n",
911 master_tid, tl_nthreads));
912 new_nthreads = tl_nthreads;
916 if (root->r.r_cg_nthreads + new_nthreads -
917 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
919 int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
920 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
921 if (tl_nthreads <= 0) {
926 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
927 __kmp_reserve_warn = 1;
928 __kmp_msg(kmp_ms_warning,
929 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
930 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
932 if (tl_nthreads == 1) {
933 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT " 934 "reduced reservation to 1 thread\n",
938 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced " 939 "reservation to %d threads\n",
940 master_tid, tl_nthreads));
941 new_nthreads = tl_nthreads;
947 capacity = __kmp_threads_capacity;
948 if (TCR_PTR(__kmp_threads[0]) == NULL) {
951 if (__kmp_nth + new_nthreads -
952 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
955 int slotsRequired = __kmp_nth + new_nthreads -
956 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
958 int slotsAdded = __kmp_expand_threads(slotsRequired);
959 if (slotsAdded < slotsRequired) {
961 new_nthreads -= (slotsRequired - slotsAdded);
962 KMP_ASSERT(new_nthreads >= 1);
965 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
966 __kmp_reserve_warn = 1;
967 if (__kmp_tp_cached) {
968 __kmp_msg(kmp_ms_warning,
969 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
970 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
971 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
973 __kmp_msg(kmp_ms_warning,
974 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
975 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
982 if (new_nthreads == 1) {
984 (
"__kmp_reserve_threads: T#%d serializing team after reclaiming " 985 "dead roots and rechecking; requested %d threads\n",
986 __kmp_get_gtid(), set_nthreads));
988 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d allocating %d threads; requested" 990 __kmp_get_gtid(), new_nthreads, set_nthreads));
999 static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
1000 kmp_info_t *master_th,
int master_gtid) {
1004 KA_TRACE(10, (
"__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
1005 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
1009 master_th->th.th_info.ds.ds_tid = 0;
1010 master_th->th.th_team = team;
1011 master_th->th.th_team_nproc = team->t.t_nproc;
1012 master_th->th.th_team_master = master_th;
1013 master_th->th.th_team_serialized = FALSE;
1014 master_th->th.th_dispatch = &team->t.t_dispatch[0];
1017 #if KMP_NESTED_HOT_TEAMS 1019 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
1022 int level = team->t.t_active_level - 1;
1023 if (master_th->th.th_teams_microtask) {
1024 if (master_th->th.th_teams_size.nteams > 1) {
1028 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1029 master_th->th.th_teams_level == team->t.t_level) {
1034 if (level < __kmp_hot_teams_max_level) {
1035 if (hot_teams[level].hot_team) {
1037 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
1041 hot_teams[level].hot_team = team;
1042 hot_teams[level].hot_team_nth = team->t.t_nproc;
1049 use_hot_team = team == root->r.r_hot_team;
1051 if (!use_hot_team) {
1054 team->t.t_threads[0] = master_th;
1055 __kmp_initialize_info(master_th, team, 0, master_gtid);
1058 for (i = 1; i < team->t.t_nproc; i++) {
1061 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
1062 team->t.t_threads[i] = thr;
1063 KMP_DEBUG_ASSERT(thr);
1064 KMP_DEBUG_ASSERT(thr->th.th_team == team);
1066 KA_TRACE(20, (
"__kmp_fork_team_threads: T#%d(%d:%d) init arrived " 1067 "T#%d(%d:%d) join =%llu, plain=%llu\n",
1068 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
1069 __kmp_gtid_from_tid(i, team), team->t.t_id, i,
1070 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
1071 team->t.t_bar[bs_plain_barrier].b_arrived));
1073 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1074 thr->th.th_teams_level = master_th->th.th_teams_level;
1075 thr->th.th_teams_size = master_th->th.th_teams_size;
1079 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1080 for (b = 0; b < bs_last_barrier; ++b) {
1081 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1082 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1084 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1090 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 1091 __kmp_partition_places(team);
1098 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1102 inline static void propagateFPControl(kmp_team_t *team) {
1103 if (__kmp_inherit_fp_control) {
1104 kmp_int16 x87_fpu_control_word;
1108 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1109 __kmp_store_mxcsr(&mxcsr);
1110 mxcsr &= KMP_X86_MXCSR_MASK;
1121 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1122 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1125 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1129 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1135 inline static void updateHWFPControl(kmp_team_t *team) {
1136 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1139 kmp_int16 x87_fpu_control_word;
1141 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1142 __kmp_store_mxcsr(&mxcsr);
1143 mxcsr &= KMP_X86_MXCSR_MASK;
1145 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1146 __kmp_clear_x87_fpu_status_word();
1147 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1150 if (team->t.t_mxcsr != mxcsr) {
1151 __kmp_load_mxcsr(&team->t.t_mxcsr);
1156 #define propagateFPControl(x) ((void)0) 1157 #define updateHWFPControl(x) ((void)0) 1160 static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
1165 void __kmp_serialized_parallel(
ident_t *loc, kmp_int32 global_tid) {
1166 kmp_info_t *this_thr;
1167 kmp_team_t *serial_team;
1169 KC_TRACE(10, (
"__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1176 if (!TCR_4(__kmp_init_parallel))
1177 __kmp_parallel_initialize();
1179 this_thr = __kmp_threads[global_tid];
1180 serial_team = this_thr->th.th_serial_team;
1183 KMP_DEBUG_ASSERT(serial_team);
1186 if (__kmp_tasking_mode != tskm_immediate_exec) {
1188 this_thr->th.th_task_team ==
1189 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1190 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1192 KA_TRACE(20, (
"__kmpc_serialized_parallel: T#%d pushing task_team %p / " 1193 "team %p, new task_team = NULL\n",
1194 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1195 this_thr->th.th_task_team = NULL;
1199 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1200 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1201 proc_bind = proc_bind_false;
1202 }
else if (proc_bind == proc_bind_default) {
1205 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1208 this_thr->th.th_set_proc_bind = proc_bind_default;
1212 ompt_data_t ompt_parallel_data;
1213 ompt_parallel_data.ptr = NULL;
1214 ompt_data_t *implicit_task_data;
1215 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1216 if (ompt_enabled.enabled &&
1217 this_thr->th.ompt_thread_info.state != omp_state_overhead) {
1219 ompt_task_info_t *parent_task_info;
1220 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1222 parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1223 if (ompt_enabled.ompt_callback_parallel_begin) {
1226 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1227 &(parent_task_info->task_data), &(parent_task_info->frame),
1228 &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
1232 #endif // OMPT_SUPPORT 1234 if (this_thr->th.th_team != serial_team) {
1236 int level = this_thr->th.th_team->t.t_level;
1238 if (serial_team->t.t_serialized) {
1241 kmp_team_t *new_team;
1243 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1245 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1252 &this_thr->th.th_current_task->td_icvs,
1253 0 USE_NESTED_HOT_ARG(NULL));
1254 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1255 KMP_ASSERT(new_team);
1258 new_team->t.t_threads[0] = this_thr;
1259 new_team->t.t_parent = this_thr->th.th_team;
1260 serial_team = new_team;
1261 this_thr->th.th_serial_team = serial_team;
1265 (
"__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1266 global_tid, serial_team));
1274 (
"__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1275 global_tid, serial_team));
1279 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1280 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1281 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1282 serial_team->t.t_ident = loc;
1283 serial_team->t.t_serialized = 1;
1284 serial_team->t.t_nproc = 1;
1285 serial_team->t.t_parent = this_thr->th.th_team;
1286 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1287 this_thr->th.th_team = serial_team;
1288 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1290 KF_TRACE(10, (
"__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid,
1291 this_thr->th.th_current_task));
1292 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1293 this_thr->th.th_current_task->td_flags.executing = 0;
1295 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1300 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1301 &this_thr->th.th_current_task->td_parent->td_icvs);
1305 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1306 this_thr->th.th_current_task->td_icvs.nproc =
1307 __kmp_nested_nth.nth[level + 1];
1311 if (__kmp_nested_proc_bind.used &&
1312 (level + 1 < __kmp_nested_proc_bind.used)) {
1313 this_thr->th.th_current_task->td_icvs.proc_bind =
1314 __kmp_nested_proc_bind.bind_types[level + 1];
1319 serial_team->t.t_pkfn = (microtask_t)(~0);
1321 this_thr->th.th_info.ds.ds_tid = 0;
1324 this_thr->th.th_team_nproc = 1;
1325 this_thr->th.th_team_master = this_thr;
1326 this_thr->th.th_team_serialized = 1;
1328 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1329 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1331 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;
1334 propagateFPControl(serial_team);
1337 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1338 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1339 serial_team->t.t_dispatch->th_disp_buffer =
1340 (dispatch_private_info_t *)__kmp_allocate(
1341 sizeof(dispatch_private_info_t));
1343 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1350 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1351 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1352 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1353 ++serial_team->t.t_serialized;
1354 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1357 int level = this_thr->th.th_team->t.t_level;
1360 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1361 this_thr->th.th_current_task->td_icvs.nproc =
1362 __kmp_nested_nth.nth[level + 1];
1364 serial_team->t.t_level++;
1365 KF_TRACE(10, (
"__kmpc_serialized_parallel: T#%d increasing nesting level " 1366 "of serial team %p to %d\n",
1367 global_tid, serial_team, serial_team->t.t_level));
1370 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1372 dispatch_private_info_t *disp_buffer =
1373 (dispatch_private_info_t *)__kmp_allocate(
1374 sizeof(dispatch_private_info_t));
1375 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1376 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1378 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1383 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1386 if (__kmp_env_consistency_check)
1387 __kmp_push_parallel(global_tid, NULL);
1389 serial_team->t.ompt_team_info.master_return_address = codeptr;
1390 if (ompt_enabled.enabled &&
1391 this_thr->th.ompt_thread_info.state != omp_state_overhead) {
1392 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
1394 ompt_lw_taskteam_t lw_taskteam;
1395 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1396 &ompt_parallel_data, codeptr);
1398 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1402 implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
1403 if (ompt_enabled.ompt_callback_implicit_task) {
1404 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1405 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1406 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
1407 OMPT_CUR_TASK_INFO(this_thr)
1408 ->thread_num = __kmp_tid_from_gtid(global_tid);
1412 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
1413 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
1420 int __kmp_fork_call(
ident_t *loc,
int gtid,
1421 enum fork_context_e call_context,
1422 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1424 #
if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1433 int master_this_cons;
1435 kmp_team_t *parent_team;
1436 kmp_info_t *master_th;
1440 int master_set_numthreads;
1446 #if KMP_NESTED_HOT_TEAMS 1447 kmp_hot_team_ptr_t **p_hot_teams;
1450 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1453 KA_TRACE(20, (
"__kmp_fork_call: enter T#%d\n", gtid));
1454 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1457 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1459 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1460 __kmp_stkpadding += (short)((kmp_int64)dummy);
1466 if (!TCR_4(__kmp_init_parallel))
1467 __kmp_parallel_initialize();
1470 master_th = __kmp_threads[gtid];
1472 parent_team = master_th->th.th_team;
1473 master_tid = master_th->th.th_info.ds.ds_tid;
1474 master_this_cons = master_th->th.th_local.this_construct;
1475 root = master_th->th.th_root;
1476 master_active = root->r.r_active;
1477 master_set_numthreads = master_th->th.th_set_nproc;
1480 ompt_data_t ompt_parallel_data;
1481 ompt_parallel_data.ptr = NULL;
1482 ompt_data_t *parent_task_data;
1483 omp_frame_t *ompt_frame;
1484 ompt_data_t *implicit_task_data;
1485 void *return_address = NULL;
1487 if (ompt_enabled.enabled) {
1488 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1490 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1495 level = parent_team->t.t_level;
1497 active_level = parent_team->t.t_active_level;
1500 teams_level = master_th->th.th_teams_level;
1502 #if KMP_NESTED_HOT_TEAMS 1503 p_hot_teams = &master_th->th.th_hot_teams;
1504 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1505 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1506 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1507 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1509 (*p_hot_teams)[0].hot_team_nth = 1;
1514 if (ompt_enabled.enabled) {
1515 if (ompt_enabled.ompt_callback_parallel_begin) {
1516 int team_size = master_set_numthreads
1517 ? master_set_numthreads
1518 : get__nproc_2(parent_team, master_tid);
1519 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1520 parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
1521 OMPT_INVOKER(call_context), return_address);
1523 master_th->th.ompt_thread_info.state = omp_state_overhead;
1527 master_th->th.th_ident = loc;
1530 if (master_th->th.th_teams_microtask && ap &&
1531 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1535 parent_team->t.t_ident = loc;
1536 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1537 parent_team->t.t_argc = argc;
1538 argv = (
void **)parent_team->t.t_argv;
1539 for (i = argc - 1; i >= 0; --i)
1541 #
if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1542 *argv++ = va_arg(*ap,
void *);
1544 *argv++ = va_arg(ap,
void *);
1547 if (parent_team == master_th->th.th_serial_team) {
1550 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1553 parent_team->t.t_serialized--;
1556 void **exit_runtime_p;
1558 ompt_lw_taskteam_t lw_taskteam;
1560 if (ompt_enabled.enabled) {
1561 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1562 &ompt_parallel_data, return_address);
1563 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame);
1565 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1569 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1570 if (ompt_enabled.ompt_callback_implicit_task) {
1571 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1572 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1573 implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
1574 OMPT_CUR_TASK_INFO(master_th)
1575 ->thread_num = __kmp_tid_from_gtid(gtid);
1579 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
1581 exit_runtime_p = &dummy;
1586 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1587 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1588 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1597 *exit_runtime_p = NULL;
1598 if (ompt_enabled.enabled) {
1599 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL;
1600 if (ompt_enabled.ompt_callback_implicit_task) {
1601 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1602 ompt_scope_end, NULL, implicit_task_data, 1,
1603 OMPT_CUR_TASK_INFO(master_th)->thread_num);
1605 __ompt_lw_taskteam_unlink(master_th);
1607 if (ompt_enabled.ompt_callback_parallel_end) {
1608 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1609 OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
1610 OMPT_INVOKER(call_context), return_address);
1612 master_th->th.ompt_thread_info.state = omp_state_overhead;
1618 parent_team->t.t_pkfn = microtask;
1619 parent_team->t.t_invoke = invoker;
1620 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1621 parent_team->t.t_active_level++;
1622 parent_team->t.t_level++;
1624 parent_team->t.t_def_allocator = master_th->th.th_def_allocator;
1628 if (master_set_numthreads) {
1629 if (master_set_numthreads < master_th->th.th_teams_size.nth) {
1631 kmp_info_t **other_threads = parent_team->t.t_threads;
1632 parent_team->t.t_nproc = master_set_numthreads;
1633 for (i = 0; i < master_set_numthreads; ++i) {
1634 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1638 master_th->th.th_set_nproc = 0;
1642 if (__kmp_debugging) {
1643 int nth = __kmp_omp_num_threads(loc);
1645 master_set_numthreads = nth;
1650 KF_TRACE(10, (
"__kmp_fork_call: before internal fork: root=%p, team=%p, " 1651 "master_th=%p, gtid=%d\n",
1652 root, parent_team, master_th, gtid));
1653 __kmp_internal_fork(loc, gtid, parent_team);
1654 KF_TRACE(10, (
"__kmp_fork_call: after internal fork: root=%p, team=%p, " 1655 "master_th=%p, gtid=%d\n",
1656 root, parent_team, master_th, gtid));
1659 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
1660 parent_team->t.t_id, parent_team->t.t_pkfn));
1662 if (!parent_team->t.t_invoke(gtid)) {
1663 KMP_ASSERT2(0,
"cannot invoke microtask for MASTER thread");
1665 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
1666 parent_team->t.t_id, parent_team->t.t_pkfn));
1669 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
1676 if (__kmp_tasking_mode != tskm_immediate_exec) {
1677 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1678 parent_team->t.t_task_team[master_th->th.th_task_state]);
1682 if (parent_team->t.t_active_level >=
1683 master_th->th.th_current_task->td_icvs.max_active_levels) {
1687 int enter_teams = ((ap == NULL && active_level == 0) ||
1688 (ap && teams_level > 0 && teams_level == level));
1691 master_set_numthreads
1692 ? master_set_numthreads
1701 if ((!get__nested(master_th) && (root->r.r_in_parallel
1706 (__kmp_library == library_serial)) {
1707 KC_TRACE(10, (
"__kmp_fork_call: T#%d serializing team; requested %d" 1715 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1716 nthreads = __kmp_reserve_threads(
1717 root, parent_team, master_tid, nthreads
1728 if (nthreads == 1) {
1732 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1736 KMP_DEBUG_ASSERT(nthreads > 0);
1739 master_th->th.th_set_nproc = 0;
1742 if (nthreads == 1) {
1744 #if KMP_OS_LINUX && \ 1745 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 1748 void **args = (
void **)KMP_ALLOCA(argc *
sizeof(
void *));
1753 (
"__kmp_fork_call: T#%d serializing parallel region\n", gtid));
1757 if (call_context == fork_context_intel) {
1759 master_th->th.th_serial_team->t.t_ident = loc;
1763 master_th->th.th_serial_team->t.t_level--;
1768 void **exit_runtime_p;
1769 ompt_task_info_t *task_info;
1771 ompt_lw_taskteam_t lw_taskteam;
1773 if (ompt_enabled.enabled) {
1774 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1775 &ompt_parallel_data, return_address);
1777 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1780 task_info = OMPT_CUR_TASK_INFO(master_th);
1781 exit_runtime_p = &(task_info->frame.exit_frame);
1782 if (ompt_enabled.ompt_callback_implicit_task) {
1783 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1784 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1785 &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
1786 OMPT_CUR_TASK_INFO(master_th)
1787 ->thread_num = __kmp_tid_from_gtid(gtid);
1791 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
1793 exit_runtime_p = &dummy;
1798 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1799 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1800 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1801 parent_team->t.t_argv
1810 if (ompt_enabled.enabled) {
1811 exit_runtime_p = NULL;
1812 if (ompt_enabled.ompt_callback_implicit_task) {
1813 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1814 ompt_scope_end, NULL, &(task_info->task_data), 1,
1815 OMPT_CUR_TASK_INFO(master_th)->thread_num);
1818 __ompt_lw_taskteam_unlink(master_th);
1819 if (ompt_enabled.ompt_callback_parallel_end) {
1820 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1821 OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
1822 OMPT_INVOKER(call_context), return_address);
1824 master_th->th.ompt_thread_info.state = omp_state_overhead;
1827 }
else if (microtask == (microtask_t)__kmp_teams_master) {
1828 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1829 master_th->th.th_serial_team);
1830 team = master_th->th.th_team;
1832 team->t.t_invoke = invoker;
1833 __kmp_alloc_argv_entries(argc, team, TRUE);
1834 team->t.t_argc = argc;
1835 argv = (
void **)team->t.t_argv;
1837 for (i = argc - 1; i >= 0; --i)
1839 #
if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1840 *argv++ = va_arg(*ap,
void *);
1842 *argv++ = va_arg(ap,
void *);
1845 for (i = 0; i < argc; ++i)
1847 argv[i] = parent_team->t.t_argv[i];
1857 for (i = argc - 1; i >= 0; --i)
1859 #
if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1860 *argv++ = va_arg(*ap,
void *);
1862 *argv++ = va_arg(ap,
void *);
1868 void **exit_runtime_p;
1869 ompt_task_info_t *task_info;
1871 ompt_lw_taskteam_t lw_taskteam;
1873 if (ompt_enabled.enabled) {
1874 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1875 &ompt_parallel_data, return_address);
1876 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1878 task_info = OMPT_CUR_TASK_INFO(master_th);
1879 exit_runtime_p = &(task_info->frame.exit_frame);
1882 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1883 if (ompt_enabled.ompt_callback_implicit_task) {
1884 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1885 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1886 implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
1887 OMPT_CUR_TASK_INFO(master_th)
1888 ->thread_num = __kmp_tid_from_gtid(gtid);
1892 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
1894 exit_runtime_p = &dummy;
1899 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1900 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1901 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1910 if (ompt_enabled.enabled) {
1911 *exit_runtime_p = NULL;
1912 if (ompt_enabled.ompt_callback_implicit_task) {
1913 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1914 ompt_scope_end, NULL, &(task_info->task_data), 1,
1915 OMPT_CUR_TASK_INFO(master_th)->thread_num);
1918 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1919 __ompt_lw_taskteam_unlink(master_th);
1920 if (ompt_enabled.ompt_callback_parallel_end) {
1921 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1922 &ompt_parallel_data, parent_task_data,
1923 OMPT_INVOKER(call_context), return_address);
1925 master_th->th.ompt_thread_info.state = omp_state_overhead;
1931 }
else if (call_context == fork_context_gnu) {
1933 ompt_lw_taskteam_t lwt;
1934 __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
1937 lwt.ompt_task_info.frame.exit_frame = NULL;
1938 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1943 KA_TRACE(20, (
"__kmp_fork_call: T#%d serial exit\n", gtid));
1946 KMP_ASSERT2(call_context < fork_context_last,
1947 "__kmp_fork_call: unknown fork_context parameter");
1950 KA_TRACE(20, (
"__kmp_fork_call: T#%d serial exit\n", gtid));
1957 KF_TRACE(10, (
"__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " 1958 "curtask=%p, curtask_max_aclevel=%d\n",
1959 parent_team->t.t_active_level, master_th,
1960 master_th->th.th_current_task,
1961 master_th->th.th_current_task->td_icvs.max_active_levels));
1965 master_th->th.th_current_task->td_flags.executing = 0;
1968 if (!master_th->th.th_teams_microtask || level > teams_level)
1972 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1976 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
1977 if ((level + 1 < __kmp_nested_nth.used) &&
1978 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
1979 nthreads_icv = __kmp_nested_nth.nth[level + 1];
1986 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1987 kmp_proc_bind_t proc_bind_icv =
1989 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1990 proc_bind = proc_bind_false;
1992 if (proc_bind == proc_bind_default) {
1995 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2001 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2002 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2003 master_th->th.th_current_task->td_icvs.proc_bind)) {
2004 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2009 master_th->th.th_set_proc_bind = proc_bind_default;
2012 if ((nthreads_icv > 0)
2014 || (proc_bind_icv != proc_bind_default)
2017 kmp_internal_control_t new_icvs;
2018 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2019 new_icvs.next = NULL;
2020 if (nthreads_icv > 0) {
2021 new_icvs.nproc = nthreads_icv;
2025 if (proc_bind_icv != proc_bind_default) {
2026 new_icvs.proc_bind = proc_bind_icv;
2031 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2032 team = __kmp_allocate_team(root, nthreads, nthreads,
2039 &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
2042 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2043 team = __kmp_allocate_team(root, nthreads, nthreads,
2050 &master_th->th.th_current_task->td_icvs,
2051 argc USE_NESTED_HOT_ARG(master_th));
2054 10, (
"__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2057 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2058 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2059 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2060 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2061 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2063 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2066 KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
2069 if (!master_th->th.th_teams_microtask || level > teams_level) {
2071 int new_level = parent_team->t.t_level + 1;
2072 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2073 new_level = parent_team->t.t_active_level + 1;
2074 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2078 int new_level = parent_team->t.t_level;
2079 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2080 new_level = parent_team->t.t_active_level;
2081 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2084 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2086 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2089 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2092 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2096 propagateFPControl(team);
2098 if (__kmp_tasking_mode != tskm_immediate_exec) {
2101 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2102 parent_team->t.t_task_team[master_th->th.th_task_state]);
2103 KA_TRACE(20, (
"__kmp_fork_call: Master T#%d pushing task_team %p / team " 2104 "%p, new task_team %p / team %p\n",
2105 __kmp_gtid_from_thread(master_th),
2106 master_th->th.th_task_team, parent_team,
2107 team->t.t_task_team[master_th->th.th_task_state], team));
2109 if (active_level || master_th->th.th_task_team) {
2111 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2112 if (master_th->th.th_task_state_top >=
2113 master_th->th.th_task_state_stack_sz) {
2114 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2115 kmp_uint8 *old_stack, *new_stack;
2117 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2118 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2119 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2121 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2125 old_stack = master_th->th.th_task_state_memo_stack;
2126 master_th->th.th_task_state_memo_stack = new_stack;
2127 master_th->th.th_task_state_stack_sz = new_size;
2128 __kmp_free(old_stack);
2132 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2133 master_th->th.th_task_state;
2134 master_th->th.th_task_state_top++;
2135 #if KMP_NESTED_HOT_TEAMS 2136 if (master_th->th.th_hot_teams &&
2137 team == master_th->th.th_hot_teams[active_level].hot_team) {
2139 master_th->th.th_task_state =
2141 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2144 master_th->th.th_task_state = 0;
2145 #if KMP_NESTED_HOT_TEAMS 2149 #if !KMP_NESTED_HOT_TEAMS 2150 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2151 (team == root->r.r_hot_team));
2157 (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2158 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2160 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2161 (team->t.t_master_tid == 0 &&
2162 (team->t.t_parent == root->r.r_root_team ||
2163 team->t.t_parent->t.t_serialized)));
2167 argv = (
void **)team->t.t_argv;
2171 for (i = argc - 1; i >= 0; --i) {
2173 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 2174 void *new_argv = va_arg(*ap,
void *);
2176 void *new_argv = va_arg(ap,
void *);
2178 KMP_CHECK_UPDATE(*argv, new_argv);
2183 for (i = 0; i < argc; ++i) {
2185 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2191 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2192 if (!root->r.r_active)
2193 root->r.r_active = TRUE;
2195 __kmp_fork_team_threads(root, team, master_th, gtid);
2196 __kmp_setup_icv_copy(team, nthreads,
2197 &master_th->th.th_current_task->td_icvs, loc);
2200 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
2203 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2206 if (team->t.t_active_level == 1
2208 && !master_th->th.th_teams_microtask
2212 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2213 (__kmp_forkjoin_frames_mode == 3 ||
2214 __kmp_forkjoin_frames_mode == 1)) {
2215 kmp_uint64 tmp_time = 0;
2216 if (__itt_get_timestamp_ptr)
2217 tmp_time = __itt_get_timestamp();
2219 master_th->th.th_frame_time = tmp_time;
2220 if (__kmp_forkjoin_frames_mode == 3)
2221 team->t.t_region_time = tmp_time;
2225 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2226 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2228 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2234 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2237 (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2238 root, team, master_th, gtid));
2241 if (__itt_stack_caller_create_ptr) {
2242 team->t.t_stack_id =
2243 __kmp_itt_stack_caller_create();
2254 __kmp_internal_fork(loc, gtid, team);
2255 KF_TRACE(10, (
"__kmp_internal_fork : after : root=%p, team=%p, " 2256 "master_th=%p, gtid=%d\n",
2257 root, team, master_th, gtid));
2260 if (call_context == fork_context_gnu) {
2261 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2266 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2267 team->t.t_id, team->t.t_pkfn));
2270 if (!team->t.t_invoke(gtid)) {
2271 KMP_ASSERT2(0,
"cannot invoke microtask for MASTER thread");
2273 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2274 team->t.t_id, team->t.t_pkfn));
2277 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2280 if (ompt_enabled.enabled) {
2281 master_th->th.ompt_thread_info.state = omp_state_overhead;
2289 static inline void __kmp_join_restore_state(kmp_info_t *thread,
2292 thread->th.ompt_thread_info.state =
2293 ((team->t.t_serialized) ? omp_state_work_serial
2294 : omp_state_work_parallel);
2297 static inline void __kmp_join_ompt(
int gtid, kmp_info_t *thread,
2298 kmp_team_t *team, ompt_data_t *parallel_data,
2299 fork_context_e fork_context,
void *codeptr) {
2300 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2301 if (ompt_enabled.ompt_callback_parallel_end) {
2302 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2303 parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
2307 task_info->frame.enter_frame = NULL;
2308 __kmp_join_restore_state(thread, team);
2312 void __kmp_join_call(
ident_t *loc,
int gtid
2315 enum fork_context_e fork_context
2322 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2324 kmp_team_t *parent_team;
2325 kmp_info_t *master_th;
2330 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2333 master_th = __kmp_threads[gtid];
2334 root = master_th->th.th_root;
2335 team = master_th->th.th_team;
2336 parent_team = team->t.t_parent;
2338 master_th->th.th_ident = loc;
2341 if (ompt_enabled.enabled) {
2342 master_th->th.ompt_thread_info.state = omp_state_overhead;
2347 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2348 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, " 2349 "th_task_team = %p\n",
2350 __kmp_gtid_from_thread(master_th), team,
2351 team->t.t_task_team[master_th->th.th_task_state],
2352 master_th->th.th_task_team));
2353 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2354 team->t.t_task_team[master_th->th.th_task_state]);
2358 if (team->t.t_serialized) {
2360 if (master_th->th.th_teams_microtask) {
2362 int level = team->t.t_level;
2363 int tlevel = master_th->th.th_teams_level;
2364 if (level == tlevel) {
2368 }
else if (level == tlevel + 1) {
2372 team->t.t_serialized++;
2379 if (ompt_enabled.enabled) {
2380 __kmp_join_restore_state(master_th, parent_team);
2387 master_active = team->t.t_master_active;
2395 __kmp_internal_join(loc, gtid, team);
2399 master_th->th.th_task_state =
2407 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2408 void *codeptr = team->t.ompt_team_info.master_return_address;
2412 if (__itt_stack_caller_create_ptr) {
2413 __kmp_itt_stack_caller_destroy(
2414 (__itt_caller)team->t
2419 if (team->t.t_active_level == 1
2421 && !master_th->th.th_teams_microtask
2424 master_th->th.th_ident = loc;
2427 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2428 __kmp_forkjoin_frames_mode == 3)
2429 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2430 master_th->th.th_frame_time, 0, loc,
2431 master_th->th.th_team_nproc, 1);
2432 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2433 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2434 __kmp_itt_region_joined(gtid);
2439 if (master_th->th.th_teams_microtask && !exit_teams &&
2440 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2441 team->t.t_level == master_th->th.th_teams_level + 1) {
2448 team->t.t_active_level--;
2449 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2452 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2453 int old_num = master_th->th.th_team_nproc;
2454 int new_num = master_th->th.th_teams_size.nth;
2455 kmp_info_t **other_threads = team->t.t_threads;
2456 team->t.t_nproc = new_num;
2457 for (i = 0; i < old_num; ++i) {
2458 other_threads[i]->th.th_team_nproc = new_num;
2461 for (i = old_num; i < new_num; ++i) {
2464 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2465 for (b = 0; b < bs_last_barrier; ++b) {
2466 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2467 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2469 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2472 if (__kmp_tasking_mode != tskm_immediate_exec) {
2474 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2480 if (ompt_enabled.enabled) {
2481 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2491 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2492 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2494 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2499 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2502 if (!master_th->th.th_teams_microtask ||
2503 team->t.t_level > master_th->th.th_teams_level)
2507 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2509 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2512 if (ompt_enabled.enabled) {
2513 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2514 if (ompt_enabled.ompt_callback_implicit_task) {
2515 int ompt_team_size = team->t.t_nproc;
2516 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2517 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2518 OMPT_CUR_TASK_INFO(master_th)->thread_num);
2521 task_info->frame.exit_frame = NULL;
2522 task_info->task_data = ompt_data_none;
2526 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2528 __kmp_pop_current_task_from_thread(master_th);
2530 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 2532 master_th->th.th_first_place = team->t.t_first_place;
2533 master_th->th.th_last_place = team->t.t_last_place;
2536 master_th->th.th_def_allocator = team->t.t_def_allocator;
2539 updateHWFPControl(team);
2541 if (root->r.r_active != master_active)
2542 root->r.r_active = master_active;
2544 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2552 master_th->th.th_team = parent_team;
2553 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2554 master_th->th.th_team_master = parent_team->t.t_threads[0];
2555 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2558 if (parent_team->t.t_serialized &&
2559 parent_team != master_th->th.th_serial_team &&
2560 parent_team != root->r.r_root_team) {
2561 __kmp_free_team(root,
2562 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2563 master_th->th.th_serial_team = parent_team;
2566 if (__kmp_tasking_mode != tskm_immediate_exec) {
2567 if (master_th->th.th_task_state_top >
2569 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2571 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2572 master_th->th.th_task_state;
2573 --master_th->th.th_task_state_top;
2575 master_th->th.th_task_state =
2577 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2580 master_th->th.th_task_team =
2581 parent_team->t.t_task_team[master_th->th.th_task_state];
  KA_TRACE(20,
           ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
            __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
            parent_team));
2591 master_th->th.th_current_task->td_flags.executing = 1;
2593 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2596 if (ompt_enabled.enabled) {
2597 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
               thread->th.th_team->t.t_serialized) {
      push = 1;
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
      ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
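/* Note on the routine above: if the hot team currently holds more workers than
   the new value, the extra workers are released here rather than at the next
   fork. A minimal sketch of the observable effect (illustration only; exact
   thread reuse is an implementation detail):

       omp_set_num_threads(8);
       #pragma omp parallel   // hot team grows to 8 threads
       { }
       omp_set_num_threads(2); // hot team is trimmed to 2 right here,
                               // and t_size_changed is set to -1            */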
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // A negative value is ignored: the last valid setting is kept and a
    // warning is issued (if warnings are allowed).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK: max_active_levels is within [0; KMP_MAX_ACTIVE_LEVELS_LIMIT].
    // A zero value is allowed (implementation-defined behavior).
  } else {
    // Input exceeds the upper limit: clamp it (implementation-defined).
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid. Valid parameters should fit in one
  // of two intervals - standard or extended:
  //   <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // Schedule kind is out of range; fall back to the default.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: an invalid chunk indicates the
      // unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
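/* Sketch of the mapping performed above (values are illustrative; the exact
   contents of __kmp_sch_map are defined elsewhere in the runtime):

       __kmp_set_schedule(gtid, kmp_sched_static, 0);  // -> kmp_sch_static, default chunk
       __kmp_set_schedule(gtid, kmp_sched_dynamic, 4); // -> dynamic-chunked entry of __kmp_sch_map, chunk 4
       __kmp_set_schedule(gtid, kmp_sched_auto, 100);  // -> chunk ignored, KMP_DEFAULT_CHUNK   */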
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

  switch (th_type) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    *chunk = 0; // chunk was not set, show this fact via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

#if OMP_40_ENABLED
  if (thr->th.th_teams_microtask) {
    // in a teams region several nested teams share the same level
    int tlevel = thr->th.th_teams_level; // level of the teams construct
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1; // artificially pass by the teams league
    }
  }
#endif

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }
  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
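/* Example of the semantics implemented above (assuming a nested parallel
   region; purely illustrative):

       #pragma omp parallel num_threads(2)   // level 1
       #pragma omp parallel num_threads(3)   // level 2
       {
         omp_get_ancestor_thread_num(2); // == omp_get_thread_num() at this level
         omp_get_ancestor_thread_num(1); // id of the enclosing level-1 thread
         omp_get_ancestor_thread_num(0); // always 0 (the initial thread)
       }                                                                      */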
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

#if OMP_40_ENABLED
  if (thr->th.th_teams_microtask) {
    // in a teams region several nested teams share the same level
    int tlevel = thr->th.th_teams_level; // level of the teams construct
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1; // artificially pass by the teams league
    }
  }
#endif

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }
  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  // Build the run-time schedule from the scheduling globals, which may have
  // been changed independently of each other.
  kmp_r_sched_t r_sched;

  if (__kmp_sched == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static; // detailed static (balanced/greedy)
  } else if (__kmp_sched == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided; // detailed guided (iterative/analytical)
  } else { // STATIC_CHUNKED, DYNAMIC_CHUNKED, or other
    r_sched.r_sched_type = __kmp_sched;
  }
  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    r_sched.chunk = KMP_DEFAULT_CHUNK; // __kmp_chunk may never have been set
  } else {
    r_sched.chunk = __kmp_chunk;
  }
  return r_sched;
}
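/* Illustration of the globals combined above (names as used in this file;
   concrete values depend on OMP_SCHEDULE/KMP_SCHEDULE processing elsewhere):
   with OMP_SCHEDULE="guided,4" the settings code leaves __kmp_sched ==
   kmp_sch_guided_chunked and __kmp_chunk == 4, so this routine returns
   { r_sched_type = __kmp_guided, chunk = 4 }.                                */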
/* Allocate (or reuse) the argv[] array for a team. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the kmp_info_t structs pointed to by t_threads */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
#endif /* OMP_40_ENABLED */

  kmp_internal_control_t g_icvs = {
    0, // int serial_nesting_level; corresponds to value of th_team_serialized
    (kmp_int8)__kmp_dflt_nested, // int nested; internal control for nested parallelism
    (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic thread adjustment
    (kmp_int8)__kmp_env_blocktime, // int bt_set; whether blocktime was explicitly set
    __kmp_dflt_blocktime, // int blocktime; internal control for blocktime
#if KMP_USE_MONITOR
    __kmp_bt_intervals, // int bt_intervals; internal control for blocktime intervals
#endif
    __kmp_dflt_team_nth, // int nproc; threads for the next parallel region
    __kmp_dflt_max_active_levels, // int max_active_levels
    r_sched, // kmp_r_sched_t sched; runtime schedule {sched, chunk} pair
#if OMP_40_ENABLED
    __kmp_nested_proc_bind.bind_types[0],
    __kmp_default_device,
#endif /* OMP_40_ENABLED */
    NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}

static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serialized, as in __kmp_save_internal_controls
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
  root->r.r_nested = __kmp_dflt_nested;
  root->r.r_cg_nthreads = 1;

  /* setup the root team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
                          __kmp_nested_proc_bind.bind_types[0],
#endif
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
#if USE_DEBUGGER
  // Non-NULL value should be assigned to make the debugger display the root team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
                          __kmp_nested_proc_bind.bind_types[0],
#endif
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}
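/* Note on the routine above: every root (uber) thread owns two permanent
   teams -- a single-thread "root team" used while the root executes serial
   code, and a "hot team" that is kept alive between parallel regions so that
   worker threads can be reused instead of re-created.  Both are allocated
   with argc == 0 and the global ICVs captured at initialization time.        */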
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
    ) {
  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.
  kmp_team_list_t l;
  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }
  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);
  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }
  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }
  // Insert team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
void __kmp_print_structure(void) {

  kmp_team_list_t list;
  int gtid, i;

  // Initialize list of teams.
  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
    __kmp_printf("%2d", gtid);
    if (__kmp_threads != NULL) {
      __kmp_printf(" %p", __kmp_threads[gtid]);
    }
    if (__kmp_root != NULL) {
      __kmp_printf(" %p", __kmp_root[gtid]);
    }
    __kmp_printf("\n");
  }

  // Print out __kmp_threads array.
  __kmp_printf("\n------------------------------\nThreads\n--------------------"
               "----------\n");
  if (__kmp_threads != NULL) {
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf("    Our Root:        %p\n", thread->th.th_root);
        __kmp_print_structure_team("    Our Team:     ", thread->th.th_team);
        __kmp_print_structure_team("    Serial Team:  ",
                                   thread->th.th_serial_team);
        __kmp_printf("    Threads:      %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread("    Master:       ",
                                     thread->th.th_team_master);
        __kmp_printf("    Serialized?:  %2d\n", thread->th.th_team_serialized);
        __kmp_printf("    Set NProc:    %2d\n", thread->th.th_set_nproc);
#if OMP_40_ENABLED
        __kmp_printf("    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
#endif
        __kmp_print_structure_thread("    Next in pool: ",
                                     thread->th.th_next_pool);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, thread->th.th_team);
        __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
      }
    }
  } else {
    __kmp_printf("Threads array is not allocated.\n");
  }

  // Print out __kmp_root array.
  __kmp_printf("\n------------------------------\nUbers\n----------------------"
               "--------\n");
  if (__kmp_root != NULL) {
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      if (root != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, root);
        __kmp_print_structure_team("    Root Team:    ", root->r.r_root_team);
        __kmp_print_structure_team("    Hot Team:     ", root->r.r_hot_team);
        __kmp_print_structure_thread("    Uber Thread:  ",
                                     root->r.r_uber_thread);
        __kmp_printf("    Active?:      %2d\n", root->r.r_active);
        __kmp_printf("    Nested?:      %2d\n", root->r.r_nested);
        __kmp_printf("    In Parallel:  %2d\n",
                     KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, root->r.r_root_team);
        __kmp_print_structure_team_accum(list, root->r.r_hot_team);
      }
    }
  } else {
    __kmp_printf("Ubers array is not allocated.\n");
  }

  // Print out accumulated list of teams.
  __kmp_printf("\n------------------------------\nTeams\n----------------------"
               "--------\n");
  while (list->next != NULL) {
    kmp_team_p const *team = list->entry;
    __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
    __kmp_print_structure_team("    Parent Team:      ", team->t.t_parent);
    __kmp_printf("    Master TID:       %2d\n", team->t.t_master_tid);
    __kmp_printf("    Max threads:      %2d\n", team->t.t_max_nproc);
    __kmp_printf("    Levels of serial: %2d\n", team->t.t_serialized);
    __kmp_printf("    Number threads:   %2d\n", team->t.t_nproc);
    for (i = 0; i < team->t.t_nproc; ++i) {
      __kmp_printf("    Thread %2d:      ", i);
      __kmp_print_structure_thread("", team->t.t_threads[i]);
    }
    __kmp_print_structure_team("    Next in pool:     ", team->t.t_next_pool);
    __kmp_printf("\n");
    list = list->next;
  }

  // Print out __kmp_thread_pool and __kmp_team_pool.
  __kmp_printf("\n------------------------------\nPools\n----------------------"
               "--------\n");
  __kmp_print_structure_thread("Thread pool:          ",
                               CCAST(kmp_info_t *, __kmp_thread_pool));
  __kmp_print_structure_team("Team pool:            ",
                             CCAST(kmp_team_t *, __kmp_team_pool));
  __kmp_printf("\n");

  // Free team list.
  while (list != NULL) {
    kmp_team_list_item_t *item = list;
    list = list->next;
    KMP_INTERNAL_FREE(item);
  }
}
/* Table of prime multipliers used by the per-thread linear congruential
   random number generator below. */
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};

/* __kmp_get_random: return a pseudo-random number for the given thread. */
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

/* __kmp_init_random: seed the random number generator for this thread. */
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; returns the
   number reclaimed.  These threads were exited by the OS, but their stale
   entries remain in the threads array. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // reclaim only roots that died while not active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}

/* Try to grow __kmp_threads / __kmp_root capacity by at least nNeed entries;
   returns the number of slots added (0 if no expansion was possible). */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
  /* only for Windows static library */
  added = __kmp_reclaim_dead_roots();
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
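/* Capacity growth in the routine above doubles until the request fits, capped
   at __kmp_sys_max_nth.  Worked example (numbers illustrative): with
   __kmp_threads_capacity == 32 and nNeed == 5, minimumRequiredCapacity is 37
   and the do/while loop settles on newCapacity == 64; both the __kmp_threads
   and __kmp_root arrays are then copied into one combined allocation.        */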
3626 int __kmp_register_root(
int initial_thread) {
3627 kmp_info_t *root_thread;
3631 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3632 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3649 capacity = __kmp_threads_capacity;
3650 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3655 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3656 if (__kmp_tp_cached) {
3657 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3658 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3659 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3661 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3669 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3673 (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3674 KMP_ASSERT(gtid < __kmp_threads_capacity);
3678 TCW_4(__kmp_nth, __kmp_nth + 1);
3682 if (__kmp_adjust_gtid_mode) {
3683 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3684 if (TCR_4(__kmp_gtid_mode) != 2) {
3685 TCW_4(__kmp_gtid_mode, 2);
3688 if (TCR_4(__kmp_gtid_mode) != 1) {
3689 TCW_4(__kmp_gtid_mode, 1);
3694 #ifdef KMP_ADJUST_BLOCKTIME 3697 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3698 if (__kmp_nth > __kmp_avail_proc) {
3699 __kmp_zero_bt = TRUE;
3705 if (!(root = __kmp_root[gtid])) {
3706 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3707 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3710 #if KMP_STATS_ENABLED 3712 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3713 __kmp_stats_thread_ptr->startLife();
3714 KMP_SET_THREAD_STATE(SERIAL_REGION);
3717 __kmp_initialize_root(root);
3720 if (root->r.r_uber_thread) {
3721 root_thread = root->r.r_uber_thread;
3723 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3724 if (__kmp_storage_map) {
3725 __kmp_print_thread_storage_map(root_thread, gtid);
3727 root_thread->th.th_info.ds.ds_gtid = gtid;
3729 root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
3731 root_thread->th.th_root = root;
3732 if (__kmp_env_consistency_check) {
3733 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3736 __kmp_initialize_fast_memory(root_thread);
3740 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3741 __kmp_initialize_bget(root_thread);
3743 __kmp_init_random(root_thread);
3747 if (!root_thread->th.th_serial_team) {
3748 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3749 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3750 root_thread->th.th_serial_team =
3751 __kmp_allocate_team(root, 1, 1,
3758 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3760 KMP_ASSERT(root_thread->th.th_serial_team);
3761 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3762 root_thread->th.th_serial_team));
3765 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3767 root->r.r_root_team->t.t_threads[0] = root_thread;
3768 root->r.r_hot_team->t.t_threads[0] = root_thread;
3769 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3771 root_thread->th.th_serial_team->t.t_serialized = 0;
3772 root->r.r_uber_thread = root_thread;
3775 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3776 TCW_4(__kmp_init_gtid, TRUE);
3779 __kmp_gtid_set_specific(gtid);
3782 __kmp_itt_thread_name(gtid);
3785 #ifdef KMP_TDATA_GTID 3788 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3789 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3791 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 3793 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3794 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3795 KMP_INIT_BARRIER_STATE));
3798 for (b = 0; b < bs_last_barrier; ++b) {
3799 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3801 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3805 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3806 KMP_INIT_BARRIER_STATE);
3808 #if KMP_AFFINITY_SUPPORTED 3810 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3811 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3812 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3813 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3815 if (TCR_4(__kmp_init_middle)) {
3816 __kmp_affinity_set_init_mask(gtid, TRUE);
3820 root_thread->th.th_def_allocator = __kmp_def_allocator;
3823 __kmp_root_counter++;
3826 if (!initial_thread && ompt_enabled.enabled) {
3828 kmp_info_t *root_thread = ompt_get_thread();
3830 ompt_set_thread_state(root_thread, omp_state_overhead);
3832 if (ompt_enabled.ompt_callback_thread_begin) {
3833 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3834 ompt_thread_initial, __ompt_get_thread_data_internal());
3836 ompt_data_t *task_data;
3837 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3838 if (ompt_enabled.ompt_callback_task_create) {
3839 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3840 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3844 ompt_set_thread_state(root_thread, omp_state_work_serial);
3849 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
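/* Summary of __kmp_register_root() above: under __kmp_forkjoin_lock it finds a
   free gtid slot (expanding the threads array if needed), allocates or reuses
   the kmp_root_t and its uber kmp_info_t, builds the serial team, publishes
   the thread in __kmp_threads, initializes barrier state and affinity place
   fields, and, for non-initial roots, notifies OMPT (thread-begin / initial
   task) when OMPT is enabled.                                                */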
#if KMP_NESTED_HOT_TEAMS
// Free the hot team at the given nesting level for this thread, plus all of
// its nested hot teams; returns the number of threads freed (master excluded).
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // master is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
3883 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
3884 kmp_team_t *root_team = root->r.r_root_team;
3885 kmp_team_t *hot_team = root->r.r_hot_team;
3886 int n = hot_team->t.t_nproc;
3889 KMP_DEBUG_ASSERT(!root->r.r_active);
3891 root->r.r_root_team = NULL;
3892 root->r.r_hot_team = NULL;
3895 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
3896 #if KMP_NESTED_HOT_TEAMS 3897 if (__kmp_hot_teams_max_level >
3899 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3900 kmp_info_t *th = hot_team->t.t_threads[i];
3901 if (__kmp_hot_teams_max_level > 1) {
3902 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3904 if (th->th.th_hot_teams) {
3905 __kmp_free(th->th.th_hot_teams);
3906 th->th.th_hot_teams = NULL;
3911 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
3916 if (__kmp_tasking_mode != tskm_immediate_exec) {
3917 __kmp_wait_to_unref_task_teams();
  KA_TRACE(10,
           ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
            "\n",
            (LPVOID) & (root->r.r_uber_thread->th),
            root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3931 if (ompt_enabled.ompt_callback_thread_end) {
3932 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3933 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
3939 root->r.r_cg_nthreads--;
3941 __kmp_reap_thread(root->r.r_uber_thread, 1);
3945 root->r.r_uber_thread = NULL;
3947 root->r.r_begin = FALSE;
3952 void __kmp_unregister_root_current_thread(
int gtid) {
3953 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3957 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3958 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3959 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, " 3962 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3965 kmp_root_t *root = __kmp_root[gtid];
3967 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3968 KMP_ASSERT(KMP_UBER_GTID(gtid));
3969 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3970 KMP_ASSERT(root->r.r_active == FALSE);
3975 kmp_info_t *thread = __kmp_threads[gtid];
3976 kmp_team_t *team = thread->th.th_team;
3977 kmp_task_team_t *task_team = thread->th.th_task_team;
3980 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
3983 thread->th.ompt_thread_info.state = omp_state_undefined;
3985 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3989 __kmp_reset_root(gtid, root);
3992 __kmp_gtid_set_specific(KMP_GTID_DNE);
3993 #ifdef KMP_TDATA_GTID 3994 __kmp_gtid = KMP_GTID_DNE;
3999 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4001 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4008 static int __kmp_unregister_root_other_thread(
int gtid) {
4009 kmp_root_t *root = __kmp_root[gtid];
4012 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4013 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4014 KMP_ASSERT(KMP_UBER_GTID(gtid));
4015 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4016 KMP_ASSERT(root->r.r_active == FALSE);
4018 r = __kmp_reset_root(gtid, root);
4020 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p "
               "curtask=%p ptask=%p\n",
               gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
               team->t.t_implicit_task_taskdata[tid].td_parent);
}
4045 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4046 int tid,
int gtid) {
4050 kmp_info_t *master = team->t.t_threads[0];
4051 KMP_DEBUG_ASSERT(this_thr != NULL);
4052 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4053 KMP_DEBUG_ASSERT(team);
4054 KMP_DEBUG_ASSERT(team->t.t_threads);
4055 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4056 KMP_DEBUG_ASSERT(master);
4057 KMP_DEBUG_ASSERT(master->th.th_root);
4061 TCW_SYNC_PTR(this_thr->th.th_team, team);
4063 this_thr->th.th_info.ds.ds_tid = tid;
4064 this_thr->th.th_set_nproc = 0;
4065 if (__kmp_tasking_mode != tskm_immediate_exec)
4068 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4070 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4072 this_thr->th.th_set_proc_bind = proc_bind_default;
4073 #if KMP_AFFINITY_SUPPORTED 4074 this_thr->th.th_new_place = this_thr->th.th_current_place;
4077 this_thr->th.th_root = master->th.th_root;
4080 this_thr->th.th_team_nproc = team->t.t_nproc;
4081 this_thr->th.th_team_master = master;
4082 this_thr->th.th_team_serialized = team->t.t_serialized;
4083 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4085 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4087 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4088 tid, gtid, this_thr, this_thr->th.th_current_task));
4090 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4093 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4094 tid, gtid, this_thr, this_thr->th.th_current_task));
4099 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4101 this_thr->th.th_local.this_construct = 0;
4103 if (!this_thr->th.th_pri_common) {
4104 this_thr->th.th_pri_common =
4105 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4106 if (__kmp_storage_map) {
4107 __kmp_print_storage_map_gtid(
4108 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4109 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4111 this_thr->th.th_pri_head = NULL;
4116 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4119 sizeof(dispatch_private_info_t) *
4120 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4121 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4122 team->t.t_max_nproc));
4123 KMP_ASSERT(dispatch);
4124 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4125 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4127 dispatch->th_disp_index = 0;
4129 dispatch->th_doacross_buf_idx = 0;
4131 if (!dispatch->th_disp_buffer) {
4132 dispatch->th_disp_buffer =
4133 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4135 if (__kmp_storage_map) {
4136 __kmp_print_storage_map_gtid(
4137 gtid, &dispatch->th_disp_buffer[0],
4138 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4140 : __kmp_dispatch_num_buffers],
4141 disp_size,
"th_%d.th_dispatch.th_disp_buffer " 4142 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4143 gtid, team->t.t_id, gtid);
4146 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4149 dispatch->th_dispatch_pr_current = 0;
4150 dispatch->th_dispatch_sh_current = 0;
4152 dispatch->th_deo_fcn = 0;
4153 dispatch->th_dxo_fcn = 0;
4156 this_thr->th.th_next_pool = NULL;
4158 if (!this_thr->th.th_task_state_memo_stack) {
4160 this_thr->th.th_task_state_memo_stack =
4161 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4162 this_thr->th.th_task_state_top = 0;
4163 this_thr->th.th_task_state_stack_sz = 4;
4164 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4166 this_thr->th.th_task_state_memo_stack[i] = 0;
4169 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4170 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
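/* __kmp_allocate_thread() below either recycles a kmp_info_t from
   __kmp_thread_pool (re-initializing it for the new team) or allocates a brand
   new one: it picks an unused gtid, creates the serial team, initializes
   fast/bget memory, RNG state and barrier flags, and only then starts the OS
   worker via __kmp_create_worker().                                          */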
4180 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4182 kmp_team_t *serial_team;
4183 kmp_info_t *new_thr;
4186 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4187 KMP_DEBUG_ASSERT(root && team);
4188 #if !KMP_NESTED_HOT_TEAMS 4189 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4194 if (__kmp_thread_pool) {
4196 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4197 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4198 if (new_thr == __kmp_thread_pool_insert_pt) {
4199 __kmp_thread_pool_insert_pt = NULL;
4201 TCW_4(new_thr->th.th_in_pool, FALSE);
4204 __kmp_thread_pool_nth--;
4206 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4207 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4208 KMP_ASSERT(!new_thr->th.th_team);
4209 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4210 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
4213 __kmp_initialize_info(new_thr, team, new_tid,
4214 new_thr->th.th_info.ds.ds_gtid);
4215 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4217 TCW_4(__kmp_nth, __kmp_nth + 1);
4218 root->r.r_cg_nthreads++;
4220 new_thr->th.th_task_state = 0;
4221 new_thr->th.th_task_state_top = 0;
4222 new_thr->th.th_task_state_stack_sz = 4;
4224 #ifdef KMP_ADJUST_BLOCKTIME 4227 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4228 if (__kmp_nth > __kmp_avail_proc) {
4229 __kmp_zero_bt = TRUE;
4238 kmp_balign_t *balign = new_thr->th.th_bar;
4239 for (b = 0; b < bs_last_barrier; ++b)
4240 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4243 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4244 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4251 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4252 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4257 if (!TCR_4(__kmp_init_monitor)) {
4258 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4259 if (!TCR_4(__kmp_init_monitor)) {
4260 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4261 TCW_4(__kmp_init_monitor, 1);
4262 __kmp_create_monitor(&__kmp_monitor);
4263 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4274 while (TCR_4(__kmp_init_monitor) < 2) {
4277 KF_TRACE(10, (
"after monitor thread has started\n"));
4280 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4285 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4286 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4290 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4292 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4294 if (__kmp_storage_map) {
4295 __kmp_print_thread_storage_map(new_thr, new_gtid);
4300 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4301 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4302 new_thr->th.th_serial_team = serial_team =
4303 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4310 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4312 KMP_ASSERT(serial_team);
4313 serial_team->t.t_serialized = 0;
4315 serial_team->t.t_threads[0] = new_thr;
4317 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4321 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4324 __kmp_initialize_fast_memory(new_thr);
4328 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4329 __kmp_initialize_bget(new_thr);
4332 __kmp_init_random(new_thr);
4336 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4337 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4340 kmp_balign_t *balign = new_thr->th.th_bar;
4341 for (b = 0; b < bs_last_barrier; ++b) {
4342 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4343 balign[b].bb.team = NULL;
4344 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4345 balign[b].bb.use_oncore_barrier = 0;
4348 new_thr->th.th_spin_here = FALSE;
4349 new_thr->th.th_next_waiting = 0;
4351 new_thr->th.th_blocking =
false;
4354 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4355 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4356 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4357 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4358 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4361 new_thr->th.th_def_allocator = __kmp_def_allocator;
4364 TCW_4(new_thr->th.th_in_pool, FALSE);
4365 new_thr->th.th_active_in_pool = FALSE;
4366 TCW_4(new_thr->th.th_active, TRUE);
4372 root->r.r_cg_nthreads++;
4376 if (__kmp_adjust_gtid_mode) {
4377 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4378 if (TCR_4(__kmp_gtid_mode) != 2) {
4379 TCW_4(__kmp_gtid_mode, 2);
4382 if (TCR_4(__kmp_gtid_mode) != 1) {
4383 TCW_4(__kmp_gtid_mode, 1);
4388 #ifdef KMP_ADJUST_BLOCKTIME 4391 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4392 if (__kmp_nth > __kmp_avail_proc) {
4393 __kmp_zero_bt = TRUE;
4400 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4401 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4403 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4405 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
/* Reinitialize team for reuse.
   The hot team code calls this at every fork barrier, so EPCC barrier tests
   are extremely sensitive to changes in it, especially writes to the team
   struct, which cause a cache invalidation in all threads. */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the master thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}

/* Initialize the team data structure.
   This assumes the t_threads and t_max_nproc are already set.
   Also, we don't touch the arguments. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; would mess up the hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
// Set the full affinity mask for this thread, saving the old mask if requested.
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    if (old_mask != NULL) {
      int status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0)
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
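/* __kmp_partition_places() below distributes the team's threads over the
   master's place partition according to t_proc_bind.  Rough intent of the
   three policies handled there (illustrative; details are in the code):
     - proc_bind_master: every worker is bound to the master's place.
     - proc_bind_close : workers fill consecutive places starting at the
       master's place, wrapping within [first_place, last_place].
     - proc_bind_spread: the partition is split into roughly equal chunks, one
       per thread, so threads land as far apart as possible.                  */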
4511 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4517 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4519 kmp_info_t *master_th = team->t.t_threads[0];
4520 KMP_DEBUG_ASSERT(master_th != NULL);
4521 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4522 int first_place = master_th->th.th_first_place;
4523 int last_place = master_th->th.th_last_place;
4524 int masters_place = master_th->th.th_current_place;
4525 team->t.t_first_place = first_place;
4526 team->t.t_last_place = last_place;
4528 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 4529 "bound to place %d partition = [%d,%d]\n",
4530 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4531 team->t.t_id, masters_place, first_place, last_place));
4533 switch (proc_bind) {
4535 case proc_bind_default:
4538 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4541 case proc_bind_master: {
4543 int n_th = team->t.t_nproc;
4544 for (f = 1; f < n_th; f++) {
4545 kmp_info_t *th = team->t.t_threads[f];
4546 KMP_DEBUG_ASSERT(th != NULL);
4547 th->th.th_first_place = first_place;
4548 th->th.th_last_place = last_place;
4549 th->th.th_new_place = masters_place;
4551 KA_TRACE(100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d " 4552 "partition = [%d,%d]\n",
4553 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4554 f, masters_place, first_place, last_place));
4558 case proc_bind_close: {
4560 int n_th = team->t.t_nproc;
4562 if (first_place <= last_place) {
4563 n_places = last_place - first_place + 1;
4565 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4567 if (n_th <= n_places) {
4568 int place = masters_place;
4569 for (f = 1; f < n_th; f++) {
4570 kmp_info_t *th = team->t.t_threads[f];
4571 KMP_DEBUG_ASSERT(th != NULL);
4573 if (place == last_place) {
4574 place = first_place;
4575 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4580 th->th.th_first_place = first_place;
4581 th->th.th_last_place = last_place;
4582 th->th.th_new_place = place;
4584 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4585 "partition = [%d,%d]\n",
4586 __kmp_gtid_from_thread(team->t.t_threads[f]),
4587 team->t.t_id, f, place, first_place, last_place));
4590 int S, rem, gap, s_count;
4591 S = n_th / n_places;
4593 rem = n_th - (S * n_places);
4594 gap = rem > 0 ? n_places / rem : n_places;
4595 int place = masters_place;
4597 for (f = 0; f < n_th; f++) {
4598 kmp_info_t *th = team->t.t_threads[f];
4599 KMP_DEBUG_ASSERT(th != NULL);
4601 th->th.th_first_place = first_place;
4602 th->th.th_last_place = last_place;
4603 th->th.th_new_place = place;
4606 if ((s_count == S) && rem && (gap_ct == gap)) {
4608 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4610 if (place == last_place) {
4611 place = first_place;
4612 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4620 }
else if (s_count == S) {
4621 if (place == last_place) {
4622 place = first_place;
4623 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4633 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4634 "partition = [%d,%d]\n",
4635 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4636 th->th.th_new_place, first_place, last_place));
4638 KMP_DEBUG_ASSERT(place == masters_place);
4642 case proc_bind_spread: {
4644 int n_th = team->t.t_nproc;
4647 if (first_place <= last_place) {
4648 n_places = last_place - first_place + 1;
4650 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4652 if (n_th <= n_places) {
4655 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4656 int S = n_places / n_th;
4657 int s_count, rem, gap, gap_ct;
4659 place = masters_place;
4660 rem = n_places - n_th * S;
4661 gap = rem ? n_th / rem : 1;
4664 if (update_master_only == 1)
4666 for (f = 0; f < thidx; f++) {
4667 kmp_info_t *th = team->t.t_threads[f];
4668 KMP_DEBUG_ASSERT(th != NULL);
4670 th->th.th_first_place = place;
4671 th->th.th_new_place = place;
4673 while (s_count < S) {
4674 if (place == last_place) {
4675 place = first_place;
4676 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4683 if (rem && (gap_ct == gap)) {
4684 if (place == last_place) {
4685 place = first_place;
4686 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4694 th->th.th_last_place = place;
4697 if (place == last_place) {
4698 place = first_place;
4699 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4706 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4707 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4708 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4709 f, th->th.th_new_place, th->th.th_first_place,
4710 th->th.th_last_place, __kmp_affinity_num_masks));
4716 double current =
static_cast<double>(masters_place);
4718 (
static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4723 if (update_master_only == 1)
4725 for (f = 0; f < thidx; f++) {
4726 first =
static_cast<int>(current);
4727 last =
static_cast<int>(current + spacing) - 1;
4728 KMP_DEBUG_ASSERT(last >= first);
4729 if (first >= n_places) {
4730 if (masters_place) {
4733 if (first == (masters_place + 1)) {
4734 KMP_DEBUG_ASSERT(f == n_th);
4737 if (last == masters_place) {
4738 KMP_DEBUG_ASSERT(f == (n_th - 1));
4742 KMP_DEBUG_ASSERT(f == n_th);
4747 if (last >= n_places) {
4748 last = (n_places - 1);
4753 KMP_DEBUG_ASSERT(0 <= first);
4754 KMP_DEBUG_ASSERT(n_places > first);
4755 KMP_DEBUG_ASSERT(0 <= last);
4756 KMP_DEBUG_ASSERT(n_places > last);
4757 KMP_DEBUG_ASSERT(last_place >= first_place);
4758 th = team->t.t_threads[f];
4759 KMP_DEBUG_ASSERT(th);
4760 th->th.th_first_place = first;
4761 th->th.th_new_place = place;
4762 th->th.th_last_place = last;
4765 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4766 "partition = [%d,%d], spacing = %.4f\n",
4767 __kmp_gtid_from_thread(team->t.t_threads[f]),
4768 team->t.t_id, f, th->th.th_new_place,
4769 th->th.th_first_place, th->th.th_last_place, spacing));
4773 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4775 int S, rem, gap, s_count;
4776 S = n_th / n_places;
4778 rem = n_th - (S * n_places);
4779 gap = rem > 0 ? n_places / rem : n_places;
4780 int place = masters_place;
4783 if (update_master_only == 1)
4785 for (f = 0; f < thidx; f++) {
4786 kmp_info_t *th = team->t.t_threads[f];
4787 KMP_DEBUG_ASSERT(th != NULL);
4789 th->th.th_first_place = place;
4790 th->th.th_last_place = place;
4791 th->th.th_new_place = place;
4794 if ((s_count == S) && rem && (gap_ct == gap)) {
4796 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4798 if (place == last_place) {
4799 place = first_place;
4800 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4808 }
else if (s_count == S) {
4809 if (place == last_place) {
4810 place = first_place;
4811 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4820 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4821 "partition = [%d,%d]\n",
4822 __kmp_gtid_from_thread(team->t.t_threads[f]),
4823 team->t.t_id, f, th->th.th_new_place,
4824 th->th.th_first_place, th->th.th_last_place));
4826 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4834 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
4842 __kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
4844 ompt_data_t ompt_parallel_data,
4847 kmp_proc_bind_t new_proc_bind,
4849 kmp_internal_control_t *new_icvs,
4850 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4851 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4854 int use_hot_team = !root->r.r_active;
4857 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
4858 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4859 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4862 #if KMP_NESTED_HOT_TEAMS 4863 kmp_hot_team_ptr_t *hot_teams;
4865 team = master->th.th_team;
4866 level = team->t.t_active_level;
4867 if (master->th.th_teams_microtask) {
4868 if (master->th.th_teams_size.nteams > 1 &&
4871 (microtask_t)__kmp_teams_master ||
4872 master->th.th_teams_level <
4878 hot_teams = master->th.th_hot_teams;
4879 if (level < __kmp_hot_teams_max_level && hot_teams &&
4889 if (use_hot_team && new_nproc > 1) {
4890 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
4891 #if KMP_NESTED_HOT_TEAMS 4892 team = hot_teams[level].hot_team;
4894 team = root->r.r_hot_team;
4897 if (__kmp_tasking_mode != tskm_immediate_exec) {
4898 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 4899 "task_team[1] = %p before reinit\n",
4900 team->t.t_task_team[0], team->t.t_task_team[1]));
4907 if (team->t.t_nproc == new_nproc) {
4908 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
4911 if (team->t.t_size_changed == -1) {
4912 team->t.t_size_changed = 1;
4914 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4918 kmp_r_sched_t new_sched = new_icvs->sched;
4920 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
4922 __kmp_reinitialize_team(team, new_icvs,
4923 root->r.r_uber_thread->th.th_ident);
4925 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4926 team->t.t_threads[0], team));
4927 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4930 #if KMP_AFFINITY_SUPPORTED 4931 if ((team->t.t_size_changed == 0) &&
4932 (team->t.t_proc_bind == new_proc_bind)) {
4933 if (new_proc_bind == proc_bind_spread) {
4934 __kmp_partition_places(
4937 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: " 4938 "proc_bind = %d, partition = [%d,%d]\n",
4939 team->t.t_id, new_proc_bind, team->t.t_first_place,
4940 team->t.t_last_place));
4942 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4943 __kmp_partition_places(team);
4946 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4949 }
else if (team->t.t_nproc > new_nproc) {
4951 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
4954 team->t.t_size_changed = 1;
4955 #if KMP_NESTED_HOT_TEAMS 4956 if (__kmp_hot_teams_mode == 0) {
4959 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4960 hot_teams[level].hot_team_nth = new_nproc;
4961 #endif // KMP_NESTED_HOT_TEAMS 4963 for (f = new_nproc; f < team->t.t_nproc; f++) {
4964 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4965 if (__kmp_tasking_mode != tskm_immediate_exec) {
4968 team->t.t_threads[f]->th.th_task_team = NULL;
4970 __kmp_free_thread(team->t.t_threads[f]);
4971 team->t.t_threads[f] = NULL;
4973 #if KMP_NESTED_HOT_TEAMS 4978 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4979 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4980 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4981 for (
int b = 0; b < bs_last_barrier; ++b) {
4982 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4983 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4985 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4989 #endif // KMP_NESTED_HOT_TEAMS 4990 team->t.t_nproc = new_nproc;
4992 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
4993 __kmp_reinitialize_team(team, new_icvs,
4994 root->r.r_uber_thread->th.th_ident);
4997 for (f = 0; f < new_nproc; ++f) {
4998 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5002 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5003 team->t.t_threads[0], team));
5005 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5008 for (f = 0; f < team->t.t_nproc; f++) {
5009 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5010 team->t.t_threads[f]->th.th_team_nproc ==
5016 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5017 #if KMP_AFFINITY_SUPPORTED 5018 __kmp_partition_places(team);
5022 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5023 kmp_affin_mask_t *old_mask;
5024 if (KMP_AFFINITY_CAPABLE()) {
5025 KMP_CPU_ALLOC(old_mask);
5030 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5033 team->t.t_size_changed = 1;
5035 #if KMP_NESTED_HOT_TEAMS 5036 int avail_threads = hot_teams[level].hot_team_nth;
5037 if (new_nproc < avail_threads)
5038 avail_threads = new_nproc;
5039 kmp_info_t **other_threads = team->t.t_threads;
5040 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5044 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5045 for (b = 0; b < bs_last_barrier; ++b) {
5046 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5047 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5049 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5053 if (hot_teams[level].hot_team_nth >= new_nproc) {
5056 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5057 team->t.t_nproc = new_nproc;
5063 hot_teams[level].hot_team_nth = new_nproc;
5064 #endif // KMP_NESTED_HOT_TEAMS 5065 if (team->t.t_max_nproc < new_nproc) {
5067 __kmp_reallocate_team_arrays(team, new_nproc);
5068 __kmp_reinitialize_team(team, new_icvs, NULL);
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
        /* Temporarily set full mask for master thread before creation of
           workers; workers inherit the affinity from the master. */
        __kmp_set_thread_affinity_mask_full_tmp(old_mask);
#endif

        /* allocate new threads for the hot team */
        for (f = team->t.t_nproc; f < new_nproc; f++) {
          kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
          KMP_DEBUG_ASSERT(new_worker);
          team->t.t_threads[f] = new_worker;

          KA_TRACE(20,
                   ("__kmp_allocate_team: team %d init T#%d arrived: "
                    "join=%llu, plain=%llu\n",
                    team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id,
                    f, team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));

          { // Initialize barrier data for new threads.
            int b;
            kmp_balign_t *balign = new_worker->th.th_bar;
            for (b = 0; b < bs_last_barrier; ++b) {
              balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
              KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
                               KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
              balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
            }
          }
        }

#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
        if (KMP_AFFINITY_CAPABLE()) {
          /* Restore initial master thread's affinity mask */
          __kmp_set_system_affinity(old_mask, TRUE);
          KMP_CPU_FREE(old_mask);
        }
#endif
#if KMP_NESTED_HOT_TEAMS
      } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
      /* make sure everyone is synchronized */
      int old_nproc = team->t.t_nproc; // save old value, update only new threads
      __kmp_initialize_team(team, new_nproc, new_icvs,
                            root->r.r_uber_thread->th.th_ident);

      /* reinitialize the threads */
      KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
      for (f = 0; f < team->t.t_nproc; ++f)
        __kmp_initialize_info(team->t.t_threads[f], team, f,
                              __kmp_gtid_from_tid(f, team));
      if (level) { // set th_task_state for new threads in nested hot team
        // Look to the master's memo_stack to get the correct value.
        for (f = old_nproc; f < team->t.t_nproc; ++f)
          team->t.t_threads[f]->th.th_task_state =
              team->t.t_threads[0]->th.th_task_state_memo_stack[level];
      } else { // set th_task_state for new threads in non-nested hot team
        int old_state =
            team->t.t_threads[0]->th.th_task_state; // copy master's state
        for (f = old_nproc; f < team->t.t_nproc; ++f)
          team->t.t_threads[f]->th.th_task_state = old_state;
      }

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
      __kmp_partition_places(team);
#endif
    } // Check changes in number of threads
    kmp_info_t *master = team->t.t_threads[0];
    if (master->th.th_teams_microtask) {
      for (f = 1; f < new_nproc; ++f) {
        // propagate teams construct specific info to workers
        kmp_info_t *thr = team->t.t_threads[f];
        thr->th.th_teams_microtask = master->th.th_teams_microtask;
        thr->th.th_teams_level = master->th.th_teams_level;
        thr->th.th_teams_size = master->th.th_teams_size;
      }
    }
#if KMP_NESTED_HOT_TEAMS
    if (level) {
      // Sync barrier state for nested hot teams, not needed for outermost hot team.
      for (f = 1; f < new_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        int b;
        kmp_balign_t *balign = thr->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }
#endif // KMP_NESTED_HOT_TEAMS

    /* reallocate space for arguments if necessary */
    __kmp_alloc_argv_entries(argc, team, TRUE);
    KMP_CHECK_UPDATE(team->t.t_argc, argc);

    KF_TRACE(10, (" hot_team = %p\n", team));

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }

#if OMPT_SUPPORT
    __ompt_team_assign_id(team, ompt_parallel_data);
#endif

    return team;
  }
  /* next, let's try to take one from the team pool */
  for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
    if (team->t.t_max_nproc >= max_nproc) {
      /* take this team from the team pool */
      __kmp_team_pool = team->t.t_next_pool;

      /* setup the team for fresh use */
      __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
      team->t.t_task_team[0] = NULL;
      team->t.t_task_team[1] = NULL;

      /* reallocate space for arguments if necessary */
      __kmp_alloc_argv_entries(argc, team, TRUE);
      KMP_CHECK_UPDATE(team->t.t_argc, argc);

      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
      { // Initialize barrier data.
        int b;
        for (b = 0; b < bs_last_barrier; ++b) {
          team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
          team->t.t_bar[b].b_master_arrived = 0;
          team->t.t_bar[b].b_team_arrived = 0;
#endif
        }
      }

      team->t.t_proc_bind = new_proc_bind;

      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

#if OMPT_SUPPORT
      __ompt_team_assign_id(team, ompt_parallel_data);
#endif

      return team;
    }

    /* reap team if it is too small, then loop back and check the next one */
    team = __kmp_reap_team(team);
    __kmp_team_pool = team;
  }
  /* nothing available in the pool, no matter, make a new team anyway */
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));

  /* and set it up */
  team->t.t_max_nproc = max_nproc;
  __kmp_allocate_team_arrays(team, max_nproc);

  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
  team->t.t_task_team[0] = NULL;
  team->t.t_task_team[1] = NULL;

  if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
  }

  /* allocate space for arguments */
  __kmp_alloc_argv_entries(argc, team, FALSE);
  team->t.t_argc = argc;

  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
      team->t.t_bar[b].b_master_arrived = 0;
      team->t.t_bar[b].b_team_arrived = 0;
#endif
    }
  }

  team->t.t_proc_bind = new_proc_bind;

#if OMPT_SUPPORT
  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;
#endif

  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
/* free the team; return it to the team pool; release all the threads
   associated with it */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  int level;
  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for team_of_workers
      }
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library
  team->t.t_copyin_counter = 0; // init counter for possible reuse

  /* if we are non-hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach reapable state
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time, check this
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
            break;
          }
#endif
          // first check if thread is sleeping
          kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
          KMP_CPU_PAUSE();
        }
      }

      // Delete task teams
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Reset pointer to parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      __kmp_free_thread(team->t.t_threads[f]);
      team->t.t_threads[f] = NULL;
    }

    /* put the team back in the team pool */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  }
}
/* reap the team: destroy it, reclaim all its resources and free its memory */
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* free stuff */
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  return next_pool;
}
// Free the thread.  Don't reap it, just place it on the pool of available
// threads.  The pool (__kmp_thread_pool) is kept sorted by gtid, and
// __kmp_thread_pool_insert_pt tracks the last insertion point to avoid
// rescanning the whole list in the common (non-nested) case.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;
  kmp_root_t *root = this_th->th.th_root;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving thread to pool, switch thread to wait on own b_go flag, and
  // uninitialized (NULL team).
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If __kmp_thread_pool_insert_pt is already past the new insert point,
  // then we need to re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread.
  // In the absence of nested parallelism the loop has 0 iterations.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_thread_pool_nth++;

  TCW_4(__kmp_nth, __kmp_nth - 1);
  root->r.r_cg_nthreads--;

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
}
void *__kmp_launch_thread(kmp_info_t *this_thr) {
  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t *(*volatile pteam);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
  }

#if OMPT_SUPPORT
  ompt_data_t *thread_data;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    thread_data->ptr = NULL;

    this_thr->th.ompt_thread_info.state = omp_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
  }

  if (ompt_enabled.enabled) {
    this_thr->th.ompt_thread_info.state = omp_state_idle;
  }
#endif

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }
#endif

    pteam = (kmp_team_t * (*))(&this_thr->th.th_team);

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
        }
#endif

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = NULL;

        this_thr->th.ompt_thread_info.state = omp_state_overhead;
      }
#endif
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }
  TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  return this_thr;
}
void __kmp_internal_end_dest(void *specific_gtid) {
#if KMP_COMPILER_ICC
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
// significant bits
#endif
  // Make sure no significant bits are lost
  int gtid = (kmp_intptr_t)specific_gtid - 1;
#if KMP_COMPILER_ICC
#pragma warning(pop)
#endif

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in thread-local storage, because 0 is
     reserved for the nothing-stored case */

  if (gtid >= 0 && KMP_UBER_GTID(gtid))
    __kmp_gtid_set_specific(gtid);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }

#endif

/* [Windows] josh: when the atexit handler is called, there may still be more
   than one thread alive */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.

  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      /* Need release fence here to prevent seg faults for tree forkjoin
         barrier (GEH) */
      ANNOTATE_HAPPENS_BEFORE(thread);
      kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
      __kmp_release_64(&flag);
    }

    // Terminate OS thread.
    __kmp_reap_worker(thread);

    // If the thread was actively spinning in the thread pool, decrement the
    // global count of active pool threads.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }

    // Decrement # of [worker] threads in the pool.
    KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
    --__kmp_thread_pool_nth;
  }

  __kmp_free_implicit_task(thread);

// Free the fast memory for tasking
#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif /* USE_FAST_MEMORY */

  __kmp_suspend_uninitialize_thread(thread);

  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

  --__kmp_all_nth;
// __kmp_nth was decremented when thread is added to the pool.

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);
} // __kmp_reap_thread
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

  /* Reclaim the data structures for any root threads that have died but not
     unregistered themselves, in order to shut down cleanly. */
  __kmp_reclaim_dead_roots();

  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // Need to check that the monitor was initialized before reaping it.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
  } else {
#ifdef KMP_DEBUG
    /* make sure that everything has properly ended */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        KMP_ASSERT(!__kmp_root[i]->r.r_active);
      }
    }
#endif

    // Reap the worker threads.
    while (__kmp_thread_pool != NULL) { // Loop through all threads in the pool.
      // Get the next thread from the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      // Reap it.
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap teams.
    while (__kmp_team_pool != NULL) { // Loop through all teams in the pool.
      // Get the next team from the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      // Reap it.
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

    // Threads that are not reaped should not access soon-to-be-freed
    // resources; wait until they stop spinning or go to sleep.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: add checking here, e.g. KMP_DEBUG_ASSERT(__kmp_threads[i] == NULL)
    }

    /* Make sure all threadprivate destructors get run by joining with all
       worker threads before resetting this flag */
    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));

#if KMP_USE_MONITOR
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif
  }

  TCW_4(__kmp_init_gtid, FALSE);

  __kmp_cleanup();
}
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, don't try again; it wouldn't be pretty */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d  (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread; gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
/* worker threads may call this function through the atexit handler; just dump
   the debug buffer */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();
} // __kmp_internal_end_library
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again; it wouldn't be pretty */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d  (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread; gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }
#if defined KMP_DYNAMIC_LIB
  // Do not shut down the dynamic library at the exit of uber thread; shutdown
  // happens later in the library destructor.
  KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
  return;
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* should we finish the run-time?  are all siblings done? */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
} // __kmp_internal_end_thread
// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
  /* getpid() may return different values in different threads if the library
     is linked statically, so the env var name must be built per process. */
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
} // __kmp_reg_status_name

void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  int done = 0;
  union { // Random value used to indicate library initialization.
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
  while (!done) {
    char *value = NULL; // Actual value of the environment variable.

    // Set environment variable, but do not overwrite if it already exists.
    __kmp_env_set(name, __kmp_registration_str, 0);
    // Check the variable is written.
    value = __kmp_env_get(name);
    if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
      done = 1; // Ok, environment variable set successfully, exit the loop.
    } else {
      // Oops. Write failed. Another copy of OpenMP RTL is in memory.
      // Check whether it is alive or dead.
      int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
      char *tail = value;
      char *flag_addr_str = NULL;
      char *flag_val_str = NULL;
      char const *file_name = NULL;
      __kmp_str_split(tail, '-', &flag_addr_str, &tail);
      __kmp_str_split(tail, '-', &flag_val_str, &tail);
      file_name = tail;
      if (tail != NULL) {
        long *flag_addr = 0;
        long flag_val = 0;
        KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
        KMP_SSCANF(flag_val_str, "%lx", &flag_val);
        if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
          // First, check whether the environment-encoded address is mapped
          // into the address space; if so, dereference it to see if it still
          // has the right value.
          if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
            neighbor = 1; // Neighbor is alive.
          } else {
            // The other copy of the library is no longer running.
            neighbor = 2;
          }
        }
      }
      switch (neighbor) {
      case 0: // Cannot parse environment variable -- neighbor status unknown.
        // Assume the other library is alive.
        file_name = "unknown library";
      // Attention! Falling to the next case. That's intentional.
      case 1: { // Neighbor is alive.
        // Check whether duplicates are allowed.
        char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
        if (!__kmp_str_match_true(duplicate_ok)) {
          // That's not allowed. Issue fatal error.
          __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
                      KMP_HNT(DuplicateLibrary), __kmp_msg_null);
        }
        KMP_INTERNAL_FREE(duplicate_ok);
        __kmp_duplicate_library_ok = 1;
        done = 1; // Exit the loop.
      } break;
      case 2: { // Neighbor is dead.
        // Clear the variable and try to register library again.
        __kmp_env_unset(name);
      } break;
      default: { KMP_DEBUG_ASSERT(0); } break;
      }
    }
    KMP_INTERNAL_FREE((void *)value);
  }
  KMP_INTERNAL_FREE((void *)name);
} // __kmp_register_library_startup
void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = __kmp_env_get(name);

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
    // Ok, this is our variable. Delete it.
    __kmp_env_unset(name);
  }

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;
} // __kmp_unregister_library
// End of library registration stuff.
// -----------------------------------------------------------------------------

#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  int size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

  __kmp_validate_locks();

  /* Initialize internal memory allocator */
  __kmp_init_allocator();

  /* Register the library startup via an environment variable and check to see
     whether another copy of the library is already registered. */
  __kmp_register_library_startup();

  /* TODO reinitialization of library */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
  }

  __kmp_global.g.g_abort = 0;
  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);

/* initialize the locks */
#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_init_speculative_stats();
#endif
#endif
#if KMP_STATS_ENABLED
  __kmp_stats_init();
#endif
  __kmp_init_lock(&__kmp_global_lock);
  __kmp_init_queuing_lock(&__kmp_dispatch_lock);
  __kmp_init_lock(&__kmp_debug_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
  __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_init_bootstrap_lock(&__kmp_exit_lock);
#if KMP_USE_MONITOR
  __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
#endif
  __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
  /* conduct initialization and initial setup of configuration */
  __kmp_runtime_initialize();

#if KMP_MIC_SUPPORTED
  __kmp_check_mic_type();
#endif

  // Some global variable initialization moved here from kmp_env_initialize()
  __kmp_abort_delay = 0;

  /* assume the entire machine will be used */
  __kmp_dflt_team_nth_ub = __kmp_xproc;
  if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
    __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
  }
  __kmp_max_nth = __kmp_sys_max_nth;
  __kmp_cg_max_nth = __kmp_sys_max_nth;
  __kmp_teams_max_nth = __kmp_xproc;
  if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
    __kmp_teams_max_nth = __kmp_sys_max_nth;
  }

  // From the "KMP_BLOCKTIME" part of __kmp_env_initialize()
  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
#if KMP_USE_MONITOR
  __kmp_monitor_wakeups =
      KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  __kmp_bt_intervals =
      KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
#endif
  // From the "KMP_LIBRARY" part of __kmp_env_initialize()
  __kmp_library = library_throughput;
  // From KMP_SCHEDULE initialization
  __kmp_static = kmp_sch_static_balanced;

// Barrier branch bit control and barrier method control parts
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat bp_hyper_bar
#define kmp_reduction_barrier_release_pat bp_hyper_bar
#endif // KMP_FAST_REDUCTION_BARRIER
  for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
    __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
    __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
    __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
    __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
#if KMP_FAST_REDUCTION_BARRIER
    if (i == bs_reduction_barrier) {
      __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
      __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
      __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
      __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
    }
#endif // KMP_FAST_REDUCTION_BARRIER
  }
#if KMP_FAST_REDUCTION_BARRIER
#undef kmp_reduction_barrier_release_pat
#undef kmp_reduction_barrier_gather_pat
#undef kmp_reduction_barrier_release_bb
#undef kmp_reduction_barrier_gather_bb
#endif // KMP_FAST_REDUCTION_BARRIER
#if KMP_MIC_SUPPORTED
  if (__kmp_mic_type == mic2) { // KNC
    // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
    __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
    __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
        1; // forkjoin release
    __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
  }
#if KMP_FAST_REDUCTION_BARRIER
  if (__kmp_mic_type == mic2) { // KNC
    __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
  }
#endif // KMP_FAST_REDUCTION_BARRIER
#endif // KMP_MIC_SUPPORTED

// From KMP_CHECKS initialization
#ifdef KMP_DEBUG
  __kmp_env_checks = TRUE; /* development versions have the extra checks */
#else
  __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
#endif

  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
  __kmp_foreign_tp = TRUE;

  __kmp_global.g.g_dynamic = FALSE;
  __kmp_global.g.g_dynamic_mode = dynamic_default;

  __kmp_env_initialize(NULL);

// Print all messages in message catalog for testing purposes.
#ifdef KMP_DEBUG
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
#endif

  __kmp_threads_capacity =
      __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
  // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
  __kmp_tp_capacity = __kmp_default_tp_capacity(
      __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);

  // If the library is shut down properly, both pools must be NULL. Just in
  // case, set them to NULL -- some memory may leak, but subsequent code will
  // work even if pools are not freed.
  KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
  KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
  KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Allocate all of the variable sized records.  __kmp_threads_capacity
     entries are allocated, but the arrays are expandable; since allocation is
     cache-aligned, just add extra padding for the gap. */
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);

  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth ==
                   0); // Asserts fail if the library is reinitializing and
  KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
  __kmp_all_nth = 0;
  __kmp_nth = 0;

  /* setup the uber master thread and hierarchy */
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize  T#%d\n", gtid));
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(KMP_INITIAL_GTID(gtid));

  /* first, init common data */
  __kmp_common_initialize();

#if KMP_OS_UNIX
  /* invoke the child fork handler */
  __kmp_register_atfork();
#endif

#if !defined KMP_DYNAMIC_LIB
  {
    /* Invoke the exit handler when the program finishes, only for static
       library. For dynamic library, we already have _fini and DllMain. */
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif

#if KMP_HANDLE_SIGNALS
#if KMP_OS_UNIX
  /* NOTE: make sure that this is called before the user installs their own
     signal handlers so that the user handlers are called first. */
  __kmp_install_signals(FALSE);
#endif /* KMP_OS_UNIX */
#if KMP_OS_WINDOWS
  __kmp_install_signals(TRUE);
#endif /* KMP_OS_WINDOWS */
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;

  __kmp_init_serial = TRUE;

  if (__kmp_settings) {
    __kmp_env_print();
  }

#if OMP_40_ENABLED
  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }
#endif // OMP_40_ENABLED

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));

  // Save the previous value for __kmp_dflt_team_nth so that we can avoid some
  // reinitialization if it hasn't changed.
  prev_dflt_team_nth = __kmp_dflt_team_nth;

#if KMP_AFFINITY_SUPPORTED
  // __kmp_affinity_initialize() will try to set __kmp_ncores to the number of
  // cores on the machine.
  __kmp_affinity_initialize();

  // Set the affinity mask for each root thread that is currently registered
  // with the RTL.
  for (i = 0; i < __kmp_threads_capacity; i++) {
    if (TCR_PTR(__kmp_threads[i]) != NULL) {
      __kmp_affinity_set_init_mask(i, TRUE);
    }
  }
#endif /* KMP_AFFINITY_SUPPORTED */

  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in the num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now.
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  /* There's no harm in continuing if the following check fails, but it
     indicates an error in the previous logic. */
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Set the nthreads-var for each root thread that has not already
    // explicitly set it with omp_set_num_threads().
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* TODO reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* __kmp_initz_lock is already held, so calling __kmp_serial_initialize
     would cause a deadlock; call __kmp_do_middle_initialize directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs; worker threads will set theirs to these values
  // at thread startup.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_UNIX
#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
#if OMP_45_ENABLED
  dispatch->th_doacross_buf_idx =
      0; /* reset the doacross dispatch buffer counter */
#endif
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_runtime_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_runtime_p = &(
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame);
  } else {
    exit_runtime_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid));
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

  {
    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
    rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                                tid, (int)team->t.t_argc,
                                (void **)team->t.t_argv
#if OMPT_SUPPORT
                                ,
                                exit_runtime_p
#endif
                                );
#if OMPT_SUPPORT
    *exit_runtime_p = NULL;
#endif
  }

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
void __kmp_teams_master(int gtid) {
  // This routine is called by all master threads in teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // Launch league of teams now, but not let workers execute
  // (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif

  // AC: last parameter "1" eliminates the join barrier, which won't work
  // because worker threads are in a fork barrier waiting for more parallel
  // regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
  __kmp_teams_master(gtid);
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* this sets the requested number of threads for the next parallel region
   encountered by this team */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
/* this sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams == 0)
    num_teams = 1; // default number of teams is 1
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  // Remember the number of threads for inner parallel regions
  if (num_threads == 0) {
    if (!TCR_4(__kmp_init_middle))
      __kmp_middle_initialize(); // get __kmp_avail_proc calculated
    num_threads = __kmp_avail_proc / num_teams;
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      // adjust num_threads w/o warning as it is not a user setting
      num_threads = __kmp_teams_max_nth / num_teams;
    }
  } else {
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (!__kmp_reserve_warn) { // user asked for too many threads
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
#endif
  }

  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

/* Join barrier after fork */
#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;

    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid);
    }
  }
#endif

  KMP_ASSERT(this_thr->th.th_team == team);
}
#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we are at
// the outermost level of parallelism.  Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count master thread
  }

  // Skip the master thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }
  // Threads that are active in the thread pool, active in the hot team for
  // this particular root (if we are at the outer par level), and the currently
  // executing thread (to become the master) are available to add to the new
  // team, but are currently contributing to the system load.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the #active omp threads that are available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
} // __kmp_load_balance_nproc()

#endif /* USE_LOAD_BALANCE */
/* NOTE: this is called with the __kmp_init_lock held */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
    (void)__kmp_change_library(TRUE);
  } break;
  case library_turnaround:
    (void)__kmp_change_library(TRUE);
    break;
  case library_throughput:
    (void)__kmp_change_library(FALSE);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings
#if OMP_40_ENABLED
      || __kmp_display_env || __kmp_display_env_verbose
#endif // OMP_40_ENABLED
      ) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct (lck != NULL).
  // If (reduce_data != NULL && reduce_func != NULL) the tree-reduction method
  // can be selected; if loc->flags contains KMP_IDENT_ATOMIC_REDUCE the atomic
  // reduce method can be selected.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||       \
    KMP_OS_DARWIN

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
// KMP_OS_DARWIN

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_WINDOWS

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION: ignore the forced method for serialized teams.
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}

// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}