#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs, ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0);
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);
#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
static int __kmp_expand_threads(int nWish, int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
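/* Note (added commentary, not from the original sources): the excerpt below
   suggests __kmp_get_global_thread_id() resolves the caller's gtid in one of
   three ways, in order of preference: (1) a per-thread TDATA variable when
   __kmp_gtid_mode >= 3, (2) keyed TLS via __kmp_gtid_get_specific() when
   __kmp_gtid_mode >= 2, and (3) an internal search that compares the address
   of a local stack variable against each registered thread's recorded stack
   base and size. Mode (3) also refines the recorded stack bounds when the
   caller's stack turns out to be larger than previously assumed. */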
__kmp_get_global_thread_id()

    kmp_info_t **other_threads;

    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                    __kmp_nth, __kmp_all_nth));

    if (!TCR_4(__kmp_init_gtid))
        return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if (TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));

    if (TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
        return __kmp_gtid_get_specific();

    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

    stack_addr = (char *)&stack_data;
    other_threads = __kmp_threads;

    for (i = 0; i < __kmp_threads_capacity; i++) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        if (stack_addr <= stack_base) {
            size_t stack_diff = stack_base - stack_addr;

            if (stack_diff <= stack_size) {

                KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);

    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: internal alg. failed to find "
                    "thread, using TLS\n"));
    i = __kmp_gtid_get_specific();

    if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
        KMP_FATAL(StackOverflow, i);

    stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    if (stack_addr > stack_base) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);

        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);

    if (__kmp_storage_map) {
        char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                     other_threads[i]->th.th_info.ds.ds_stacksize,
                                     "th_%d stack (refinement)", i);
__kmp_get_global_thread_id_reg()

    if (!__kmp_init_serial) {

#ifdef KMP_TDATA_GTID
    if (TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));

    if (TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
        gtid = __kmp_gtid_get_specific();

        KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
        gtid = __kmp_get_global_thread_id();

    if (gtid == KMP_GTID_DNE) {
        KA_TRACE(10, ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
                      "Registering a new gtid.\n"));
        __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
        if (!__kmp_init_serial) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();

            gtid = __kmp_register_root(FALSE);

        __kmp_release_bootstrap_lock(&__kmp_initz_lock);

    KMP_DEBUG_ASSERT(gtid >= 0);
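/* Note (added commentary): __kmp_check_stack_overlap() appears to compare the
   [stack_beg, stack_end) range of the given thread against the recorded stack
   ranges of all other registered threads and raises a fatal StackOverlap
   message (with a ChangeStackLimit hint) if the ranges intersect. The
   exhaustive pairwise check is only performed when __kmp_env_checks is on and
   the thread is not an uber (root) thread. */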
__kmp_check_stack_overlap(kmp_info_t *th)

    char *stack_beg = NULL;
    char *stack_end = NULL;

    KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
    if (__kmp_storage_map) {
        stack_end = (char *)th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread(th);

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid(gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                         "th_%s stack (%s)", "mon",
                                         (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");

            __kmp_print_storage_map_gtid(gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                         "th_%d stack (%s)", gtid,
                                         (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");

    gtid = __kmp_gtid_from_thread(th);
    if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))

        KA_TRACE(10, ("__kmp_check_stack_overlap: performing extensive checking\n"));
        if (stack_beg == NULL) {
            stack_end = (char *)th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        for (f = 0; f < __kmp_threads_capacity; f++) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if (f_th && f_th != th) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                    (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    if (__kmp_storage_map)
                        __kmp_print_storage_map_gtid(-1, other_stack_beg, other_stack_end,
                                                     (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                                                     "th_%d stack (overlapped)",
                                                     __kmp_gtid_from_thread(f_th));

                    __kmp_msg(kmp_ms_fatal, KMP_MSG(StackOverlap),
                              KMP_HNT(ChangeStackLimit), __kmp_msg_null);

    KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
__kmp_infinite_loop(void)

    static int done = FALSE;
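/* Note (added commentary): __kmp_print_storage_map_gtid() formats a single
   "OMP storage map:" line under __kmp_stdio_lock and, when
   KMP_PRINT_DATA_PLACEMENT is enabled, additionally walks the [p1, p2] range
   page by page to report which memory node backs each page. */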
#define MAX_MESSAGE 512

__kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                             char const *format, ...) {
    char buffer[MAX_MESSAGE];

    va_start(ap, format);
    KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n",
                 p1, p2, (unsigned long)size, format);
    __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
    __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT

    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
        if (__kmp_storage_map_verbose) {
            node = __kmp_get_host_node(p1);

            __kmp_storage_map_verbose = FALSE;

            int localProc = __kmp_get_cpu_from_gtid(gtid);

            p1 = (void *)((size_t)p1 & ~((size_t)PAGE_SIZE - 1));
            p2 = (void *)(((size_t)p2 - 1) & ~((size_t)PAGE_SIZE - 1));

            __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc >> 1);

            __kmp_printf_no_lock(" GTID %d\n", gtid);

                    (char *)p1 += PAGE_SIZE;
                } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
                                     (char *)p1 - 1, lastNode);

            __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                                 (char *)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));

            __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                                 (char *)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));

        __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));

    __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
__kmp_warn(char const *format, ...)

    char buffer[MAX_MESSAGE];

    if (__kmp_generate_warnings == kmp_warnings_off) {

    va_start(ap, format);

    KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
    __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
    __kmp_vprintf(kmp_err, buffer, ap);
    __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
__kmp_abort_process()

    __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

    if (__kmp_debug_buf) {
        __kmp_dump_debug_buffer();

    if (KMP_OS_WINDOWS) {

        __kmp_global.g.g_abort = SIGABRT;

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock(&__kmp_exit_lock);
__kmp_abort_thread(void)

    __kmp_infinite_loop();
__kmp_print_thread_storage_map(kmp_info_t *thr, int gtid)

    __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid);

    __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                                 sizeof(kmp_desc_t), "th_%d.th_info", gtid);

    __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                                 sizeof(kmp_local_t), "th_%d.th_local", gtid);

    __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

    __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                                 &thr->th.th_bar[bs_plain_barrier + 1],
                                 sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                 &thr->th.th_bar[bs_forkjoin_barrier + 1],
                                 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                                 &thr->th.th_bar[bs_reduction_barrier + 1],
                                 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
#endif // KMP_FAST_REDUCTION_BARRIER

__kmp_print_team_storage_map(const char *header, kmp_team_t *team, int team_id, int num_thr)

    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                 header, team_id);

    __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                 sizeof(kmp_balign_team_t) * bs_last_barrier,
                                 "%s_%d.t_bar", header, team_id);

    __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier + 1],
                                 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id);

    __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier + 1],
                                 sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier + 1],
                                 sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid(-1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

    __kmp_print_storage_map_gtid(-1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

    __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                 sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                 header, team_id);

    __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                 sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id);
static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
#ifdef KMP_DYNAMIC_LIB

__kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
    __kmp_init_bootstrap_lock(lck);

__kmp_reset_locks_on_process_detach(int gtid_req) {

    for (i = 0; i < __kmp_threads_capacity; ++i) {
        if (!__kmp_threads)
            continue;
        kmp_info_t *th = __kmp_threads[i];
        if (th == NULL)
            continue;
        int gtid = th->th.th_info.ds.ds_gtid;
        if (gtid == gtid_req)
            continue;
        if (gtid < 0)
            continue;

        int alive = __kmp_is_thread_alive(th, &exit_val);

        if (thread_count == 0)
            break;

    __kmp_reset_lock(&__kmp_forkjoin_lock);

    __kmp_reset_lock(&__kmp_stdio_lock);
DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

    switch (fdwReason) {

    case DLL_PROCESS_ATTACH:
        KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    case DLL_PROCESS_DETACH:
        KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n",
                      __kmp_gtid_get_specific()));

        if (lpReserved != NULL)

            __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());

        __kmp_internal_end_library(__kmp_gtid_get_specific());

    case DLL_THREAD_ATTACH:
        KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    case DLL_THREAD_DETACH:
        KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n",
                      __kmp_gtid_get_specific()));

        __kmp_internal_end_thread(__kmp_gtid_get_specific());
__kmp_change_library(int status)

    old_status = __kmp_yield_init & 1;

        __kmp_yield_init |= 1;

        __kmp_yield_init &= ~1;
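/* Note (added commentary): __kmp_parallel_deo() / __kmp_parallel_dxo() below
   implement entry to and exit from an 'ordered' region. When
   BUILD_PARALLEL_ORDERED is defined, entry spins (KMP_WAIT_YIELD) until
   t_ordered.dt.t_value equals the caller's tid, and exit passes the turn on
   by setting t_value to (tid + 1) % t_nproc. */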
__kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref)

    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid(gtid);

    if (__kmp_env_consistency_check) {
        if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
            __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif

#ifdef BUILD_PARALLEL_ORDERED
    if (!team->t.t_serialized) {

        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ, NULL);
__kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref)

    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid(gtid);
    kmp_team_t *team = __kmp_team_from_gtid(gtid);

    if (__kmp_env_consistency_check) {
        if (__kmp_threads[gtid]->th.th_root->r.r_active)
            __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);

#ifdef BUILD_PARALLEL_ORDERED
    if (!team->t.t_serialized) {

        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

#if OMPT_SUPPORT && OMPT_BLAME
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {

        kmp_info_t *this_thread = __kmp_threads[gtid];
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
            this_thread->th.ompt_thread_info.wait_id);
__kmp_enter_single(int gtid, ident_t *id_ref, int push_ws)

    if (!TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    th = __kmp_threads[gtid];
    team = th->th.th_team;

    th->th.th_ident = id_ref;

    if (team->t.t_serialized) {

        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;

        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);

        if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
            th->th.th_teams_microtask == NULL &&
            team->t.t_active_level == 1)

            __kmp_itt_metadata_single(id_ref);

    if (__kmp_env_consistency_check) {
        if (status && push_ws) {
            __kmp_push_workshare(gtid, ct_psingle, id_ref);

            __kmp_check_workshare(gtid, ct_psingle, id_ref);

        __kmp_itt_single_start(gtid);
__kmp_exit_single(int gtid)

    __kmp_itt_single_end(gtid);

    if (__kmp_env_consistency_check)
        __kmp_pop_workshare(gtid, ct_psingle, NULL);
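/* Note (added commentary): __kmp_reserve_threads() trims a requested team
   size against the active dynamic mode and the global thread limits. As a
   hypothetical worked example of the dynamic_thread_limit branch visible
   below: with __kmp_avail_proc = 8, __kmp_nth = 3 and an inactive root whose
   hot team holds 1 thread, the reservation becomes
       new_nthreads = 8 - 3 + 1 = 6,
   which is then further capped by __kmp_max_nth and by the capacity of the
   __kmp_threads array (expanded via __kmp_expand_threads when possible). */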
__kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                      int master_tid, int set_nthreads

    KMP_DEBUG_ASSERT(__kmp_init_serial);
    KMP_DEBUG_ASSERT(root && parent_team);

    new_nthreads = set_nthreads;
    if (!get__dynamic_2(parent_team, master_tid)) {

#ifdef USE_LOAD_BALANCE
    else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
        new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
        if (new_nthreads == 1) {
            KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
                          master_tid));
        if (new_nthreads < set_nthreads) {
            KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
                          master_tid, new_nthreads));

    else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                       : root->r.r_hot_team->t.t_nproc);
        if (new_nthreads <= 1) {
            KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
                          master_tid));
        if (new_nthreads < set_nthreads) {
            KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
                          master_tid, new_nthreads));

            new_nthreads = set_nthreads;

    else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
        if (set_nthreads > 2) {
            new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
            new_nthreads = (new_nthreads % set_nthreads) + 1;
            if (new_nthreads == 1) {
                KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                              master_tid));
            if (new_nthreads < set_nthreads) {
                KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                              master_tid, new_nthreads));

    if (__kmp_nth + new_nthreads - (root->r.r_active ? 1 :
        root->r.r_hot_team->t.t_nproc) > __kmp_max_nth) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + (root->r.r_active ? 1 :
                          root->r.r_hot_team->t.t_nproc);
        if (tl_nthreads <= 0) {

        if (!get__dynamic_2(parent_team, master_tid)
            && (!__kmp_reserve_warn)) {
            __kmp_reserve_warn = 1;
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS),

        if (tl_nthreads == 1) {
            KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
                          master_tid));

        KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
                      master_tid, tl_nthreads));
        new_nthreads = tl_nthreads;

    capacity = __kmp_threads_capacity;
    if (TCR_PTR(__kmp_threads[0]) == NULL) {

    if (__kmp_nth + new_nthreads - (root->r.r_active ? 1 :
        root->r.r_hot_team->t.t_nproc) > capacity) {

        int slotsRequired = __kmp_nth + new_nthreads - (root->r.r_active ? 1 :
                            root->r.r_hot_team->t.t_nproc) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if (slotsAdded < slotsRequired) {

            new_nthreads -= (slotsRequired - slotsAdded);
            KMP_ASSERT(new_nthreads >= 1);

            if (!get__dynamic_2(parent_team, master_tid)
                && (!__kmp_reserve_warn)) {
                __kmp_reserve_warn = 1;
                if (__kmp_tp_cached) {
                        KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                        KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                        KMP_HNT(PossibleSystemLimitOnThreads),

                        KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                        KMP_HNT(SystemLimitOnThreads),

    if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                      __kmp_get_gtid(), set_nthreads));

    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
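/* Note (added commentary): __kmp_fork_team_threads() installs the master as
   thread 0 of the new team, then (unless a hot team is being reused)
   allocates the remaining workers with __kmp_allocate_thread() and copies the
   team's current barrier arrival counters into each worker's kmp_balign_t so
   the newcomers do not trip the fork/join or plain barriers prematurely. */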
__kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                        kmp_info_t *master_th, int master_gtid)

    KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
    KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

    master_th->th.th_info.ds.ds_tid = 0;
    master_th->th.th_team = team;
    master_th->th.th_team_nproc = team->t.t_nproc;
    master_th->th.th_team_master = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;

    int level = team->t.t_active_level - 1;
    if (master_th->th.th_teams_microtask) {
        if (master_th->th.th_teams_size.nteams > 1) {

        if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
            master_th->th.th_teams_level == team->t.t_level) {

    if (level < __kmp_hot_teams_max_level) {
        if (hot_teams[level].hot_team) {

            KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);

            hot_teams[level].hot_team = team;
            hot_teams[level].hot_team_nth = team->t.t_nproc;

    use_hot_team = team == root->r.r_hot_team;

    if (!use_hot_team) {

        team->t.t_threads[0] = master_th;
        __kmp_initialize_info(master_th, team, 0, master_gtid);

        for (i = 1; i < team->t.t_nproc; i++) {

            kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
            team->t.t_threads[i] = thr;
            KMP_DEBUG_ASSERT(thr);
            KMP_DEBUG_ASSERT(thr->th.th_team == team);

            KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                          __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                          __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                          team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                          team->t.t_bar[bs_plain_barrier].b_arrived));

            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level = master_th->th.th_teams_level;
            thr->th.th_teams_size = master_th->th.th_teams_size;

            kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
            for (b = 0; b < bs_last_barrier; ++b) {
                balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
                KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

                balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

propagateFPControl(kmp_team_t *team)

    if (__kmp_inherit_fp_control) {
        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
        __kmp_store_mxcsr(&mxcsr);
        mxcsr &= KMP_X86_MXCSR_MASK;

        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);

        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);

updateHWFPControl(kmp_team_t *team)

    if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {

        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
        __kmp_store_mxcsr(&mxcsr);
        mxcsr &= KMP_X86_MXCSR_MASK;

        if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);

        if (team->t.t_mxcsr != mxcsr) {
            __kmp_load_mxcsr(&team->t.t_mxcsr);

#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

__kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
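/* Note (added commentary): __kmp_serialized_parallel() runs a parallel region
   on a single thread. On first entry it switches the thread onto its (possibly
   freshly allocated) serial team; on nested entries it simply bumps
   t_serialized and pushes another dispatch buffer, so each nesting level gets
   its own dispatch_private_info_t. */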
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)

    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

    if (!TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    this_thr = __kmp_threads[global_tid];
    serial_team = this_thr->th.th_serial_team;

    KMP_DEBUG_ASSERT(serial_team);

    if (__kmp_tasking_mode != tskm_immediate_exec) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL);
        KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                      global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
        this_thr->th.th_task_team = NULL;

    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
        proc_bind = proc_bind_false;

    else if (proc_bind == proc_bind_default) {

        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;

    this_thr->th.th_set_proc_bind = proc_bind_default;

    if (this_thr->th.th_team != serial_team) {

        int level = this_thr->th.th_team->t.t_level;

        if (serial_team->t.t_serialized) {

            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                                           &this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL));
            __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
            KMP_ASSERT(new_team);

            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                          global_tid, serial_team));

            KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                          global_tid, serial_team));

        KMP_DEBUG_ASSERT(serial_team->t.t_threads);
        KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
        KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n",
                      global_tid, this_thr->th.th_current_task));
        KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

                  &this_thr->th.th_current_task->td_icvs,
                  &this_thr->th.th_current_task->td_parent->td_icvs);

        if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[level + 1];

        if (__kmp_nested_proc_bind.used && (level + 1 < __kmp_nested_proc_bind.used)) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[level + 1];

        serial_team->t.t_pkfn = (microtask_t)(~0);

        this_thr->th.th_info.ds.ds_tid = 0;

        this_thr->th.th_team_nproc = 1;
        this_thr->th.th_team_master = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl(serial_team);

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if (!serial_team->t.t_dispatch->th_disp_buffer) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate(sizeof(dispatch_private_info_t));

        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);

        KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
        KMP_DEBUG_ASSERT(serial_team->t.t_threads);
        KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
        ++serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        int level = this_thr->th.th_team->t.t_level;

        if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[level + 1];

        serial_team->t.t_level++;
        KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                      global_tid, serial_team, serial_team->t.t_level));

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);

        dispatch_private_info_t *disp_buffer = (dispatch_private_info_t *)
            __kmp_allocate(sizeof(dispatch_private_info_t));
        disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    if (__kmp_env_consistency_check)
        __kmp_push_parallel(global_tid, NULL);
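/* Note (added commentary): __kmp_fork_call() is the central fork path. Broadly,
   the excerpt below (1) handles the teams-construct and serialized special
   cases, (2) decides the team size via __kmp_reserve_threads() under
   __kmp_forkjoin_lock, (3) obtains a team with __kmp_allocate_team(), copies
   ICVs and microtask arguments into it, (4) starts the workers with
   __kmp_fork_team_threads()/__kmp_internal_fork(), and finally invokes the
   microtask on the master unless the GNU entry point (fork_context_gnu) will
   do that itself. */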
                enum fork_context_e call_context,
                void *unwrapped_task,
                microtask_t microtask,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX

    int master_this_cons;
    kmp_team_t *parent_team;
    kmp_info_t *master_th;
    int master_set_numthreads;
#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t **p_hot_teams;

    KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);

    KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
    if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {

        void *dummy = KMP_ALLOCA(__kmp_stkpadding);

        if (__kmp_stkpadding > KMP_MAX_STKPADDING)
            __kmp_stkpadding += (short)((kmp_int64)dummy);

    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (!TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    master_th = __kmp_threads[gtid];
    parent_team = master_th->th.th_team;
    master_tid = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root = master_th->th.th_root;
    master_active = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

    ompt_parallel_id_t ompt_parallel_id;
    ompt_task_id_t ompt_task_id;
    ompt_frame_t *ompt_frame;
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;

        ompt_parallel_id = __ompt_parallel_id_new(gtid);
        ompt_task_id = __ompt_get_task_id_internal(0);
        ompt_frame = __ompt_get_task_frame_internal(0);

    level = parent_team->t.t_level;
    active_level = parent_team->t.t_active_level;
    teams_level = master_th->th.th_teams_level;

#if KMP_NESTED_HOT_TEAMS
    p_hot_teams = &master_th->th.th_hot_teams;
    if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
        *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
            sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
        (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
        (*p_hot_teams)[0].hot_team_nth = 1;

        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
        int team_size = master_set_numthreads;

        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
            ompt_task_id, ompt_frame, ompt_parallel_id,
            team_size, unwrapped_task, OMPT_INVOKER(call_context));

    master_th->th.th_ident = loc;
    if (master_th->th.th_teams_microtask &&
        ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level) {

        parent_team->t.t_ident = loc;
        __kmp_alloc_argv_entries(argc, parent_team, TRUE);
        parent_team->t.t_argc = argc;
        argv = (void **)parent_team->t.t_argv;
        for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg(*ap, void *);
#else
            *argv++ = va_arg(ap, void *);
#endif

        if (parent_team == master_th->th.th_serial_team) {

            KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
            parent_team->t.t_serialized--;

            void **exit_runtime_p;

            ompt_lw_taskteam_t lw_taskteam;

            __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                    unwrapped_task, ompt_parallel_id);
            lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
            exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

            __ompt_lw_taskteam_link(&lw_taskteam, master_th);

            my_task_id = lw_taskteam.ompt_task_info.task_id;
            my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                    my_parallel_id, my_task_id);

            master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

            exit_runtime_p = &dummy;

            KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
            KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
            __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

            lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                    ompt_parallel_id, ompt_task_id);

            __ompt_lw_taskteam_unlink(master_th);

            lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

            if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                    ompt_parallel_id, ompt_task_id,
                    OMPT_INVOKER(call_context));

            master_th->th.ompt_thread_info.state = ompt_state_overhead;

        parent_team->t.t_pkfn = microtask;

        parent_team->t.ompt_team_info.microtask = unwrapped_task;

        parent_team->t.t_invoke = invoker;
        KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
        parent_team->t.t_active_level++;
        parent_team->t.t_level++;

        if (master_set_numthreads) {
            if (master_set_numthreads < master_th->th.th_teams_size.nth) {

                kmp_info_t **other_threads = parent_team->t.t_threads;
                parent_team->t.t_nproc = master_set_numthreads;
                for (i = 0; i < master_set_numthreads; ++i) {
                    other_threads[i]->th.th_team_nproc = master_set_numthreads;

            master_th->th.th_set_nproc = 0;

        if (__kmp_debugging) {
            int nth = __kmp_omp_num_threads(loc);

                master_set_numthreads = nth;

        KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n",
                      root, parent_team, master_th, gtid));
        __kmp_internal_fork(loc, gtid, parent_team);
        KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n",
                      root, parent_team, master_th, gtid));

        KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                      gtid, parent_team->t.t_id, parent_team->t.t_pkfn));

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        if (!parent_team->t.t_invoke(gtid)) {
            KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");

        KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                      gtid, parent_team->t.t_id, parent_team->t.t_pkfn));

        KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    if (__kmp_tasking_mode != tskm_immediate_exec) {
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);

    if (parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels) {

    int enter_teams = ((ap == NULL && active_level == 0) || (ap && teams_level > 0 && teams_level == level));

    nthreads = master_set_numthreads ?
               master_set_numthreads : get__nproc_2(parent_team, master_tid);

    if ((!get__nested(master_th) && (root->r.r_in_parallel
        )) || (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d threads\n",
                      gtid, nthreads));

    if (nthreads > 1) {

        __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

        nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads

        if (nthreads == 1) {

            __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    KMP_DEBUG_ASSERT(nthreads > 0);

    master_th->th.th_set_nproc = 0;

    if (nthreads == 1) {

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        void *args[argc];
#else
        void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

        KA_TRACE(20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

        if (call_context == fork_context_intel) {

            master_th->th.th_serial_team->t.t_ident = loc;

            master_th->th.th_serial_team->t.t_level--;

            void **exit_runtime_p;

            ompt_lw_taskteam_t lw_taskteam;

            __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                    unwrapped_task, ompt_parallel_id);
            lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
            exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

            __ompt_lw_taskteam_link(&lw_taskteam, master_th);

            my_task_id = lw_taskteam.ompt_task_info.task_id;
            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                    ompt_parallel_id, my_task_id);

            master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

            exit_runtime_p = &dummy;

            KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
            KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
            __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

            lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                    ompt_parallel_id, ompt_task_id);

            __ompt_lw_taskteam_unlink(master_th);

            lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

            if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                    ompt_parallel_id, ompt_task_id,
                    OMPT_INVOKER(call_context));

            master_th->th.ompt_thread_info.state = ompt_state_overhead;

        }
        else if (microtask == (microtask_t)__kmp_teams_master) {
            KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
            team = master_th->th.th_team;

            team->t.t_invoke = invoker;
            __kmp_alloc_argv_entries(argc, team, TRUE);
            team->t.t_argc = argc;
            argv = (void **)team->t.t_argv;

            for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                *argv++ = va_arg(*ap, void *);
#else
                *argv++ = va_arg(ap, void *);
#endif

            for (i = 0; i < argc; ++i)

                argv[i] = parent_team->t.t_argv[i];

            KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
            KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);

            for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                *argv++ = va_arg(*ap, void *);
#else
                *argv++ = va_arg(ap, void *);
#endif

            void **exit_runtime_p;

            ompt_lw_taskteam_t lw_taskteam;

            __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                    unwrapped_task, ompt_parallel_id);
            lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
            exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

            __ompt_lw_taskteam_link(&lw_taskteam, master_th);

            my_task_id = lw_taskteam.ompt_task_info.task_id;
            my_parallel_id = ompt_parallel_id;
            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                    my_parallel_id, my_task_id);

            master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

            exit_runtime_p = &dummy;

            KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
            KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
            __kmp_invoke_microtask(microtask, gtid, 0, argc, args

            lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                    my_parallel_id, my_task_id);

            __ompt_lw_taskteam_unlink(master_th);

            lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

            if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                    ompt_parallel_id, ompt_task_id,
                    OMPT_INVOKER(call_context));

            master_th->th.ompt_thread_info.state = ompt_state_overhead;

        else if (call_context == fork_context_gnu) {

            ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
                __kmp_allocate(sizeof(ompt_lw_taskteam_t));
            __ompt_lw_taskteam_init(lwt, master_th, gtid,
                                    unwrapped_task, ompt_parallel_id);

            lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
            lwt->ompt_task_info.frame.exit_runtime_frame = 0;
            __ompt_lw_taskteam_link(lwt, master_th);

            KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));

            KMP_ASSERT2(call_context < fork_context_last,
                        "__kmp_fork_call: unknown fork_context parameter");

        KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
                  parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
                  master_th->th.th_current_task->td_icvs.max_active_levels));

    master_th->th.th_current_task->td_flags.executing = 0;

    if (!master_th->th.th_teams_microtask || level > teams_level)

        KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);

    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    if ((level + 1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
        nthreads_icv = __kmp_nested_nth.nth[level + 1];

    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
    if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
        proc_bind = proc_bind_false;

        if (proc_bind == proc_bind_default) {

            proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

        if ((level + 1 < __kmp_nested_proc_bind.used)
            && (__kmp_nested_proc_bind.bind_types[level + 1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
            proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

    master_th->th.th_set_proc_bind = proc_bind_default;

    if ((nthreads_icv > 0)
        || (proc_bind_icv != proc_bind_default)

        kmp_internal_control_t new_icvs;
        copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
        new_icvs.next = NULL;
        if (nthreads_icv > 0) {
            new_icvs.nproc = nthreads_icv;

        if (proc_bind_icv != proc_bind_default) {
            new_icvs.proc_bind = proc_bind_icv;

        KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &new_icvs, argc USE_NESTED_HOT_ARG(master_th));

        KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &master_th->th.th_current_task->td_icvs, argc
                                   USE_NESTED_HOT_ARG(master_th));

    KF_TRACE(10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

    KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
    KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
    KMP_CHECK_UPDATE(team->t.t_ident, loc);
    KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
    KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);

    KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);

    KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

    if (!master_th->th.th_teams_microtask || level > teams_level) {

        int new_level = parent_team->t.t_level + 1;
        KMP_CHECK_UPDATE(team->t.t_level, new_level);
        new_level = parent_team->t.t_active_level + 1;
        KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

        int new_level = parent_team->t.t_level;
        KMP_CHECK_UPDATE(team->t.t_level, new_level);
        new_level = parent_team->t.t_active_level;
        KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

    kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
    if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
        team->t.t_sched = new_sched;

    KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);

    propagateFPControl(team);
2043 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
2045 KA_TRACE( 20, (
"__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
2046 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
2047 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
2049 if ( active_level || master_th->th.th_task_team ) {
2051 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2052 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) {
2053 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
2054 kmp_uint8 *old_stack, *new_stack;
2056 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2057 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
2058 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2060 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) {
2063 old_stack = master_th->th.th_task_state_memo_stack;
2064 master_th->th.th_task_state_memo_stack = new_stack;
2065 master_th->th.th_task_state_stack_sz = new_size;
2066 __kmp_free(old_stack);
2069 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
2070 master_th->th.th_task_state_top++;
2071 #if KMP_NESTED_HOT_TEAMS 2072 if (team == master_th->th.th_hot_teams[active_level].hot_team) {
2073 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
2077 master_th->th.th_task_state = 0;
2078 #if KMP_NESTED_HOT_TEAMS 2082 #if !KMP_NESTED_HOT_TEAMS 2083 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
2087 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2088 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2089 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2090 ( team->t.t_master_tid == 0 &&
2091 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2095 argv = (
void**)team->t.t_argv;
2099 for ( i=argc-1; i >= 0; --i ) {
2101 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 2102 void *new_argv = va_arg(*ap,
void *);
2104 void *new_argv = va_arg(ap,
void *);
2106 KMP_CHECK_UPDATE(*argv, new_argv);
2111 for ( i=0; i < argc; ++i ) {
2113 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2119 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2120 if (!root->r.r_active)
2121 root->r.r_active = TRUE;
2123 __kmp_fork_team_threads( root, team, master_th, gtid );
2124 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
2127 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2130 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2133 if ( team->t.t_active_level == 1
2135 && !master_th->th.th_teams_microtask
2139 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
2140 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
2142 kmp_uint64 tmp_time = 0;
2143 if ( __itt_get_timestamp_ptr )
2144 tmp_time = __itt_get_timestamp();
2146 master_th->th.th_frame_time = tmp_time;
2147 if ( __kmp_forkjoin_frames_mode == 3 )
2148 team->t.t_region_time = tmp_time;
2151 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
2152 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
2154 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2160 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2162 KF_TRACE(10, (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2163 root, team, master_th, gtid));
2166 if ( __itt_stack_caller_create_ptr ) {
2167 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2175 __kmp_internal_fork( loc, gtid, team );
2176 KF_TRACE(10, (
"__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
2177 root, team, master_th, gtid));
2180 if (call_context == fork_context_gnu) {
2181 KA_TRACE( 20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid ));
2186 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2187 gtid, team->t.t_id, team->t.t_pkfn ) );
2191 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2192 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
2194 if (! team->t.t_invoke( gtid )) {
2195 KMP_ASSERT2( 0,
"cannot invoke microtask for MASTER thread" );
2198 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2199 gtid, team->t.t_id, team->t.t_pkfn ) );
2202 KA_TRACE( 20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid ));
2206 master_th->th.ompt_thread_info.state = ompt_state_overhead;
__kmp_join_restore_state(

    thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
                                         ompt_state_work_serial : ompt_state_work_parallel);

                ompt_parallel_id_t parallel_id,
                fork_context_e fork_context)

    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
            parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));

    __kmp_join_restore_state(thread, team);
__kmp_join_call(ident_t *loc, int gtid
                , enum fork_context_e fork_context

    KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
    kmp_team_t *parent_team;
    kmp_info_t *master_th;

    KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

    master_th = __kmp_threads[gtid];
    root = master_th->th.th_root;
    team = master_th->th.th_team;
    parent_team = team->t.t_parent;

    master_th->th.th_ident = loc;

    master_th->th.ompt_thread_info.state = ompt_state_overhead;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
        KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                      __kmp_gtid_from_thread(master_th), team,
                      team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team));
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state]);

    if (team->t.t_serialized) {

        if (master_th->th.th_teams_microtask) {

            int level = team->t.t_level;
            int tlevel = master_th->th.th_teams_level;
            if (level == tlevel) {

            } else if (level == tlevel + 1) {

                team->t.t_serialized++;

        __kmp_join_restore_state(master_th, parent_team);

    master_active = team->t.t_master_active;

    __kmp_internal_join(loc, gtid, team);

    master_th->th.th_task_state = 0;

    ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;

    if (__itt_stack_caller_create_ptr) {
        __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);

    if (team->t.t_active_level == 1
        && !master_th->th.th_teams_microtask

        master_th->th.th_ident = loc;

        if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode == 3)
            __kmp_itt_frame_submit(gtid, team->t.t_region_time, master_th->th.th_frame_time,
                                   0, loc, master_th->th.th_team_nproc, 1);
        else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
                 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
            __kmp_itt_region_joined(gtid);
    if (master_th->th.th_teams_microtask &&
        team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        team->t.t_level == master_th->th.th_teams_level + 1) {

        team->t.t_active_level--;
        KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);

        if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
            int old_num = master_th->th.th_team_nproc;
            int new_num = master_th->th.th_teams_size.nth;
            kmp_info_t **other_threads = team->t.t_threads;
            team->t.t_nproc = new_num;
            for (i = 0; i < old_num; ++i) {
                other_threads[i]->th.th_team_nproc = new_num;

            for (i = old_num; i < new_num; ++i) {

                kmp_balign_t *balign = other_threads[i]->th.th_bar;
                for (b = 0; b < bs_last_barrier; ++b) {
                    balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

                    balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;

                if (__kmp_tasking_mode != tskm_immediate_exec) {

                    other_threads[i]->th.th_task_state = master_th->th.th_task_state;

        __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);

    master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
    master_th->th.th_local.this_construct = team->t.t_master_this_cons;

    master_th->th.th_dispatch =
        &parent_team->t.t_dispatch[team->t.t_master_tid];

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (!master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level)

        KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);

    KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
            parallel_id, task_info->task_id);

    task_info->frame.exit_runtime_frame = 0;
    task_info->task_id = 0;

    KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
                  0, master_th, team));
    __kmp_pop_current_task_from_thread(master_th);

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;

    updateHWFPControl(team);

    if (root->r.r_active != master_active)
        root->r.r_active = master_active;

    __kmp_free_team(root, team USE_NESTED_HOT_ARG(master_th));

    master_th->th.th_team = parent_team;
    master_th->th.th_team_nproc = parent_team->t.t_nproc;
    master_th->th.th_team_master = parent_team->t.t_threads[0];
    master_th->th.th_team_serialized = parent_team->t.t_serialized;

    if (parent_team->t.t_serialized &&
        parent_team != master_th->th.th_serial_team &&
        parent_team != root->r.r_root_team) {
        __kmp_free_team(root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
        master_th->th.th_serial_team = parent_team;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
        if (master_th->th.th_task_state_top > 0) {
            KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);

            master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
            --master_th->th.th_task_state_top;

            master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];

        master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
        KA_TRACE(20, ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                      __kmp_gtid_from_thread(master_th), master_th->th.th_task_team, parent_team));

    master_th->th.th_current_task->td_flags.executing = 1;

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);

    KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
__kmp_save_internal_controls(kmp_info_t *thread)

    if (thread->th.th_team != thread->th.th_serial_team) {

    if (thread->th.th_team->t.t_serialized > 1) {

        if (thread->th.th_team->t.t_control_stack_top == NULL) {

        if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
            thread->th.th_team->t.t_serialized) {

    kmp_internal_control_t *control = (kmp_internal_control_t *)__kmp_allocate(sizeof(kmp_internal_control_t));

    copy_icvs(control, &thread->th.th_current_task->td_icvs);

    control->serial_nesting_level = thread->th.th_team->t.t_serialized;

    control->next = thread->th.th_team->t.t_control_stack_top;
    thread->th.th_team->t.t_control_stack_top = control;
2549 __kmp_set_num_threads(
int new_nth,
int gtid )
2554 KF_TRACE( 10, (
"__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2555 KMP_DEBUG_ASSERT( __kmp_init_serial );
2559 else if (new_nth > __kmp_max_nth)
2560 new_nth = __kmp_max_nth;
2563 thread = __kmp_threads[gtid];
2565 __kmp_save_internal_controls( thread );
2567 set__nproc( thread, new_nth );
2574 root = thread->th.th_root;
2575 if ( __kmp_init_parallel && ( ! root->r.r_active )
2576 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2577 #
if KMP_NESTED_HOT_TEAMS
2578 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2581 kmp_team_t *hot_team = root->r.r_hot_team;
2584 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2587 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2588 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2589 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2591 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2593 __kmp_free_thread( hot_team->t.t_threads[f] );
2594 hot_team->t.t_threads[f] = NULL;
2596 hot_team->t.t_nproc = new_nth;
2597 #if KMP_NESTED_HOT_TEAMS
2598 if( thread->th.th_hot_teams ) {
2599 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2600 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2604 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2609 for( f=0 ; f < new_nth; f++ ) {
2610 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2611 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2614 hot_team->t.t_size_changed = -1;
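/*
 * Reader note (added): __kmp_set_max_active_levels below validates its argument:
 * a negative value only triggers KMP_WARNING( ActiveLevelsNegative ) and the call
 * is ignored, a value above KMP_MAX_ACTIVE_LEVELS_LIMIT is clamped to that limit
 * with a warning, and the accepted value is stored in the current task's ICVs via
 * set__max_active_levels after __kmp_save_internal_controls.
 */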
2620 __kmp_set_max_active_levels(
int gtid,
int max_active_levels )
2624 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2625 KMP_DEBUG_ASSERT( __kmp_init_serial );
2628 if( max_active_levels < 0 ) {
2629 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2633 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2636 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2640 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2641 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2646 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2648 thread = __kmp_threads[ gtid ];
2650 __kmp_save_internal_controls( thread );
2652 set__max_active_levels( thread, max_active_levels );
2658 __kmp_get_max_active_levels(
int gtid )
2662 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2663 KMP_DEBUG_ASSERT( __kmp_init_serial );
2665 thread = __kmp_threads[ gtid ];
2666 KMP_DEBUG_ASSERT( thread->th.th_current_task );
2667 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
2668 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2669 return thread->th.th_current_task->td_icvs.max_active_levels;
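/*
 * Reader note (added): __kmp_set_schedule below maps the public kmp_sched_t kind
 * onto the internal kmp_sch_* enumeration through __kmp_sch_map (an out-of-range
 * kind falls back to kmp_sched_default after a warning) and then records the
 * resulting r_sched_type and chunk in the current task's sched ICV;
 * kmp_sched_auto ignores the user chunk and uses KMP_DEFAULT_CHUNK.
 */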
2674 __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk )
2679 KF_TRACE( 10, ( "__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2680 KMP_DEBUG_ASSERT( __kmp_init_serial );
2686 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2687 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2692 KMP_MSG( ScheduleKindOutOfRange, kind ),
2693 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2696 kind = kmp_sched_default;
2700 thread = __kmp_threads[ gtid ];
2702 __kmp_save_internal_controls( thread );
2704 if ( kind < kmp_sched_upper_std ) {
2705 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2708 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2710 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
2714 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2715 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2717 if ( kind == kmp_sched_auto ) {
2719 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2721 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2727 __kmp_get_schedule(
int gtid, kmp_sched_t * kind,
int * chunk )
2732 KF_TRACE( 10, ( "__kmp_get_schedule: thread %d\n", gtid ));
2733 KMP_DEBUG_ASSERT( __kmp_init_serial );
2735 thread = __kmp_threads[ gtid ];
2737 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2739 switch ( th_type ) {
2741 case kmp_sch_static_greedy:
2742 case kmp_sch_static_balanced:
2743 *kind = kmp_sched_static;
2746 case kmp_sch_static_chunked:
2747 *kind = kmp_sched_static;
2749 case kmp_sch_dynamic_chunked:
2750 *kind = kmp_sched_dynamic;
2753 case kmp_sch_guided_iterative_chunked:
2754 case kmp_sch_guided_analytical_chunked:
2755 *kind = kmp_sched_guided;
2758 *kind = kmp_sched_auto;
2760 case kmp_sch_trapezoidal:
2761 *kind = kmp_sched_trapezoidal;
2769 KMP_FATAL( UnknownSchedulingType, th_type );
2772 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
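/*
 * Reader note (added): __kmp_get_ancestor_thread_num below appears to walk up the
 * t_parent chain from the current team, using t_level together with each team's
 * t_serialized counter to account for serialized nesting; level 0 returns 0,
 * negative or too-deep levels return -1, and the teams construct gets special
 * handling via th_teams_microtask / th_teams_level.
 */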
2776 __kmp_get_ancestor_thread_num(
int gtid,
int level ) {
2782 KF_TRACE( 10, ( "__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2783 KMP_DEBUG_ASSERT( __kmp_init_serial );
2786 if( level == 0 ) return 0;
2787 if( level < 0 ) return -1;
2788 thr = __kmp_threads[ gtid ];
2789 team = thr->th.th_team;
2790 ii = team->t.t_level;
2791 if( level > ii ) return -1;
2794 if( thr->th.th_teams_microtask ) {
2796 int tlevel = thr->th.th_teams_level;
2797 if( level <= tlevel ) {
2798 KMP_DEBUG_ASSERT( ii >= tlevel );
2800 if ( ii == tlevel ) {
2809 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2811 dd = team->t.t_serialized;
2815 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2818 if( ( team->t.t_serialized ) && ( !dd ) ) {
2819 team = team->t.t_parent;
2823 team = team->t.t_parent;
2824 dd = team->t.t_serialized;
2829 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
2833 __kmp_get_team_size(
int gtid,
int level ) {
2839 KF_TRACE( 10, ( "__kmp_get_team_size: thread %d %d\n", gtid, level ));
2840 KMP_DEBUG_ASSERT( __kmp_init_serial );
2843 if( level == 0 ) return 1;
2844 if( level < 0 ) return -1;
2845 thr = __kmp_threads[ gtid ];
2846 team = thr->th.th_team;
2847 ii = team->t.t_level;
2848 if( level > ii ) return -1;
2851 if( thr->th.th_teams_microtask ) {
2853 int tlevel = thr->th.th_teams_level;
2854 if( level <= tlevel ) {
2855 KMP_DEBUG_ASSERT( ii >= tlevel );
2857 if ( ii == tlevel ) {
2868 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2871 if( team->t.t_serialized && ( !dd ) ) {
2872 team = team->t.t_parent;
2876 team = team->t.t_parent;
2881 return team->t.t_nproc;
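/*
 * Reader note (added): __kmp_get_schedule_global below builds the default
 * kmp_r_sched_t from the global settings: the (partially elided) branches select
 * __kmp_static, __kmp_guided or the raw __kmp_sched value, and the chunk falls
 * back to KMP_DEFAULT_CHUNK whenever __kmp_chunk is smaller than that default.
 */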
2885 __kmp_get_schedule_global() {
2889 kmp_r_sched_t r_sched;
2895 r_sched.r_sched_type = __kmp_static;
2897 r_sched.r_sched_type = __kmp_guided;
2899 r_sched.r_sched_type = __kmp_sched;
2902 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
2903 r_sched.chunk = KMP_DEFAULT_CHUNK;
2905 r_sched.chunk = __kmp_chunk;
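/*
 * Reader note (added): __kmp_alloc_argv_entries keeps small argument lists
 * (argc <= KMP_INLINE_ARGV_ENTRIES) in the team's inline t_inline_argv buffer and
 * otherwise page-allocates an array of at least KMP_MIN_MALLOC_ARGV_ENTRIES (or
 * 2*argc) pointers, freeing a previously heap-allocated t_argv when realloc'ing.
 */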
2920 __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc )
2923 KMP_DEBUG_ASSERT( team );
2924 if( !realloc || argc > team->t.t_max_argc ) {
2926 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2927 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
2929 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2930 __kmp_free( (void *) team->t.t_argv );
2932 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2934 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
2935 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2936 team->t.t_id, team->t.t_max_argc ));
2937 team->t.t_argv = &team->t.t_inline_argv[0];
2938 if ( __kmp_storage_map ) {
2939 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2940 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2941 ( sizeof(void *) * KMP_INLINE_ARGV_ENTRIES ),
2942 "team_%d.t_inline_argv",
2947 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
2948 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2949 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2950 team->t.t_id, team->t.t_max_argc ));
2951 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
2952 if ( __kmp_storage_map ) {
2953 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2954 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2962 __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2965 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
2966 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2967 team->t.t_disp_buffer = (dispatch_shared_info_t*)
2968 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
2969 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
2970 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
2971 team->t.t_max_nproc = max_nth;
2974 for(i = 0 ; i < num_disp_buff; ++i) {
2975 team->t.t_disp_buffer[i].buffer_index = i;
2977 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2983 __kmp_free_team_arrays(kmp_team_t *team) {
2986 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2987 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2988 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2989 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2992 __kmp_free(team->t.t_threads);
2993 __kmp_free(team->t.t_disp_buffer);
2994 __kmp_free(team->t.t_dispatch);
2995 __kmp_free(team->t.t_implicit_task_taskdata);
2996 team->t.t_threads = NULL;
2997 team->t.t_disp_buffer = NULL;
2998 team->t.t_dispatch = NULL;
2999 team->t.t_implicit_task_taskdata = 0;
3003 __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3004 kmp_info_t **oldThreads = team->t.t_threads;
3006 __kmp_free(team->t.t_disp_buffer);
3007 __kmp_free(team->t.t_dispatch);
3008 __kmp_free(team->t.t_implicit_task_taskdata);
3009 __kmp_allocate_team_arrays(team, max_nth);
3011 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
3013 __kmp_free(oldThreads);
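/*
 * Reader note (added): __kmp_get_global_icvs below assembles the initial ICV block
 * for a new root from the global defaults shown in the initializer: nesting and
 * dynamic-adjustment flags, blocktime, default team size, max active levels, the
 * global default schedule and the first entry of __kmp_nested_proc_bind.
 */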
3016 static kmp_internal_control_t
3017 __kmp_get_global_icvs( void ) {
3019 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3022 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3025 kmp_internal_control_t g_icvs = {
3027 (kmp_int8)__kmp_dflt_nested,
3028 (kmp_int8)__kmp_global.g.g_dynamic,
3029 (kmp_int8)__kmp_env_blocktime,
3030 __kmp_dflt_blocktime,
3032 __kmp_dflt_team_nth,
3034 __kmp_dflt_max_active_levels,
3037 __kmp_nested_proc_bind.bind_types[0],
3045 static kmp_internal_control_t
3046 __kmp_get_x_global_icvs( const kmp_team_t *team ) {
3048 kmp_internal_control_t gx_icvs;
3049 gx_icvs.serial_nesting_level = 0;
3050 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3051 gx_icvs.next = NULL;
3057 __kmp_initialize_root( kmp_root_t *root )
3060 kmp_team_t *root_team;
3061 kmp_team_t *hot_team;
3062 int hot_team_max_nth;
3063 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3064 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3065 KMP_DEBUG_ASSERT( root );
3066 KMP_ASSERT( ! root->r.r_begin );
3069 __kmp_init_lock( &root->r.r_begin_lock );
3070 root->r.r_begin = FALSE;
3071 root->r.r_active = FALSE;
3072 root->r.r_in_parallel = 0;
3073 root->r.r_blocktime = __kmp_dflt_blocktime;
3074 root->r.r_nested = __kmp_dflt_nested;
3078 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
3081 __kmp_allocate_team(
3089 __kmp_nested_proc_bind.bind_types[0],
3093 USE_NESTED_HOT_ARG(NULL)
3097 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3100 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3102 root->r.r_root_team = root_team;
3103 root_team->t.t_control_stack_top = NULL;
3106 root_team->t.t_threads[0] = NULL;
3107 root_team->t.t_nproc = 1;
3108 root_team->t.t_serialized = 1;
3110 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3111 root_team->t.t_sched.chunk = r_sched.chunk;
3112 KA_TRACE( 20, ( "__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3113 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3117 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
3120 __kmp_allocate_team(
3123 __kmp_dflt_team_nth_ub * 2,
3128 __kmp_nested_proc_bind.bind_types[0],
3132 USE_NESTED_HOT_ARG(NULL)
3134 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3136 root->r.r_hot_team = hot_team;
3137 root_team->t.t_control_stack_top = NULL;
3140 hot_team->t.t_parent = root_team;
3143 hot_team_max_nth = hot_team->t.t_max_nproc;
3144 for ( f = 0; f < hot_team_max_nth; ++ f ) {
3145 hot_team->t.t_threads[ f ] = NULL;
3147 hot_team->t.t_nproc = 1;
3149 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3150 hot_team->t.t_sched.chunk = r_sched.chunk;
3151 hot_team->t.t_size_changed = 0;
3157 typedef struct kmp_team_list_item {
3158 kmp_team_p const * entry;
3159 struct kmp_team_list_item * next;
3160 } kmp_team_list_item_t;
3161 typedef kmp_team_list_item_t * kmp_team_list_t;
3165 __kmp_print_structure_team_accum(
3166 kmp_team_list_t list,
3167 kmp_team_p const * team
3177 KMP_DEBUG_ASSERT( list != NULL );
3178 if ( team == NULL ) {
3182 __kmp_print_structure_team_accum( list, team->t.t_parent );
3183 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3187 while ( l->next != NULL && l->entry != team ) {
3190 if ( l->next != NULL ) {
3196 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3202 kmp_team_list_item_t * item =
3203 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3212 __kmp_print_structure_team(
3214 kmp_team_p const * team
3217 __kmp_printf( "%s", title );
3218 if ( team != NULL ) {
3219 __kmp_printf( "%2x %p\n", team->t.t_id, team );
3221 __kmp_printf( " - (nil)\n" );
3226 __kmp_print_structure_thread(
3228 kmp_info_p const * thread
3231 __kmp_printf( "%s", title );
3232 if ( thread != NULL ) {
3233 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3235 __kmp_printf( " - (nil)\n" );
3240 __kmp_print_structure(
3244 kmp_team_list_t list;
3247 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3251 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3254 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3255 __kmp_printf( "%2d", gtid );
3256 if ( __kmp_threads != NULL ) {
3257 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3259 if ( __kmp_root != NULL ) {
3260 __kmp_printf( " %p", __kmp_root[ gtid ] );
3262 __kmp_printf( "\n" );
3267 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3268 if ( __kmp_threads != NULL ) {
3270 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3271 kmp_info_t const * thread = __kmp_threads[ gtid ];
3272 if ( thread != NULL ) {
3273 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3274 __kmp_printf( "    Our Root:        %p\n", thread->th.th_root );
3275 __kmp_print_structure_team( "    Our Team:     ", thread->th.th_team );
3276 __kmp_print_structure_team( "    Serial Team:  ", thread->th.th_serial_team );
3277 __kmp_printf( "    Threads:      %2d\n", thread->th.th_team_nproc );
3278 __kmp_print_structure_thread( "    Master:       ", thread->th.th_team_master );
3279 __kmp_printf( "    Serialized?:  %2d\n", thread->th.th_team_serialized );
3280 __kmp_printf( "    Set NProc:    %2d\n", thread->th.th_set_nproc );
3282 __kmp_printf( "    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3284 __kmp_print_structure_thread( "    Next in pool: ", thread->th.th_next_pool );
3285 __kmp_printf( "\n" );
3286 __kmp_print_structure_team_accum( list, thread->th.th_team );
3287 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3291 __kmp_printf( "Threads array is not allocated.\n" );
3295 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3296 if ( __kmp_root != NULL ) {
3298 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3299 kmp_root_t const * root = __kmp_root[ gtid ];
3300 if ( root != NULL ) {
3301 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3302 __kmp_print_structure_team( "    Root Team:    ", root->r.r_root_team );
3303 __kmp_print_structure_team( "    Hot Team:     ", root->r.r_hot_team );
3304 __kmp_print_structure_thread( "    Uber Thread:  ", root->r.r_uber_thread );
3305 __kmp_printf( "    Active?:      %2d\n", root->r.r_active );
3306 __kmp_printf( "    Nested?:      %2d\n", root->r.r_nested );
3307 __kmp_printf( "    In Parallel:  %2d\n", root->r.r_in_parallel );
3308 __kmp_printf( "\n" );
3309 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3310 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3314 __kmp_printf( "Ubers array is not allocated.\n" );
3317 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3318 while ( list->next != NULL ) {
3319 kmp_team_p const * team = list->entry;
3321 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3322 __kmp_print_structure_team( "    Parent Team:      ", team->t.t_parent );
3323 __kmp_printf( "    Master TID:       %2d\n", team->t.t_master_tid );
3324 __kmp_printf( "    Max threads:      %2d\n", team->t.t_max_nproc );
3325 __kmp_printf( "    Levels of serial: %2d\n", team->t.t_serialized );
3326 __kmp_printf( "    Number threads:   %2d\n", team->t.t_nproc );
3327 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3328 __kmp_printf( "    Thread %2d:      ", i );
3329 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3331 __kmp_print_structure_team( "    Next in pool:     ", team->t.t_next_pool );
3332 __kmp_printf( "\n" );
3337 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3338 __kmp_print_structure_thread( "Thread pool:          ", (kmp_info_t *)__kmp_thread_pool );
3339 __kmp_print_structure_team( "Team pool:            ", (kmp_team_t *)__kmp_team_pool );
3340 __kmp_printf( "\n" );
3343 while ( list != NULL ) {
3344 kmp_team_list_item_t * item = list;
3346 KMP_INTERNAL_FREE( item );
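/*
 * Reader note (added): the table below feeds a small per-thread linear
 * congruential generator. __kmp_init_random picks a prime multiplier keyed by the
 * thread id (seed % number-of-primes) and seeds th_x, while __kmp_get_random steps
 * th_x = a*th_x + 1 and returns the upper 16 bits of the previous state.
 */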
3358 static const unsigned __kmp_primes[] = {
3359 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3360 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3361 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3362 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3363 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3364 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3365 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3366 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3367 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3368 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3369 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3370 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3371 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3372 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3373 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3374 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3381 __kmp_get_random( kmp_info_t * thread )
3383 unsigned x = thread->th.th_x;
3384 unsigned short r = x>>16;
3386 thread->th.th_x = x*thread->th.th_a+1;
3388 KA_TRACE(30, ( "__kmp_get_random: THREAD: %d, RETURN: %u\n",
3389 thread->th.th_info.ds.ds_tid, r) );
3397 __kmp_init_random( kmp_info_t * thread )
3399 unsigned seed = thread->th.th_info.ds.ds_tid;
3401 thread->th.th_a = __kmp_primes[ seed % (sizeof(__kmp_primes)/sizeof(__kmp_primes[0])) ];
3402 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3403 KA_TRACE(30, ( "__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
3410 __kmp_reclaim_dead_roots(void) {
3413 for(i = 0; i < __kmp_threads_capacity; ++i) {
3414 if( KMP_UBER_GTID( i ) &&
3415 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3416 !__kmp_root[i]->r.r_active ) {
3417 r += __kmp_unregister_root_other_thread(i);
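/*
 * Reader note (added): __kmp_expand_threads grows __kmp_threads and __kmp_root as
 * one combined allocation: the capacity is doubled until it reaches
 * minimumRequiredCapacity (bounded by __kmp_tp_capacity when threadprivate caches
 * exist, otherwise by __kmp_sys_max_nth), the old entries are copied, the new tail
 * is zeroed, and only then are the global pointers and capacity swapped in.
 */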
3446 __kmp_expand_threads(int nWish, int nNeed) {
3449 int __kmp_actual_max_nth;
3453 #if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
3456 added = __kmp_reclaim_dead_roots();
3474 int minimumRequiredCapacity;
3476 kmp_info_t **newThreads;
3477 kmp_root_t **newRoot;
3499 old_tp_cached = __kmp_tp_cached;
3500 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3501 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3505 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3509 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3515 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3522 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3524 newCapacity = __kmp_threads_capacity;
3527 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3528 (newCapacity << 1) :
3529 __kmp_actual_max_nth;
3530 } while(newCapacity < minimumRequiredCapacity);
3531 newThreads = (kmp_info_t**) __kmp_allocate(( sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3532 newRoot = (kmp_root_t**) (( char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
3533 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3534 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
3535 memset(newThreads + __kmp_threads_capacity, 0,
3536 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3537 memset(newRoot + __kmp_threads_capacity, 0,
3538 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3540 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3546 __kmp_free(newThreads);
3549 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3550 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3552 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3553 __kmp_free(newThreads);
3559 *(kmp_info_t** volatile*)&__kmp_threads = newThreads;
3560 *(kmp_root_t** volatile*)&__kmp_root = newRoot;
3561 added += newCapacity - __kmp_threads_capacity;
3562 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3563 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
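/*
 * Reader note (added): __kmp_register_root registers the calling thread as a root
 * (uber) thread: under the forkjoin lock it finds a free gtid slot (expanding the
 * threads array if necessary), allocates and initializes the kmp_root_t, the uber
 * kmp_info_t, its serial team and the fork/join barrier state, and publishes the
 * thread in __kmp_threads before setting the initial affinity mask.
 */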
3574 __kmp_register_root( int initial_thread )
3576 kmp_info_t *root_thread;
3580 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3581 KA_TRACE( 20, ( "__kmp_register_root: entered\n"));
3599 capacity = __kmp_threads_capacity;
3600 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3605 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3606 if ( __kmp_tp_cached ) {
3609 KMP_MSG( CantRegisterNewThread ),
3610 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3611 KMP_HNT( PossibleSystemLimitOnThreads ),
3618 KMP_MSG( CantRegisterNewThread ),
3619 KMP_HNT( SystemLimitOnThreads ),
3628 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3630 KA_TRACE( 1, ( "__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3631 KMP_ASSERT( gtid < __kmp_threads_capacity );
3635 TCW_4(__kmp_nth, __kmp_nth + 1);
3642 if ( __kmp_adjust_gtid_mode ) {
3643 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3644 if ( TCR_4(__kmp_gtid_mode) != 2) {
3645 TCW_4(__kmp_gtid_mode, 2);
3649 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3650 TCW_4(__kmp_gtid_mode, 1);
3655 #ifdef KMP_ADJUST_BLOCKTIME
3658 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3659 if ( __kmp_nth > __kmp_avail_proc ) {
3660 __kmp_zero_bt = TRUE;
3666 if( ! ( root = __kmp_root[gtid] )) {
3667 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3668 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3671 __kmp_initialize_root( root );
3674 if( root->r.r_uber_thread ) {
3675 root_thread = root->r.r_uber_thread;
3677 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3678 if ( __kmp_storage_map ) {
3679 __kmp_print_thread_storage_map( root_thread, gtid );
3681 root_thread->th.th_info .ds.ds_gtid = gtid;
3682 root_thread->th.th_root = root;
3683 if( __kmp_env_consistency_check ) {
3684 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
3687 __kmp_initialize_fast_memory( root_thread );
3691 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
3692 __kmp_initialize_bget( root_thread );
3694 __kmp_init_random( root_thread );
3698 if( ! root_thread->th.th_serial_team ) {
3699 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3700 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
3702 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
3710 0 USE_NESTED_HOT_ARG(NULL) );
3712 KMP_ASSERT( root_thread->th.th_serial_team );
3713 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
3714 root_thread->th.th_serial_team ) );
3717 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3719 root->r.r_root_team->t.t_threads[0] = root_thread;
3720 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3721 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3722 root_thread->th.th_serial_team->t.t_serialized = 0;
3723 root->r.r_uber_thread = root_thread;
3726 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
3727 TCW_4(__kmp_init_gtid, TRUE);
3730 __kmp_gtid_set_specific( gtid );
3733 __kmp_itt_thread_name( gtid );
3736 #ifdef KMP_TDATA_GTID
3739 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3740 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
3742 KA_TRACE( 20, ( "__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3743 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
3744 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3745 KMP_INIT_BARRIER_STATE ) );
3748 for ( b = 0; b < bs_last_barrier; ++ b ) {
3749 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3751 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3755 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3757 #if KMP_AFFINITY_SUPPORTED
3759 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3760 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3761 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3762 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3765 if ( TCR_4(__kmp_init_middle) ) {
3766 __kmp_affinity_set_init_mask( gtid, TRUE );
3770 __kmp_root_counter ++;
3773 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3778 #if KMP_NESTED_HOT_TEAMS
3780 __kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3783 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3784 if( !hot_teams || !hot_teams[level].hot_team ) {
3787 KMP_DEBUG_ASSERT( level < max_level );
3788 kmp_team_t *team = hot_teams[level].hot_team;
3789 nth = hot_teams[level].hot_team_nth;
3791 if( level < max_level - 1 ) {
3792 for( i = 0; i < nth; ++i ) {
3793 kmp_info_t *th = team->t.t_threads[i];
3794 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3795 if( i > 0 && th->th.th_hot_teams ) {
3796 __kmp_free( th->th.th_hot_teams );
3797 th->th.th_hot_teams = NULL;
3801 __kmp_free_team( root, team, NULL );
3810 __kmp_reset_root( int gtid, kmp_root_t *root)
3812 kmp_team_t * root_team = root->r.r_root_team;
3813 kmp_team_t * hot_team = root->r.r_hot_team;
3814 int n = hot_team->t.t_nproc;
3817 KMP_DEBUG_ASSERT( ! root->r.r_active );
3819 root->r.r_root_team = NULL;
3820 root->r.r_hot_team = NULL;
3823 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3824 #if KMP_NESTED_HOT_TEAMS
3825 if( __kmp_hot_teams_max_level > 0 ) {
3826 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3827 kmp_info_t *th = hot_team->t.t_threads[i];
3828 if( __kmp_hot_teams_max_level > 1 ) {
3829 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3831 if( th->th.th_hot_teams ) {
3832 __kmp_free( th->th.th_hot_teams );
3833 th->th.th_hot_teams = NULL;
3838 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
3844 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3845 __kmp_wait_to_unref_task_teams();
3850 KA_TRACE( 10, ( "__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3851 (LPVOID)&(root->r.r_uber_thread->th),
3852 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3853 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3858 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3859 int gtid = __kmp_get_gtid();
3860 __ompt_thread_end(ompt_thread_initial, gtid);
3864 TCW_4(__kmp_nth, __kmp_nth - 1);
3865 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3868 root->r.r_uber_thread = NULL;
3870 root->r.r_begin = FALSE;
3876 __kmp_unregister_root_current_thread( int gtid )
3878 KA_TRACE( 1, ( "__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
3883 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3884 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3885 KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3886 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3889 kmp_root_t *root = __kmp_root[gtid];
3891 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3892 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3893 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3894 KMP_ASSERT( root->r.r_active == FALSE );
3900 kmp_info_t * thread = __kmp_threads[gtid];
3901 kmp_team_t * team = thread->th.th_team;
3902 kmp_task_team_t * task_team = thread->th.th_task_team;
3905 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3908 thread->th.ompt_thread_info.state = ompt_state_undefined;
3910 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3914 __kmp_reset_root(gtid, root);
3917 __kmp_gtid_set_specific( KMP_GTID_DNE );
3918 #ifdef KMP_TDATA_GTID
3919 __kmp_gtid = KMP_GTID_DNE;
3923 KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3925 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3934 __kmp_unregister_root_other_thread( int gtid )
3936 kmp_root_t *root = __kmp_root[gtid];
3939 KA_TRACE( 1, ( "__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3940 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3941 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3942 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3943 KMP_ASSERT( root->r.r_active == FALSE );
3945 r = __kmp_reset_root(gtid, root);
3946 KC_TRACE( 10, ( "__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3952 void __kmp_task_info() {
3954 kmp_int32 gtid = __kmp_entry_gtid();
3955 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3956 kmp_info_t *this_thr = __kmp_threads[ gtid ];
3957 kmp_team_t *steam = this_thr->th.th_serial_team;
3958 kmp_team_t *team = this_thr->th.th_team;
3960 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3961 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3969 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3973 kmp_info_t *master = team->t.t_threads[0];
3974 KMP_DEBUG_ASSERT( this_thr != NULL );
3975 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
3976 KMP_DEBUG_ASSERT( team );
3977 KMP_DEBUG_ASSERT( team->t.t_threads );
3978 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3979 KMP_DEBUG_ASSERT( master );
3980 KMP_DEBUG_ASSERT( master->th.th_root );
3984 TCW_SYNC_PTR(this_thr->th.th_team, team);
3986 this_thr->th.th_info.ds.ds_tid = tid;
3987 this_thr->th.th_set_nproc = 0;
3989 this_thr->th.th_set_proc_bind = proc_bind_default;
3990 # if KMP_AFFINITY_SUPPORTED
3991 this_thr->th.th_new_place = this_thr->th.th_current_place;
3994 this_thr->th.th_root = master->th.th_root;
3997 this_thr->th.th_team_nproc = team->t.t_nproc;
3998 this_thr->th.th_team_master = master;
3999 this_thr->th.th_team_serialized = team->t.t_serialized;
4000 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4002 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
4004 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4005 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4007 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4009 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4010 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4014 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
4016 this_thr->th.th_local.this_construct = 0;
4019 this_thr->th.th_local.tv_data = 0;
4022 if ( ! this_thr->th.th_pri_common ) {
4023 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4024 if ( __kmp_storage_map ) {
4025 __kmp_print_storage_map_gtid(
4026 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4027 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4030 this_thr->th.th_pri_head = NULL;
4035 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4039 size_t disp_size = sizeof( dispatch_private_info_t ) *
4040 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
4041 KD_TRACE( 10, ( "__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4042 KMP_ASSERT( dispatch );
4043 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4044 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4046 dispatch->th_disp_index = 0;
4048 dispatch->th_doacross_buf_idx = 0;
4050 if( ! dispatch->th_disp_buffer ) {
4051 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4053 if ( __kmp_storage_map ) {
4054 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4055 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
4056 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4057 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4058 gtid, team->t.t_id, gtid );
4061 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
4064 dispatch->th_dispatch_pr_current = 0;
4065 dispatch->th_dispatch_sh_current = 0;
4067 dispatch->th_deo_fcn = 0;
4068 dispatch->th_dxo_fcn = 0;
4071 this_thr->th.th_next_pool = NULL;
4073 if (!this_thr->th.th_task_state_memo_stack) {
4075 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4076 this_thr->th.th_task_state_top = 0;
4077 this_thr->th.th_task_state_stack_sz = 4;
4078 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i)
4079 this_thr->th.th_task_state_memo_stack[i] = 0;
4082 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4083 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
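/*
 * Reader note (added): __kmp_allocate_thread first tries to recycle a kmp_info_t
 * from __kmp_thread_pool, simply re-running __kmp_initialize_info on it; if the
 * pool is empty it claims a fresh gtid, allocates a new kmp_info_t plus its serial
 * team, initializes barrier/affinity state, and starts the OS worker with
 * __kmp_create_worker.
 */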
4096 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4098 kmp_team_t *serial_team;
4099 kmp_info_t *new_thr;
4102 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4103 KMP_DEBUG_ASSERT( root && team );
4104 #if !KMP_NESTED_HOT_TEAMS
4105 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
4110 if ( __kmp_thread_pool ) {
4112 new_thr = (kmp_info_t*)__kmp_thread_pool;
4113 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4114 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4115 __kmp_thread_pool_insert_pt = NULL;
4117 TCW_4(new_thr->th.th_in_pool, FALSE);
4122 __kmp_thread_pool_nth--;
4124 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d using thread T#%d\n",
4125 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4126 KMP_ASSERT( ! new_thr->th.th_team );
4127 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4128 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4131 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4132 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4134 TCW_4(__kmp_nth, __kmp_nth + 1);
4136 new_thr->th.th_task_state = 0;
4137 new_thr->th.th_task_state_top = 0;
4138 new_thr->th.th_task_state_stack_sz = 4;
4140 #ifdef KMP_ADJUST_BLOCKTIME
4143 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4144 if ( __kmp_nth > __kmp_avail_proc ) {
4145 __kmp_zero_bt = TRUE;
4153 kmp_balign_t * balign = new_thr->th.th_bar;
4154 for( b = 0; b < bs_last_barrier; ++ b )
4155 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4158 KF_TRACE( 10, ( "__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4159 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4167 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4168 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4174 if ( ! TCR_4( __kmp_init_monitor ) ) {
4175 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4176 if ( ! TCR_4( __kmp_init_monitor ) ) {
4177 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4178 TCW_4( __kmp_init_monitor, 1 );
4179 __kmp_create_monitor( & __kmp_monitor );
4180 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
4189 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4192 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4195 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4199 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4200 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4204 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4206 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4208 if ( __kmp_storage_map ) {
4209 __kmp_print_thread_storage_map( new_thr, new_gtid );
4214 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
4215 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
4217 new_thr->th.th_serial_team = serial_team =
4218 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
4226 0 USE_NESTED_HOT_ARG(NULL) );
4228 KMP_ASSERT ( serial_team );
4229 serial_team->t.t_serialized = 0;
4230 serial_team->t.t_threads[0] = new_thr;
4231 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4235 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4238 __kmp_initialize_fast_memory( new_thr );
4242 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
4243 __kmp_initialize_bget( new_thr );
4246 __kmp_init_random( new_thr );
4249 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4250 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4253 kmp_balign_t * balign = new_thr->th.th_bar;
4254 for(b=0; b<bs_last_barrier; ++b) {
4255 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4256 balign[b].bb.team = NULL;
4257 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4258 balign[b].bb.use_oncore_barrier = 0;
4261 new_thr->th.th_spin_here = FALSE;
4262 new_thr->th.th_next_waiting = 0;
4264 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4265 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4266 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4267 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4268 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4271 TCW_4(new_thr->th.th_in_pool, FALSE);
4272 new_thr->th.th_active_in_pool = FALSE;
4273 TCW_4(new_thr->th.th_active, TRUE);
4284 if ( __kmp_adjust_gtid_mode ) {
4285 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4286 if ( TCR_4(__kmp_gtid_mode) != 2) {
4287 TCW_4(__kmp_gtid_mode, 2);
4291 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4292 TCW_4(__kmp_gtid_mode, 1);
4297 #ifdef KMP_ADJUST_BLOCKTIME
4300 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4301 if ( __kmp_nth > __kmp_avail_proc ) {
4302 __kmp_zero_bt = TRUE;
4308 KF_TRACE( 10, ( "__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4309 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4310 KF_TRACE( 10, ( "__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4312 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4327 __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
4328 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4329 team->t.t_threads[0], team ) );
4330 KMP_DEBUG_ASSERT( team && new_icvs);
4331 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
4332 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4334 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4337 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
4338 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4340 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4341 team->t.t_threads[0], team ) );
4349 __kmp_initialize_team(
4352 kmp_internal_control_t * new_icvs,
4355 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4358 KMP_DEBUG_ASSERT( team );
4359 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4360 KMP_DEBUG_ASSERT( team->t.t_threads );
4363 team->t.t_master_tid = 0;
4365 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4366 team->t.t_nproc = new_nproc;
4369 team->t.t_next_pool = NULL;
4372 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4373 team->t.t_invoke = NULL;
4376 team->t.t_sched = new_icvs->sched;
4378 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4379 team->t.t_fp_control_saved = FALSE;
4380 team->t.t_x87_fpu_control_word = 0;
4381 team->t.t_mxcsr = 0;
4384 team->t.t_construct = 0;
4385 __kmp_init_lock( & team->t.t_single_lock );
4387 team->t.t_ordered .dt.t_value = 0;
4388 team->t.t_master_active = FALSE;
4390 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
4393 team->t.t_copypriv_data = NULL;
4395 team->t.t_copyin_counter = 0;
4397 team->t.t_control_stack_top = NULL;
4399 __kmp_reinitialize_team( team, new_icvs, loc );
4402 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
4405 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4408 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4410 if ( KMP_AFFINITY_CAPABLE() ) {
4412 if ( old_mask != NULL ) {
4413 status = __kmp_get_system_affinity( old_mask, TRUE );
4415 if ( status != 0 ) {
4418 KMP_MSG( ChangeThreadAffMaskError ),
4424 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
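/*
 * Reader note (added): __kmp_partition_places below distributes places among the
 * team's threads according to t_proc_bind: proc_bind_master gives every worker the
 * master's place, while proc_bind_close and proc_bind_spread spread the threads
 * over the master's [first_place, last_place] partition (one place per thread when
 * enough places exist, otherwise in blocks of roughly n_th / n_places); when
 * update_master_only is set only the master's placement is recomputed.
 */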
4429 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4438 __kmp_partition_places( kmp_team_t *team, int update_master_only )
4443 kmp_info_t *master_th = team->t.t_threads[0];
4444 KMP_DEBUG_ASSERT( master_th != NULL );
4445 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4446 int first_place = master_th->th.th_first_place;
4447 int last_place = master_th->th.th_last_place;
4448 int masters_place = master_th->th.th_current_place;
4449 team->t.t_first_place = first_place;
4450 team->t.t_last_place = last_place;
4452 KA_TRACE( 20, ( "__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4453 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4454 masters_place, first_place, last_place ) );
4456 switch ( proc_bind ) {
4458 case proc_bind_default:
4464 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4467 case proc_bind_master:
4470 int n_th = team->t.t_nproc;
4471 for ( f = 1; f < n_th; f++ ) {
4472 kmp_info_t *th = team->t.t_threads[f];
4473 KMP_DEBUG_ASSERT( th != NULL );
4474 th->th.th_first_place = first_place;
4475 th->th.th_last_place = last_place;
4476 th->th.th_new_place = masters_place;
4478 KA_TRACE( 100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4479 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4480 team->t.t_id, f, masters_place, first_place, last_place ) );
4485 case proc_bind_close:
4488 int n_th = team->t.t_nproc;
4490 if ( first_place <= last_place ) {
4491 n_places = last_place - first_place + 1;
4494 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4496 if ( n_th <= n_places ) {
4497 int place = masters_place;
4498 for ( f = 1; f < n_th; f++ ) {
4499 kmp_info_t *th = team->t.t_threads[f];
4500 KMP_DEBUG_ASSERT( th != NULL );
4502 if ( place == last_place ) {
4503 place = first_place;
4505 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4511 th->th.th_first_place = first_place;
4512 th->th.th_last_place = last_place;
4513 th->th.th_new_place = place;
4515 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4516 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4517 team->t.t_id, f, place, first_place, last_place ) );
4521 int S, rem, gap, s_count;
4522 S = n_th / n_places;
4524 rem = n_th - ( S * n_places );
4525 gap = rem > 0 ? n_places/rem : n_places;
4526 int place = masters_place;
4528 for ( f = 0; f < n_th; f++ ) {
4529 kmp_info_t *th = team->t.t_threads[f];
4530 KMP_DEBUG_ASSERT( th != NULL );
4532 th->th.th_first_place = first_place;
4533 th->th.th_last_place = last_place;
4534 th->th.th_new_place = place;
4537 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4540 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4542 if ( place == last_place ) {
4543 place = first_place;
4545 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4555 else if (s_count == S) {
4556 if ( place == last_place ) {
4557 place = first_place;
4559 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4569 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4570 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4571 team->t.t_id, f, th->th.th_new_place, first_place,
4574 KMP_DEBUG_ASSERT( place == masters_place );
4579 case proc_bind_spread:
4582 int n_th = team->t.t_nproc;
4585 if ( first_place <= last_place ) {
4586 n_places = last_place - first_place + 1;
4589 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4591 if ( n_th <= n_places ) {
4592 int place = masters_place;
4593 int S = n_places/n_th;
4594 int s_count, rem, gap, gap_ct;
4595 rem = n_places - n_th*S;
4596 gap = rem ? n_th/rem : 1;
4599 if (update_master_only == 1)
4601 for ( f = 0; f < thidx; f++ ) {
4602 kmp_info_t *th = team->t.t_threads[f];
4603 KMP_DEBUG_ASSERT( th != NULL );
4605 th->th.th_first_place = place;
4606 th->th.th_new_place = place;
4608 while (s_count < S) {
4609 if ( place == last_place ) {
4610 place = first_place;
4612 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4620 if (rem && (gap_ct == gap)) {
4621 if ( place == last_place ) {
4622 place = first_place;
4624 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4633 th->th.th_last_place = place;
4636 if ( place == last_place ) {
4637 place = first_place;
4639 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4646 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4647 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4648 team->t.t_id, f, th->th.th_new_place,
4649 th->th.th_first_place, th->th.th_last_place ) );
4651 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
4654 int S, rem, gap, s_count;
4655 S = n_th / n_places;
4657 rem = n_th - ( S * n_places );
4658 gap = rem > 0 ? n_places/rem : n_places;
4659 int place = masters_place;
4662 if (update_master_only == 1)
4664 for ( f = 0; f < thidx; f++ ) {
4665 kmp_info_t *th = team->t.t_threads[f];
4666 KMP_DEBUG_ASSERT( th != NULL );
4668 th->th.th_first_place = place;
4669 th->th.th_last_place = place;
4670 th->th.th_new_place = place;
4673 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4676 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4678 if ( place == last_place ) {
4679 place = first_place;
4681 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4691 else if (s_count == S) {
4692 if ( place == last_place ) {
4693 place = first_place;
4695 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4705 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4706 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4707 team->t.t_id, f, th->th.th_new_place,
4708 th->th.th_first_place, th->th.th_last_place) );
4710 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
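/*
 * Reader note (added): __kmp_allocate_team (below) first tries to reuse the hot
 * team (or the nested hot team for this level), shrinking or growing it and
 * re-partitioning places as needed; failing that it scans __kmp_team_pool for a
 * team whose t_max_nproc is large enough, and only as a last resort allocates a
 * brand new kmp_team_t together with its thread/dispatch arrays.
 */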
4719 KA_TRACE( 20, ( "__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4726 __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
4728 ompt_parallel_id_t ompt_parallel_id,
4731 kmp_proc_bind_t new_proc_bind,
4733 kmp_internal_control_t *new_icvs,
4734 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
4736 KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
4739 int use_hot_team = ! root->r.r_active;
4742 KA_TRACE( 20, ( "__kmp_allocate_team: called\n"));
4743 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4744 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4747 #if KMP_NESTED_HOT_TEAMS
4748 kmp_hot_team_ptr_t *hot_teams;
4750 team = master->th.th_team;
4751 level = team->t.t_active_level;
4752 if( master->th.th_teams_microtask ) {
4753 if( master->th.th_teams_size.nteams > 1 && (
4754 team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
4755 master->th.th_teams_level < team->t.t_level ) ) {
4759 hot_teams = master->th.th_hot_teams;
4760 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4769 if( use_hot_team && new_nproc > 1 ) {
4770 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
4771 #if KMP_NESTED_HOT_TEAMS
4772 team = hot_teams[level].hot_team;
4774 team = root->r.r_hot_team;
4777 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4778 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4779 team->t.t_task_team[0], team->t.t_task_team[1] ));
4786 if (team->t.t_nproc == new_nproc) {
4787 KA_TRACE( 20, (
"__kmp_allocate_team: reusing hot team\n" ));
4790 if ( team->t.t_size_changed == -1 ) {
4791 team->t.t_size_changed = 1;
4793 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4797 kmp_r_sched_t new_sched = new_icvs->sched;
4798 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4799 team->t.t_sched.chunk != new_sched.chunk)
4800 team->t.t_sched = new_sched;
4802 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4804 KF_TRACE( 10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4805 0, team->t.t_threads[0], team ) );
4806 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4809 # if KMP_AFFINITY_SUPPORTED
4810 if ( ( team->t.t_size_changed == 0 )
4811 && ( team->t.t_proc_bind == new_proc_bind ) ) {
4812 if (new_proc_bind == proc_bind_spread) {
4813 __kmp_partition_places(team, 1);
4815 KA_TRACE( 200, (
"__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4816 team->t.t_id, new_proc_bind, team->t.t_first_place,
4817 team->t.t_last_place ) );
4820 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4821 __kmp_partition_places( team );
4824 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4828 else if( team->t.t_nproc > new_nproc ) {
4829 KA_TRACE( 20, (
"__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4831 team->t.t_size_changed = 1;
4832 #if KMP_NESTED_HOT_TEAMS
4833 if( __kmp_hot_teams_mode == 0 ) {
4836 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4837 hot_teams[level].hot_team_nth = new_nproc;
4838 #endif // KMP_NESTED_HOT_TEAMS
4840 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4841 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
4842 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4844 team->t.t_threads[f]->th.th_task_team = NULL;
4846 __kmp_free_thread( team->t.t_threads[ f ] );
4847 team->t.t_threads[ f ] = NULL;
4849 #if KMP_NESTED_HOT_TEAMS
4851 #endif // KMP_NESTED_HOT_TEAMS
4852 team->t.t_nproc = new_nproc;
4854 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4855 team->t.t_sched.chunk != new_icvs->sched.chunk)
4856 team->t.t_sched = new_icvs->sched;
4857 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4860 for(f = 0; f < new_nproc; ++f) {
4861 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4864 KF_TRACE( 10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4865 0, team->t.t_threads[0], team ) );
4867 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4870 for ( f = 0; f < team->t.t_nproc; f++ ) {
4871 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4872 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4877 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4878 # if KMP_AFFINITY_SUPPORTED
4879 __kmp_partition_places( team );
4884 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4885 kmp_affin_mask_t *old_mask;
4886 if ( KMP_AFFINITY_CAPABLE() ) {
4887 KMP_CPU_ALLOC(old_mask);
4891 KA_TRACE( 20, (
"__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4893 team->t.t_size_changed = 1;
4895 #if KMP_NESTED_HOT_TEAMS
4896 int avail_threads = hot_teams[level].hot_team_nth;
4897 if( new_nproc < avail_threads )
4898 avail_threads = new_nproc;
4899 kmp_info_t **other_threads = team->t.t_threads;
4900 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4904 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4905 for ( b = 0; b < bs_last_barrier; ++ b ) {
4906 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4907 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4909 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4913 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4916 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4917 team->t.t_nproc = new_nproc;
4920 team->t.t_nproc = hot_teams[level].hot_team_nth;
4921 hot_teams[level].hot_team_nth = new_nproc;
4922 #endif // KMP_NESTED_HOT_TEAMS
4923 if(team->t.t_max_nproc < new_nproc) {
4925 __kmp_reallocate_team_arrays(team, new_nproc);
4926 __kmp_reinitialize_team( team, new_icvs, NULL );
4929 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4936 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4940 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4941 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4942 KMP_DEBUG_ASSERT( new_worker );
4943 team->t.t_threads[ f ] = new_worker;
4945 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
4946 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4947 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4948 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4952 kmp_balign_t * balign = new_worker->th.th_bar;
4953 for( b = 0; b < bs_last_barrier; ++ b ) {
4954 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
4955 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4957 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4963 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4964 if ( KMP_AFFINITY_CAPABLE() ) {
4966 __kmp_set_system_affinity( old_mask, TRUE );
4967 KMP_CPU_FREE(old_mask);
4970 #if KMP_NESTED_HOT_TEAMS
4972 #endif // KMP_NESTED_HOT_TEAMS
4974 int old_nproc = team->t.t_nproc;
4975 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
4978 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
4979 for (f=0; f < team->t.t_nproc; ++f)
4980 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
4985 for (f=old_nproc; f < team->t.t_nproc; ++f)
4986 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
4989 int old_state = team->t.t_threads[0]->th.th_task_state;
4990 for (f=old_nproc; f < team->t.t_nproc; ++f)
4991 team->t.t_threads[f]->th.th_task_state = old_state;
4995 for ( f = 0; f < team->t.t_nproc; ++ f ) {
4996 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4997 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5002 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5003 # if KMP_AFFINITY_SUPPORTED
5004 __kmp_partition_places( team );
5010 kmp_info_t *master = team->t.t_threads[0];
5011 if( master->th.th_teams_microtask ) {
5012 for( f = 1; f < new_nproc; ++f ) {
5014 kmp_info_t *thr = team->t.t_threads[f];
5015 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5016 thr->th.th_teams_level = master->th.th_teams_level;
5017 thr->th.th_teams_size = master->th.th_teams_size;
5021 #if KMP_NESTED_HOT_TEAMS
5024 for( f = 1; f < new_nproc; ++f ) {
5025 kmp_info_t *thr = team->t.t_threads[f];
5027 kmp_balign_t * balign = thr->th.th_bar;
5028 for( b = 0; b < bs_last_barrier; ++ b ) {
5029 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5030 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5032 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5037 #endif // KMP_NESTED_HOT_TEAMS
5040 __kmp_alloc_argv_entries( argc, team, TRUE );
5041 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5047 KF_TRACE( 10, (
" hot_team = %p\n", team ) );
5050 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5051 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5052 team->t.t_task_team[0], team->t.t_task_team[1] ));
5057 __ompt_team_assign_id(team, ompt_parallel_id);
5067 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5070 if ( team->t.t_max_nproc >= max_nproc ) {
5072 __kmp_team_pool = team->t.t_next_pool;
5075 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5077 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5078 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5079 team->t.t_task_team[0] = NULL;
5080 team->t.t_task_team[1] = NULL;
5083 __kmp_alloc_argv_entries( argc, team, TRUE );
5084 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5086 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5087 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5090 for ( b = 0; b < bs_last_barrier; ++ b) {
5091 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5093 team->t.t_bar[ b ].b_master_arrived = 0;
5094 team->t.t_bar[ b ].b_team_arrived = 0;
5100 team->t.t_proc_bind = new_proc_bind;
5103 KA_TRACE( 20, (
"__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
5106 __ompt_team_assign_id(team, ompt_parallel_id);
5117 team = __kmp_reap_team( team );
5118 __kmp_team_pool = team;
5123 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5126 team->t.t_max_nproc = max_nproc;
5130 __kmp_allocate_team_arrays( team, max_nproc );
5132 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
5133 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5135 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5136 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5137 team->t.t_task_team[0] = NULL;
5138 team->t.t_task_team[1] = NULL;
5140 if ( __kmp_storage_map ) {
5141 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5145 __kmp_alloc_argv_entries( argc, team, FALSE );
5146 team->t.t_argc = argc;
5148 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5149 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5152 for ( b = 0; b < bs_last_barrier; ++ b ) {
5153 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5155 team->t.t_bar[ b ].b_master_arrived = 0;
5156 team->t.t_bar[ b ].b_team_arrived = 0;
5162 team->t.t_proc_bind = new_proc_bind;
5166 __ompt_team_assign_id(team, ompt_parallel_id);
5167 team->t.ompt_serialized_team_info = NULL;
5172 KA_TRACE( 20, ( "__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5183 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
5186 KA_TRACE( 20, ( "__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5189 KMP_DEBUG_ASSERT( root );
5190 KMP_DEBUG_ASSERT( team );
5191 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5192 KMP_DEBUG_ASSERT( team->t.t_threads );
5194 int use_hot_team = team == root->r.r_hot_team;
5195 #if KMP_NESTED_HOT_TEAMS 5197 kmp_hot_team_ptr_t *hot_teams;
5199 level = team->t.t_active_level - 1;
5200 if( master->th.th_teams_microtask ) {
5201 if( master->th.th_teams_size.nteams > 1 ) {
5204 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5205 master->th.th_teams_level == team->t.t_level ) {
5209 hot_teams = master->th.th_hot_teams;
5210 if( level < __kmp_hot_teams_max_level ) {
5211 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5215 #endif // KMP_NESTED_HOT_TEAMS 5218 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5219 team->t.t_copyin_counter = 0;
5223 if( ! use_hot_team ) {
5224 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5227 for (tt_idx=0; tt_idx<2; ++tt_idx) {
5228 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5229 if ( task_team != NULL ) {
5230 for (f=0; f<team->t.t_nproc; ++f) {
5231 team->t.t_threads[f]->th.th_task_team = NULL;
5233 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
5234 #if KMP_NESTED_HOT_TEAMS 5235 __kmp_free_task_team( master, task_team );
5237 team->t.t_task_team[tt_idx] = NULL;
5243 team->t.t_parent = NULL;
5244 team->t.t_level = 0;
5245 team->t.t_active_level = 0;
5248 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5249 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5250 __kmp_free_thread( team->t.t_threads[ f ] );
5251 team->t.t_threads[ f ] = NULL;
5256 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
5257 __kmp_team_pool = (volatile kmp_team_t*) team;
5266 __kmp_reap_team( kmp_team_t *team )
5268 kmp_team_t *next_pool = team->t.t_next_pool;
5270 KMP_DEBUG_ASSERT( team );
5271 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5272 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5273 KMP_DEBUG_ASSERT( team->t.t_threads );
5274 KMP_DEBUG_ASSERT( team->t.t_argv );
5280 __kmp_free_team_arrays( team );
5281 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5282 __kmp_free( (void*) team->t.t_argv );
5317 __kmp_free_thread( kmp_info_t *this_th )
5322 KA_TRACE( 20, ( "__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5323 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5325 KMP_DEBUG_ASSERT( this_th );
5329 kmp_balign_t *balign = this_th->th.th_bar;
5330 for (b=0; b<bs_last_barrier; ++b) {
5331 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5332 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5333 balign[b].bb.team = NULL;
5335 this_th->th.th_task_state = 0;
5338 TCW_PTR(this_th->th.th_team, NULL);
5339 TCW_PTR(this_th->th.th_root, NULL);
5340 TCW_PTR(this_th->th.th_dispatch, NULL);
5346 gtid = this_th->th.th_info.ds.ds_gtid;
5347 if ( __kmp_thread_pool_insert_pt != NULL ) {
5348 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5349 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5350 __kmp_thread_pool_insert_pt = NULL;
5361 if ( __kmp_thread_pool_insert_pt != NULL ) {
5362 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5365 scan = (kmp_info_t **)&__kmp_thread_pool;
5367 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5368 scan = &( (*scan)->th.th_next_pool ) );
5374 TCW_PTR(this_th->th.th_next_pool, *scan);
5375 __kmp_thread_pool_insert_pt = *scan = this_th;
5376 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5377 || ( this_th->th.th_info.ds.ds_gtid
5378 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5379 TCW_4(this_th->th.th_in_pool, TRUE);
5380 __kmp_thread_pool_nth++;
5382 TCW_4(__kmp_nth, __kmp_nth - 1);
5384 #ifdef KMP_ADJUST_BLOCKTIME 5387 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5388 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5389 if ( __kmp_nth <= __kmp_avail_proc ) {
5390 __kmp_zero_bt = FALSE;
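/*
 * Illustrative sketch only: the insertion scheme used by __kmp_free_thread above.
 * The thread free pool is kept sorted by gtid so reuse is deterministic, and a
 * cached insertion point (__kmp_thread_pool_insert_pt) lets threads released in
 * roughly ascending order avoid rescanning the list from its head. The names
 * below (node_t, pool_insert, insert_pt) are hypothetical.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct node {
    int gtid;
    struct node *next;
} node_t;

static node_t *pool = NULL;       /* sorted by ascending gtid          */
static node_t *insert_pt = NULL;  /* cache of the last insertion point */

static void pool_insert(node_t *n) {
    node_t **scan;
    /* The cache is only usable if it precedes the new node in sort order. */
    if (insert_pt != NULL && insert_pt->gtid > n->gtid)
        insert_pt = NULL;
    scan = (insert_pt != NULL) ? &insert_pt->next : &pool;
    for (; *scan != NULL && (*scan)->gtid < n->gtid; scan = &(*scan)->next)
        ;
    n->next = *scan;
    *scan = n;
    insert_pt = n;
    assert(n->next == NULL || n->gtid < n->next->gtid);
}

int main(void) {
    int ids[] = { 3, 1, 4, 2 };
    for (int i = 0; i < 4; ++i) {
        node_t *n = (node_t *)calloc(1, sizeof(*n));
        n->gtid = ids[i];
        pool_insert(n);
    }
    for (node_t *p = pool; p != NULL; p = p->next)
        printf("%d ", p->gtid);       /* prints: 1 2 3 4 */
    printf("\n");
    while (pool != NULL) {            /* tear down the demo pool */
        node_t *next = pool->next;
        free(pool);
        pool = next;
    }
    return 0;
}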
5402 __kmp_launch_thread( kmp_info_t *this_thr )
5404 int gtid = this_thr->th.th_info.ds.ds_gtid;
5406 kmp_team_t *(*volatile pteam);
5409 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d start\n", gtid ) );
5411 if( __kmp_env_consistency_check ) {
5412 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid );
5417 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5418 this_thr->th.ompt_thread_info.wait_id = 0;
5419 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5420 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5421 __ompt_thread_begin(ompt_thread_worker, gtid);
5427 while( ! TCR_4(__kmp_global.g.g_done) ) {
5428 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5432 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5436 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5441 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5445 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5449 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5452 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
5454 ompt_task_info_t *task_info;
5455 ompt_parallel_id_t my_parallel_id;
5457 task_info = __ompt_get_taskinfo(0);
5458 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
5462 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5464 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5465 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5467 updateHWFPControl (*pteam);
5471 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5473 int tid = __kmp_tid_from_gtid(gtid);
5474 task_info->task_id = __ompt_task_id_new(tid);
5478 KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
5480 KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
5481 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5482 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5483 rc = (*pteam)->t.t_invoke( gtid );
5485 KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
5491 task_info->frame.exit_runtime_frame = 0;
5493 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5497 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5498 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5501 __kmp_join_barrier( gtid );
5502 #if OMPT_SUPPORT && OMPT_TRACE 5504 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
5507 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5508 my_parallel_id, task_info->task_id);
5510 task_info->frame.exit_runtime_frame = 0;
5511 task_info->task_id = 0;
5516 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5520 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5521 __ompt_thread_end(ompt_thread_worker, gtid);
5525 this_thr->th.th_task_team = NULL;
5527 __kmp_common_destroy_gtid( gtid );
5529 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d done\n", gtid ) );
5538 __kmp_internal_end_dest( void *specific_gtid )
5540 #if KMP_COMPILER_ICC
5541 #pragma warning( push )
5542 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5545 int gtid = (kmp_intptr_t)specific_gtid - 1;
5546 #if KMP_COMPILER_ICC
5547 #pragma warning( pop )
5550 KA_TRACE( 30, ( "__kmp_internal_end_dest: T#%d\n", gtid));
5564 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5565 __kmp_gtid_set_specific( gtid );
5566 #ifdef KMP_TDATA_GTID 5569 __kmp_internal_end_thread( gtid );
5572 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5578 __attribute__(( destructor ))
5580 __kmp_internal_end_dtor( void )
5582 __kmp_internal_end_atexit();
5586 __kmp_internal_end_fini( void )
5588 __kmp_internal_end_atexit();
5595 __kmp_internal_end_atexit( void )
5597 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5619 __kmp_internal_end_library( -1 );
5621 __kmp_close_console();
5627 kmp_info_t * thread,
5635 KMP_DEBUG_ASSERT( thread != NULL );
5637 gtid = thread->th.th_info.ds.ds_gtid;
5641 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5643 KA_TRACE( 20, ( "__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5645 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5646 __kmp_release_64(&flag);
5650 __kmp_reap_worker( thread );
5665 if ( thread->th.th_active_in_pool ) {
5666 thread->th.th_active_in_pool = FALSE;
5667 KMP_TEST_THEN_DEC32(
5668 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5669 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5673 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5674 --__kmp_thread_pool_nth;
5679 __kmp_free_fast_memory( thread );
5682 __kmp_suspend_uninitialize_thread( thread );
5684 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5685 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5690 #ifdef KMP_ADJUST_BLOCKTIME 5693 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5694 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5695 if ( __kmp_nth <= __kmp_avail_proc ) {
5696 __kmp_zero_bt = FALSE;
5702 if( __kmp_env_consistency_check ) {
5703 if ( thread->th.th_cons ) {
5704 __kmp_free_cons_stack( thread->th.th_cons );
5705 thread->th.th_cons = NULL;
5709 if ( thread->th.th_pri_common != NULL ) {
5710 __kmp_free( thread->th.th_pri_common );
5711 thread->th.th_pri_common = NULL;
5714 if (thread->th.th_task_state_memo_stack != NULL) {
5715 __kmp_free(thread->th.th_task_state_memo_stack);
5716 thread->th.th_task_state_memo_stack = NULL;
5720 if ( thread->th.th_local.bget_data != NULL ) {
5721 __kmp_finalize_bget( thread );
5725 #if KMP_AFFINITY_SUPPORTED 5726 if ( thread->th.th_affin_mask != NULL ) {
5727 KMP_CPU_FREE( thread->th.th_affin_mask );
5728 thread->th.th_affin_mask = NULL;
5732 __kmp_reap_team( thread->th.th_serial_team );
5733 thread->th.th_serial_team = NULL;
5734 __kmp_free( thread );
5741 __kmp_internal_end( void )
5746 __kmp_unregister_library();
5754 __kmp_reclaim_dead_roots();
5757 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5759 if( __kmp_root[i]->r.r_active )
5762 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5764 if ( i < __kmp_threads_capacity ) {
5779 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5780 if ( TCR_4( __kmp_init_monitor ) ) {
5781 __kmp_reap_monitor( & __kmp_monitor );
5782 TCW_4( __kmp_init_monitor, 0 );
5784 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5785 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5790 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5791 if( __kmp_root[i] ) {
5793 KMP_ASSERT( ! __kmp_root[i]->r.r_active );
5802 while ( __kmp_thread_pool != NULL ) {
5804 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5805 __kmp_thread_pool = thread->th.th_next_pool;
5807 thread->th.th_next_pool = NULL;
5808 thread->th.th_in_pool = FALSE;
5809 __kmp_reap_thread( thread, 0 );
5811 __kmp_thread_pool_insert_pt = NULL;
5814 while ( __kmp_team_pool != NULL ) {
5816 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5817 __kmp_team_pool = team->t.t_next_pool;
5819 team->t.t_next_pool = NULL;
5820 __kmp_reap_team( team );
5823 __kmp_reap_task_teams( );
5825 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5832 TCW_SYNC_4(__kmp_init_common, FALSE);
5834 KA_TRACE( 10, ( "__kmp_internal_end: all workers reaped\n" ) );
5843 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5844 if ( TCR_4( __kmp_init_monitor ) ) {
5845 __kmp_reap_monitor( & __kmp_monitor );
5846 TCW_4( __kmp_init_monitor, 0 );
5848 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5849 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5852 TCW_4(__kmp_init_gtid, FALSE);
5862 __kmp_internal_end_library( int gtid_req )
5870 if( __kmp_global.g.g_abort ) {
5871 KA_TRACE( 11, ( "__kmp_internal_end_library: abort, exiting\n" ));
5875 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5876 KA_TRACE( 10, ( "__kmp_internal_end_library: already finished\n" ));
5885 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5886 KA_TRACE( 10, ( "__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5887 if( gtid == KMP_GTID_SHUTDOWN ) {
5888 KA_TRACE( 10, ( "__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5890 } else if( gtid == KMP_GTID_MONITOR ) {
5891 KA_TRACE( 10, ( "__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5893 } else if( gtid == KMP_GTID_DNE ) {
5894 KA_TRACE( 10, ( "__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5896 } else if( KMP_UBER_GTID( gtid )) {
5898 if( __kmp_root[gtid]->r.r_active ) {
5899 __kmp_global.g.g_abort = -1;
5900 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5901 KA_TRACE( 10, ( "__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5904 KA_TRACE( 10, ( "__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5905 __kmp_unregister_root_current_thread( gtid );
5912 #ifdef DUMP_DEBUG_ON_EXIT 5913 if ( __kmp_debug_buf )
5914 __kmp_dump_debug_buffer( );
5920 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5923 if( __kmp_global.g.g_abort ) {
5924 KA_TRACE( 10, ( "__kmp_internal_end_library: abort, exiting\n" ));
5926 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5929 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5930 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5940 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5943 __kmp_internal_end();
5945 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5946 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5948 KA_TRACE( 10, ( "__kmp_internal_end_library: exit\n" ) );
5950 #ifdef DUMP_DEBUG_ON_EXIT 5951 if ( __kmp_debug_buf )
5952 __kmp_dump_debug_buffer();
5956 __kmp_close_console();
5959 __kmp_fini_allocator();
5964 __kmp_internal_end_thread( int gtid_req )
5974 if( __kmp_global.g.g_abort ) {
5975 KA_TRACE( 11, ( "__kmp_internal_end_thread: abort, exiting\n" ));
5979 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5980 KA_TRACE( 10, ( "__kmp_internal_end_thread: already finished\n" ));
5988 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5989 KA_TRACE( 10, ( "__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
5990 if( gtid == KMP_GTID_SHUTDOWN ) {
5991 KA_TRACE( 10, ( "__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
5993 } else if( gtid == KMP_GTID_MONITOR ) {
5994 KA_TRACE( 10, ( "__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
5996 } else if( gtid == KMP_GTID_DNE ) {
5997 KA_TRACE( 10, ( "__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6000 } else if( KMP_UBER_GTID( gtid )) {
6002 if( __kmp_root[gtid]->r.r_active ) {
6003 __kmp_global.g.g_abort = -1;
6004 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6005 KA_TRACE( 10, ( "__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6008 KA_TRACE( 10, ( "__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6009 __kmp_unregister_root_current_thread( gtid );
6013 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6016 __kmp_threads[gtid]->th.th_task_team = NULL;
6019 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6023 #if defined KMP_DYNAMIC_LIB
6031 KA_TRACE( 10, ( "__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
6035 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6038 if( __kmp_global.g.g_abort ) {
6039 KA_TRACE( 10, ( "__kmp_internal_end_thread: abort, exiting\n" ));
6041 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6044 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6045 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6057 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6059 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6060 if ( KMP_UBER_GTID( i ) ) {
6061 KA_TRACE( 10, ( "__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6062 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6063 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6070 __kmp_internal_end();
6072 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6073 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6075 KA_TRACE( 10, ( "__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
6077 #ifdef DUMP_DEBUG_ON_EXIT 6078 if ( __kmp_debug_buf )
6079 __kmp_dump_debug_buffer();
6086 static long __kmp_registration_flag = 0;
6088 static char * __kmp_registration_str = NULL;
6094 __kmp_reg_status_name() {
6100 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6105 __kmp_register_library_startup(
6109 char * name = __kmp_reg_status_name();
6116 __kmp_initialize_system_tick();
6118 __kmp_read_system_time( & time.dtime );
6119 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6120 __kmp_registration_str =
6123 & __kmp_registration_flag,
6124 __kmp_registration_flag,
6128 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6132 char * value = NULL;
6135 __kmp_env_set( name, __kmp_registration_str, 0 );
6137 value = __kmp_env_get( name );
6138 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6147 char * tail = value;
6148 char * flag_addr_str = NULL;
6149 char * flag_val_str = NULL;
6150 char const * file_name = NULL;
6151 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6152 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6154 if ( tail != NULL ) {
6155 long * flag_addr = 0;
6157 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6158 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
6159 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6163 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6171 switch ( neighbor ) {
6176 file_name = "unknown library";
6180 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6181 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6185 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6186 KMP_HNT( DuplicateLibrary ),
6190 KMP_INTERNAL_FREE( duplicate_ok );
6191 __kmp_duplicate_library_ok = 1;
6196 __kmp_env_unset( name );
6199 KMP_DEBUG_ASSERT( 0 );
6204 KMP_INTERNAL_FREE( (void *) value );
6207 KMP_INTERNAL_FREE( (void *) name );
6213 __kmp_unregister_library( void ) {
6215 char * name = __kmp_reg_status_name();
6216 char * value = __kmp_env_get( name );
6218 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6219 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6220 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6222 __kmp_env_unset( name );
6225 KMP_INTERNAL_FREE( __kmp_registration_str );
6226 KMP_INTERNAL_FREE( value );
6227 KMP_INTERNAL_FREE( name );
6229 __kmp_registration_flag = 0;
6230 __kmp_registration_str = NULL;
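/*
 * Illustrative sketch only: the registration idea behind
 * __kmp_register_library_startup / __kmp_unregister_library above. Each copy of
 * the runtime publishes a process-keyed environment variable; if that variable
 * already holds a value this copy did not write, another runtime copy is live in
 * the same process. Assumes a POSIX environment (getenv/setenv/unsetenv/getpid);
 * the real runtime also encodes a flag address and value so a stale record left
 * by a dead neighbor can be recognized, which this sketch omits. All names here
 * (register_library, __MY_REGISTERED_LIB_...) are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static char reg_name[64];
static char reg_value[64];

/* Returns 1 if this copy registered itself, 0 if another copy was detected. */
static int register_library(long tag) {
    char value[64];
    snprintf(reg_name, sizeof(reg_name), "__MY_REGISTERED_LIB_%d", (int)getpid());
    snprintf(value, sizeof(value), "%p-%lx", (void *)&reg_name, tag);
    const char *existing = getenv(reg_name);
    if (existing != NULL && strcmp(existing, value) != 0)
        return 0;                          /* somebody registered before us */
    snprintf(reg_value, sizeof(reg_value), "%s", value);
    setenv(reg_name, reg_value, 1);
    return 1;
}

static void unregister_library(void) {
    const char *existing = getenv(reg_name);
    if (existing != NULL && strcmp(existing, reg_value) == 0)
        unsetenv(reg_name);                /* only remove our own record */
}

int main(void) {
    printf("first copy registered:  %d\n", register_library(0xCAFE));
    printf("second copy registered: %d\n", register_library(0xBEEF));
    unregister_library();
    return 0;
}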
6238 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6240 static void __kmp_check_mic_type()
6242 kmp_cpuid_t cpuid_state = {0};
6243 kmp_cpuid_t * cs_p = &cpuid_state;
6244 __kmp_x86_cpuid(1, 0, cs_p);
6246 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6247 __kmp_mic_type = mic2;
6248 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6249 __kmp_mic_type = mic3;
6251 __kmp_mic_type = non_mic;
6258 __kmp_do_serial_initialize( void )
6263 KA_TRACE( 10, ( "__kmp_do_serial_initialize: enter\n" ) );
6265 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6266 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6267 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6268 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6269 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6275 __kmp_validate_locks();
6278 __kmp_init_allocator();
6284 __kmp_register_library_startup( );
6287 if( TCR_4(__kmp_global.g.g_done) ) {
6288 KA_TRACE( 10, ( "__kmp_do_serial_initialize: reinitialization of library\n" ) );
6291 __kmp_global.g.g_abort = 0;
6292 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6295 #if KMP_USE_ADAPTIVE_LOCKS
6296 #if KMP_DEBUG_ADAPTIVE_LOCKS
6297 __kmp_init_speculative_stats();
6300 #if KMP_STATS_ENABLED 6301 __kmp_init_tas_lock( & __kmp_stats_lock );
6303 __kmp_init_lock( & __kmp_global_lock );
6304 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6305 __kmp_init_lock( & __kmp_debug_lock );
6306 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6307 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6308 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6309 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6310 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6311 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6312 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6313 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6314 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6315 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6316 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6317 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6318 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6319 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6320 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6321 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6322 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6326 __kmp_runtime_initialize();
6328 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6329 __kmp_check_mic_type();
6336 __kmp_abort_delay = 0;
6340 __kmp_dflt_team_nth_ub = __kmp_xproc;
6341 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6342 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6344 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6345 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6347 __kmp_max_nth = __kmp_sys_max_nth;
6350 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6351 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6352 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6354 __kmp_library = library_throughput;
6356 __kmp_static = kmp_sch_static_balanced;
6362 #if KMP_FAST_REDUCTION_BARRIER
6363 #define kmp_reduction_barrier_gather_bb ((int)1)
6364 #define kmp_reduction_barrier_release_bb ((int)1)
6365 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6366 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6367 #endif // KMP_FAST_REDUCTION_BARRIER
6368 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6369 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6370 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6371 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6372 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6373 #if KMP_FAST_REDUCTION_BARRIER 6374 if( i == bs_reduction_barrier ) {
6375 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6376 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6377 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6378 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6380 #endif // KMP_FAST_REDUCTION_BARRIER
6382 #if KMP_FAST_REDUCTION_BARRIER
6383 #undef kmp_reduction_barrier_release_pat
6384 #undef kmp_reduction_barrier_gather_pat
6385 #undef kmp_reduction_barrier_release_bb
6386 #undef kmp_reduction_barrier_gather_bb
6387 #endif // KMP_FAST_REDUCTION_BARRIER
6388 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6389 if (__kmp_mic_type == mic2) {
6391 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;
6392 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;
6393 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6394 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6396 #if KMP_FAST_REDUCTION_BARRIER 6397 if (__kmp_mic_type == mic2) {
6398 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6399 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6406 __kmp_env_checks = TRUE;
6408 __kmp_env_checks = FALSE;
6412 __kmp_foreign_tp = TRUE;
6414 __kmp_global.g.g_dynamic = FALSE;
6415 __kmp_global.g.g_dynamic_mode = dynamic_default;
6417 __kmp_env_initialize( NULL );
6421 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6422 if ( __kmp_str_match_true( val ) ) {
6423 kmp_str_buf_t buffer;
6424 __kmp_str_buf_init( & buffer );
6425 __kmp_i18n_dump_catalog( & buffer );
6426 __kmp_printf( "%s", buffer.str );
6427 __kmp_str_buf_free( & buffer );
6429 __kmp_env_free( & val );
6432 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
6434 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6438 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6439 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6440 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6441 __kmp_thread_pool = NULL;
6442 __kmp_thread_pool_insert_pt = NULL;
6443 __kmp_team_pool = NULL;
6448 size = ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*) ) * __kmp_threads_capacity + CACHE_LINE;
6449 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6450 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6453 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
6454 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
6459 gtid = __kmp_register_root( TRUE );
6460 KA_TRACE( 10, ( "__kmp_do_serial_initialize T#%d\n", gtid ));
6461 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6462 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6466 __kmp_common_initialize();
6470 __kmp_register_atfork();
6473 #if ! defined KMP_DYNAMIC_LIB 6478 int rc = atexit( __kmp_internal_end_atexit );
6480 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6485 #if KMP_HANDLE_SIGNALS 6492 __kmp_install_signals( FALSE );
6495 __kmp_install_signals( TRUE );
6500 __kmp_init_counter ++;
6502 __kmp_init_serial = TRUE;
6504 if (__kmp_settings) {
6509 if (__kmp_display_env || __kmp_display_env_verbose) {
6510 __kmp_env_print_2();
6512 #endif // OMP_40_ENABLED
6520 KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
6524 __kmp_serial_initialize( void )
6526 if ( __kmp_init_serial ) {
6529 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6530 if ( __kmp_init_serial ) {
6531 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6534 __kmp_do_serial_initialize();
6535 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6539 __kmp_do_middle_initialize( void )
6542 int prev_dflt_team_nth;
6544 if( !__kmp_init_serial ) {
6545 __kmp_do_serial_initialize();
6548 KA_TRACE( 10, ( "__kmp_middle_initialize: enter\n" ) );
6554 prev_dflt_team_nth = __kmp_dflt_team_nth;
6556 #if KMP_AFFINITY_SUPPORTED 6561 __kmp_affinity_initialize();
6567 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6568 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6569 __kmp_affinity_set_init_mask( i, TRUE );
6574 KMP_ASSERT( __kmp_xproc > 0 );
6575 if ( __kmp_avail_proc == 0 ) {
6576 __kmp_avail_proc = __kmp_xproc;
6581 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
6582 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6586 if ( __kmp_dflt_team_nth == 0 ) {
6587 #ifdef KMP_DFLT_NTH_CORES
6591 __kmp_dflt_team_nth = __kmp_ncores;
6592 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6593 __kmp_dflt_team_nth ) );
6598 __kmp_dflt_team_nth = __kmp_avail_proc;
6599 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6600 __kmp_dflt_team_nth ) );
6604 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6605 __kmp_dflt_team_nth = KMP_MIN_NTH;
6607 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6608 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6615 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6617 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6624 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6625 kmp_info_t *thread = __kmp_threads[ i ];
6626 if ( thread == NULL ) continue;
6627 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
6629 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
6632 KA_TRACE( 20, ( "__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6633 __kmp_dflt_team_nth) );
6635 #ifdef KMP_ADJUST_BLOCKTIME 6638 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6639 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6640 if ( __kmp_nth > __kmp_avail_proc ) {
6641 __kmp_zero_bt = TRUE;
6647 TCW_SYNC_4(__kmp_init_middle, TRUE);
6649 KA_TRACE( 10, ( "__kmp_do_middle_initialize: exit\n" ) );
6653 __kmp_middle_initialize( void )
6655 if ( __kmp_init_middle ) {
6658 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6659 if ( __kmp_init_middle ) {
6660 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6663 __kmp_do_middle_initialize();
6664 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6668 __kmp_parallel_initialize( void )
6670 int gtid = __kmp_entry_gtid();
6673 if( TCR_4(__kmp_init_parallel) ) return;
6674 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6675 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6678 if( TCR_4(__kmp_global.g.g_done) ) {
6679 KA_TRACE( 10, ( "__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6680 __kmp_infinite_loop();
6686 if( !__kmp_init_middle ) {
6687 __kmp_do_middle_initialize();
6691 KA_TRACE( 10, ( "__kmp_parallel_initialize: enter\n" ) );
6692 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6694 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6699 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6700 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6701 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6705 # if KMP_HANDLE_SIGNALS 6707 __kmp_install_signals( TRUE );
6711 __kmp_suspend_initialize();
6713 #if defined(USE_LOAD_BALANCE) 6714 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6715 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6718 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6719 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6723 if ( __kmp_version ) {
6724 __kmp_print_version_2();
6728 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6731 KA_TRACE( 10, ( "__kmp_parallel_initialize: exit\n" ) );
6733 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6740 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6743 kmp_disp_t *dispatch;
6748 this_thr->th.th_local.this_construct = 0;
6749 #if KMP_CACHE_MANAGE 6750 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
6752 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6753 KMP_DEBUG_ASSERT( dispatch );
6754 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6757 dispatch->th_disp_index = 0;
6759 dispatch->th_doacross_buf_idx = 0;
6761 if( __kmp_env_consistency_check )
6762 __kmp_push_parallel( gtid, team->t.t_ident );
6768 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6771 if( __kmp_env_consistency_check )
6772 __kmp_pop_parallel( gtid, team->t.t_ident );
6776 __kmp_invoke_task_func( int gtid )
6779 int tid = __kmp_tid_from_gtid( gtid );
6780 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6781 kmp_team_t *team = this_thr->th.th_team;
6783 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6785 if ( __itt_stack_caller_create_ptr ) {
6786 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
6789 #if INCLUDE_SSC_MARKS 6790 SSC_MARK_INVOKING();
6795 void **exit_runtime_p;
6796 ompt_task_id_t my_task_id;
6797 ompt_parallel_id_t my_parallel_id;
6800 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6801 ompt_task_info.frame.exit_runtime_frame);
6803 exit_runtime_p = &dummy;
6807 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6808 my_parallel_id = team->t.ompt_team_info.parallel_id;
6810 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6811 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6812 my_parallel_id, my_task_id);
6818 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6819 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6820 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6821 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
6829 if ( __itt_stack_caller_create_ptr ) {
6830 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
6833 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6840 __kmp_teams_master( int gtid )
6843 kmp_info_t *thr = __kmp_threads[ gtid ];
6844 kmp_team_t *team = thr->th.th_team;
6845 ident_t *loc = team->t.t_ident;
6846 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6847 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6848 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
6849 KA_TRACE( 20, ( "__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
6850 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
6853 #if INCLUDE_SSC_MARKS
6856 __kmp_fork_call( loc, gtid, fork_context_intel,
6859 (void *)thr->th.th_teams_microtask,
6861 (microtask_t)thr->th.th_teams_microtask,
6862 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6864 #if INCLUDE_SSC_MARKS
6870 __kmp_join_call( loc, gtid
6872 , fork_context_intel
6878 __kmp_invoke_teams_master( int gtid )
6880 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6881 kmp_team_t *team = this_thr->th.th_team;
6883 if ( !__kmp_threads[gtid]->th.th_team->t.t_serialized )
6884 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6886 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6887 __kmp_teams_master( gtid );
6888 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
6899 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6901 kmp_info_t *thr = __kmp_threads[gtid];
6903 if( num_threads > 0 )
6904 thr->th.th_set_nproc = num_threads;
6912 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6914 kmp_info_t *thr = __kmp_threads[gtid];
6915 KMP_DEBUG_ASSERT(num_teams >= 0);
6916 KMP_DEBUG_ASSERT(num_threads >= 0);
6918 if( num_teams == 0 )
6920 if( num_teams > __kmp_max_nth ) {
6921 if ( !__kmp_reserve_warn ) {
6922 __kmp_reserve_warn = 1;
6925 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6926 KMP_HNT( Unset_ALL_THREADS ),
6930 num_teams = __kmp_max_nth;
6933 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6936 if( num_threads == 0 ) {
6937 if( !TCR_4(__kmp_init_middle) )
6938 __kmp_middle_initialize();
6939 num_threads = __kmp_avail_proc / num_teams;
6940 if( num_teams * num_threads > __kmp_max_nth ) {
6942 num_threads = __kmp_max_nth / num_teams;
6945 if( num_teams * num_threads > __kmp_max_nth ) {
6946 int new_threads = __kmp_max_nth / num_teams;
6947 if ( !__kmp_reserve_warn ) {
6948 __kmp_reserve_warn = 1;
6951 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6952 KMP_HNT( Unset_ALL_THREADS ),
6956 num_threads = new_threads;
6959 thr->th.th_teams_size.nth = num_threads;
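/*
 * Illustrative sketch only: the sizing arithmetic __kmp_push_num_teams applies
 * above. When no thread count is requested, each team gets avail_proc/num_teams
 * threads, and in every case the product num_teams*num_threads is clamped so it
 * does not exceed the overall thread limit (max_nth). This is a standalone
 * restatement, not the runtime's API, and the warning path is omitted.
 */
#include <stdio.h>

static int threads_per_team(int num_teams, int num_threads,
                            int avail_proc, int max_nth) {
    if (num_threads == 0) {
        num_threads = avail_proc / num_teams;       /* spread procs over teams */
        if (num_teams * num_threads > max_nth)
            num_threads = max_nth / num_teams;      /* respect the global cap  */
    } else if (num_teams * num_threads > max_nth) {
        num_threads = max_nth / num_teams;          /* request exceeds the cap */
    }
    return num_threads;
}

int main(void) {
    /* 4 teams on 16 procs, cap 64: 4 threads per team. */
    printf("%d\n", threads_per_team(4, 0, 16, 64));
    /* 8 teams, 32 threads each requested, cap 64: clamped to 8 per team. */
    printf("%d\n", threads_per_team(8, 32, 16, 64));
    return 0;
}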
6967 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
6969 kmp_info_t *thr = __kmp_threads[gtid];
6970 thr->th.th_set_proc_bind = proc_bind;
6978 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
6980 kmp_info_t *this_thr = __kmp_threads[gtid];
6986 KMP_DEBUG_ASSERT( team );
6987 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
6988 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
6991 team->t.t_construct = 0;
6992 team->t.t_ordered.dt.t_value = 0;
6995 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
6996 if ( team->t.t_max_nproc > 1 ) {
6998 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
6999 team->t.t_disp_buffer[ i ].buffer_index = i;
7001 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7005 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
7007 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7012 KMP_ASSERT( this_thr->th.th_team == team );
7015 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7016 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7017 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7022 __kmp_fork_barrier( gtid, 0 );
7027 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7029 kmp_info_t *this_thr = __kmp_threads[gtid];
7031 KMP_DEBUG_ASSERT( team );
7032 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7033 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7039 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7040 __kmp_printf( "GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, __kmp_threads[gtid]);
7041 __kmp_printf( "__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7042 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7043 __kmp_print_structure();
7045 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7046 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7049 __kmp_join_barrier( gtid );
7052 KMP_ASSERT( this_thr->th.th_team == team );
7059 #ifdef USE_LOAD_BALANCE 7066 __kmp_active_hot_team_nproc( kmp_root_t *root )
7070 kmp_team_t *hot_team;
7072 if ( root->r.r_active ) {
7075 hot_team = root->r.r_hot_team;
7076 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7077 return hot_team->t.t_nproc - 1;
7084 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7085 if ( hot_team->t.t_threads[i]->th.th_active ) {
7097 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7101 int hot_team_active;
7102 int team_curr_active;
7105 KB_TRACE( 20, ( "__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7106 root, set_nproc ) );
7107 KMP_DEBUG_ASSERT( root );
7108 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
7109 KMP_DEBUG_ASSERT( set_nproc > 1 );
7111 if ( set_nproc == 1) {
7112 KB_TRACE( 20, ( "__kmp_load_balance_nproc: serial execution.\n" ) );
7123 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7124 hot_team_active = __kmp_active_hot_team_nproc( root );
7125 team_curr_active = pool_active + hot_team_active + 1;
7130 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7131 KB_TRACE( 30, ( "__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7132 system_active, pool_active, hot_team_active ) );
7134 if ( system_active < 0 ) {
7141 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7142 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7147 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7148 : root->r.r_hot_team->t.t_nproc);
7149 if ( retval > set_nproc ) {
7152 if ( retval < KMP_MIN_NTH ) {
7153 retval = KMP_MIN_NTH;
7156 KB_TRACE( 20, ( "__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7166 if ( system_active < team_curr_active ) {
7167 system_active = team_curr_active;
7169 retval = __kmp_avail_proc - system_active + team_curr_active;
7170 if ( retval > set_nproc ) {
7173 if ( retval < KMP_MIN_NTH ) {
7174 retval = KMP_MIN_NTH;
7177 KB_TRACE( 20, ( "__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7188 __kmp_cleanup( void )
7192 KA_TRACE( 10, ( "__kmp_cleanup: enter\n" ) );
7194 if (TCR_4(__kmp_init_parallel)) {
7195 #if KMP_HANDLE_SIGNALS 7196 __kmp_remove_signals();
7198 TCW_4(__kmp_init_parallel, FALSE);
7201 if (TCR_4(__kmp_init_middle)) {
7202 #if KMP_AFFINITY_SUPPORTED 7203 __kmp_affinity_uninitialize();
7205 __kmp_cleanup_hierarchy();
7206 TCW_4(__kmp_init_middle, FALSE);
7209 KA_TRACE( 10, ( "__kmp_cleanup: go serial cleanup\n" ) );
7211 if (__kmp_init_serial) {
7212 __kmp_runtime_destroy();
7213 __kmp_init_serial = FALSE;
7216 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7217 if ( __kmp_root[ f ] != NULL ) {
7218 __kmp_free( __kmp_root[ f ] );
7219 __kmp_root[ f ] = NULL;
7222 __kmp_free( __kmp_threads );
7225 __kmp_threads = NULL;
7227 __kmp_threads_capacity = 0;
7229 #if KMP_USE_DYNAMIC_LOCK 7230 __kmp_cleanup_indirect_user_locks();
7232 __kmp_cleanup_user_locks();
7235 #if KMP_AFFINITY_SUPPORTED
7236 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7237 __kmp_cpuinfo_file = NULL;
7240 #if KMP_USE_ADAPTIVE_LOCKS
7241 #if KMP_DEBUG_ADAPTIVE_LOCKS
7242 __kmp_print_speculative_stats();
7245 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7246 __kmp_nested_nth.nth = NULL;
7247 __kmp_nested_nth.size = 0;
7248 __kmp_nested_nth.used = 0;
7250 __kmp_i18n_catclose();
7252 #if KMP_STATS_ENABLED 7253 __kmp_accumulate_stats_at_exit();
7254 __kmp_stats_list.deallocate();
7257 KA_TRACE( 10, ( "__kmp_cleanup: exit\n" ) );
7264 __kmp_ignore_mppbeg( void )
7268 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7269 if (__kmp_str_match_false( env ))
7277 __kmp_ignore_mppend( void )
7281 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7282 if (__kmp_str_match_false( env ))
7290 __kmp_internal_begin( void )
7297 gtid = __kmp_entry_gtid();
7298 root = __kmp_threads[ gtid ]->th.th_root;
7299 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7301 if( root->r.r_begin ) return;
7302 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7303 if( root->r.r_begin ) {
7304 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7308 root->r.r_begin = TRUE;
7310 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7318 __kmp_user_set_library ( enum library_type arg )
7326 gtid = __kmp_entry_gtid();
7327 thread = __kmp_threads[ gtid ];
7329 root = thread->th.th_root;
7331 KA_TRACE( 20, ( "__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7332 if (root->r.r_in_parallel) {
7333 KMP_WARNING( SetLibraryIncorrectCall );
7338 case library_serial :
7339 thread->th.th_set_nproc = 0;
7340 set__nproc( thread, 1 );
7342 case library_turnaround :
7343 thread->th.th_set_nproc = 0;
7344 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7346 case library_throughput :
7347 thread->th.th_set_nproc = 0;
7348 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7351 KMP_FATAL( UnknownLibraryType, arg );
7354 __kmp_aux_set_library ( arg );
7358 __kmp_aux_set_stacksize( size_t arg )
7360 if (! __kmp_init_serial)
7361 __kmp_serial_initialize();
7364 if (arg & (0x1000 - 1)) {
7365 arg &= ~(0x1000 - 1);
7370 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7373 if (! TCR_4(__kmp_init_parallel)) {
7376 if (value < __kmp_sys_min_stksize )
7377 value = __kmp_sys_min_stksize ;
7378 else if (value > KMP_MAX_STKSIZE)
7379 value = KMP_MAX_STKSIZE;
7381 __kmp_stksize = value;
7383 __kmp_env_stksize = TRUE;
7386 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7392 __kmp_aux_set_library ( enum library_type arg )
7394 __kmp_library = arg;
7396 switch ( __kmp_library ) {
7397 case library_serial :
7399 KMP_INFORM( LibraryIsSerial );
7400 (void) __kmp_change_library( TRUE );
7403 case library_turnaround :
7404 (void) __kmp_change_library( TRUE );
7406 case library_throughput :
7407 (void) __kmp_change_library( FALSE );
7410 KMP_FATAL( UnknownLibraryType, arg );
7418 __kmp_aux_set_blocktime ( int arg, kmp_info_t *thread, int tid )
7420 int blocktime = arg;
7424 __kmp_save_internal_controls( thread );
7427 if (blocktime < KMP_MIN_BLOCKTIME)
7428 blocktime = KMP_MIN_BLOCKTIME;
7429 else if (blocktime > KMP_MAX_BLOCKTIME)
7430 blocktime = KMP_MAX_BLOCKTIME;
7432 set__blocktime_team( thread->th.th_team, tid, blocktime );
7433 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
7436 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7438 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7439 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
7444 set__bt_set_team( thread->th.th_team, tid, bt_set );
7445 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
7446 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7447 __kmp_gtid_from_tid(tid, thread->th.th_team),
7448 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
7452 __kmp_aux_set_defaults(
7456 if ( ! __kmp_init_serial ) {
7457 __kmp_serial_initialize();
7459 __kmp_env_initialize( str );
7463 || __kmp_display_env || __kmp_display_env_verbose
7476 PACKED_REDUCTION_METHOD_T
7477 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7478 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7479 kmp_critical_name *lck )
7487 PACKED_REDUCTION_METHOD_T retval;
7491 KMP_DEBUG_ASSERT( loc );
7492 KMP_DEBUG_ASSERT( lck );
7494 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7495 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7497 retval = critical_reduce_block;
7499 team_size = __kmp_get_team_num_threads( global_tid );
7501 if( team_size == 1 ) {
7503 retval = empty_reduce_block;
7507 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7508 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7510 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
7512 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7514 int teamsize_cutoff = 4;
7516 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 7517 if( __kmp_mic_type != non_mic ) {
7518 teamsize_cutoff = 8;
7521 if( tree_available ) {
7522 if( team_size <= teamsize_cutoff ) {
7523 if ( atomic_available ) {
7524 retval = atomic_reduce_block;
7527 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7529 } else if ( atomic_available ) {
7530 retval = atomic_reduce_block;
7533 #error "Unknown or unsupported OS"
7534 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7536 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7538 #if KMP_OS_LINUX || KMP_OS_WINDOWS
7542 if( atomic_available ) {
7543 if( num_vars <= 2 ) {
7544 retval = atomic_reduce_block;
7550 if( atomic_available && ( num_vars <= 3 ) ) {
7551 retval = atomic_reduce_block;
7552 } else if( tree_available ) {
7553 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7554 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7559 #error "Unknown or unsupported OS"
7563 #error "Unknown or unsupported architecture"
7572 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
7574 PACKED_REDUCTION_METHOD_T forced_retval;
7576 int atomic_available, tree_available;
7578 switch( ( forced_retval = __kmp_force_reduction_method ) )
7580 case critical_reduce_block:
7584 case atomic_reduce_block:
7585 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7586 KMP_ASSERT( atomic_available );
7589 case tree_reduce_block:
7590 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7591 KMP_ASSERT( tree_available );
7592 #if KMP_FAST_REDUCTION_BARRIER 7593 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7601 retval = forced_retval;
7604 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7606 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7607 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7614 __kmp_get_reduce_method( void ) {
7615 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
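/*
 * Illustrative sketch only: __kmp_get_reduce_method above recovers the reduction
 * method by shifting the packed value right by 8 bits, i.e. the method sits in
 * the upper bits while the low byte carries other data (the runtime uses it for
 * the barrier variant). The encoding below is a generic pack/unpack example, an
 * assumption for illustration, not the runtime's actual PACKED_REDUCTION_METHOD_T
 * layout; the enum names are hypothetical.
 */
#include <assert.h>
#include <stdio.h>

enum method  { critical_block = 1, atomic_block = 2, tree_block = 3 };
enum barrier { plain_barrier  = 0, reduction_barrier = 1 };

static int pack(enum method m, enum barrier b)  { return ((int)m << 8) | (int)b; }
static enum method  unpack_method(int packed)   { return (enum method)(packed >> 8); }
static enum barrier unpack_barrier(int packed)  { return (enum barrier)(packed & 0xFF); }

int main(void) {
    int packed = pack(tree_block, reduction_barrier);
    assert(unpack_method(packed)  == tree_block);
    assert(unpack_barrier(packed) == reduction_barrier);
    printf("packed=0x%04x method=%d barrier=%d\n",
           packed, unpack_method(packed), unpack_barrier(packed));
    return 0;
}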