#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_affinity.h"

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
    /* OpenMP API level string selected at build time */;

char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t *team, int new_nproc,
                                   kmp_internal_control_t *new_icvs, ident_t *loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only = 0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc,
                           kmp_internal_control_t *new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc );
#endif

static int __kmp_expand_threads( int nWish, int nNeed );
static int __kmp_unregister_root_other_thread( int gtid );
static void __kmp_unregister_library( void );
static void __kmp_reap_thread( kmp_info_t *thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
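/* Calculate the identifier of the current thread: depending on __kmp_gtid_mode
   this uses thread-local data (KMP_TDATA_GTID), keyed TLS, or a search of the
   thread stacks recorded in __kmp_threads.  Returns KMP_GTID_DNE if gtids are
   not initialized yet. */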
int
__kmp_get_global_thread_id( )
{
    int             i;
    kmp_info_t    **other_threads;
    size_t          stack_data;
    char           *stack_addr;
    size_t          stack_size;
    char           *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */
        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated stack size
                   is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /* if we haven't been assigned a gtid, then return code */
    if( i < 0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}
/* Same as above, but register the calling thread as a new root if it has no gtid yet. */
int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
    }

    KMP_DEBUG_ASSERT( gtid >= 0 );

    return gtid;
}
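/* Check whether the stack of the given thread overlaps the stack of any other
   registered thread; issues a fatal StackOverlap message if an overlap is
   detected.  Also prints the stack bounds when KMP_STORAGE_MAP is enabled. */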
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                            (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                            "th_%d stack (overlapped)",
                            __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}
void
__kmp_infinite_loop( void )
{
    static int done = FALSE;

    while( ! done ) {
        KMP_YIELD( 1 );
    }
}
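/* Print one line of the storage map.  When KMP_PRINT_DATA_PLACEMENT is enabled
   the range [p1,p2) is additionally annotated with the memory nodes that back
   its pages. */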
#define MAX_MESSAGE     512

void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...)
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    va_start( ap, format );
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2,
                  (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    int node;
    if( gtid >= 0 ) {
        if( p1 <= p2 && (char*)p2 - (char*)p1 == size ) {
            if( __kmp_storage_map_verbose ) {
                node = __kmp_get_host_node(p1);
                if( node < 0 ) /* doesn't work, so don't try this next time */
                    __kmp_storage_map_verbose = FALSE;
                else {
                    char *last;
                    int lastNode;
                    int localProc = __kmp_get_cpu_from_gtid(gtid);

                    const int page_size = KMP_GET_PAGE_SIZE();

                    p1 = (void *)( (size_t)p1 & ~((size_t)page_size - 1) );
                    p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)page_size - 1) );
                    if( localProc >= 0 )
                        __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid, localProc>>1);
                    else
                        __kmp_printf_no_lock("  GTID %d\n", gtid);
# if KMP_USE_PRCTL
                    do {
                        last = (char*)p1;
                        lastNode = node;
                        /* This loop collates adjacent pages with the same host node. */
                        do {
                            p1 = (char*)p1 + page_size;
                        } while( p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode );
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
                                             (char*)p1 - 1, lastNode);
                    } while( p1 <= p2 );
# else
                    __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                                         (char*)p1 + (page_size - 1), __kmp_get_host_node(p1));
                    if( p1 < p2 ) {
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                             (char*)p2 + (page_size - 1), __kmp_get_host_node(p2));
                    }
# endif
                }
            }
        } else
            __kmp_printf_no_lock("  %s\n", KMP_I18N_STR( StorageMapWarning ) );
    }
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}
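/* Emit an "OMP warning" message unless warnings have been disabled through
   KMP_WARNINGS (__kmp_generate_warnings == kmp_warnings_off). */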
void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}
void
__kmp_abort_process()
{
    /* Later threads may stall here, but that's ok because abort() will kill them. */
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }

    if ( KMP_OS_WINDOWS ) {
        /* Let other threads know of abnormal termination and prevent deadlock
           if abort happened during library initialization or shutdown. */
        __kmp_global.g.g_abort = SIGABRT;
    }

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );
}

void
__kmp_abort_thread( void )
{
    /* TODO: Eliminate g_abort global variable and this function.
       In case of abort just call abort(); it will kill all the threads. */
    __kmp_infinite_loop();
}
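/* The two helpers below dump the storage map of a thread descriptor and of a
   team descriptor, respectively; they are only used when KMP_STORAGE_MAP is
   set. */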
static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier,
                                  "%s_%d.t_bar", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
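/* Support for tearing the runtime down when the dynamic library is unloaded:
   on Windows the loader notifies DllMain, which resets the bootstrap locks and
   runs the internal shutdown paths. */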
#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    // TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // Check that there are no other alive threads registered with the OMP lib.
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Now it might be safe to check and reset the locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
#ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
#endif // KMP_DEBUG
}
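/* Entry point invoked by the Windows loader for process/thread attach and
   detach notifications. */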
BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_ATTACH\n" ));
            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_DETACH T#%d\n",
                            __kmp_gtid_get_specific() ));

            if( lpReserved != NULL ) {
                // lpReserved != NULL means the process terminates;
                // other threads may already have been killed by the OS,
                // so break any bootstrap locks they might still hold.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );
            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ( "DllMain: THREAD_ATTACH\n" ));
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ( "DllMain: THREAD_DETACH T#%d\n",
                            __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */
/* Change the library type to "status" and return the old type.
   Called from within initialization routines where __kmp_initz_lock is held. */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  // check whether KMP_LIBRARY=throughput (even init count)

    if ( status ) {
        __kmp_yield_init |= 1;  // throughput => turnaround (odd init count)
    } else {
        __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
    }

    return old_status;  // return previous setting of whether KMP_LIBRARY=throughput
}
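/* __kmp_parallel_deo / __kmp_parallel_dxo implement the "ordered" construct
   inside a parallel region: deo waits until it is the calling thread's turn,
   dxo passes the turn to the next thread in the team. */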
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
    }
#endif /* BUILD_PARALLEL_ORDERED */
}
void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        /* use the tid of the next thread in this team */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif
    }
#endif /* BUILD_PARALLEL_ORDERED */
}
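/* Decide whether the calling thread executes a SINGLE construct; returns
   nonzero for the thread that claims it.  The barrier for a SINGLE section is
   always emitted explicitly by the compiler. */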
int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the single block */
        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);
        }
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        {   // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}
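/* Work out how many threads may actually be reserved for a new team: applies
   dynamic adjustment (load balance, thread limit, random), the KMP_ALL_THREADS
   limit, and the capacity of the __kmp_threads array.  A return value of 1
   means the region will be serialized. */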
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads
#if OMP_40_ENABLED
                     , int enter_teams
#endif /* OMP_40_ENABLED */
)
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                        : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));
        } else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                                master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                                master_tid, new_nthreads ));
            }
        }
    }

    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
                          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        // If dyn-var is false, emit a 1-time warning.
        if ( ! get__dynamic_2( parent_team, master_tid )
          && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
                            master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
                        master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    // Check if the threads array is large enough, or needs expanding.
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > capacity ) {
        // Expand the threads array.
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
                            root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            // The threads array was not expanded enough.
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            // If dyn-var is false, emit a 1-time warning.
            if ( ! get__dynamic_2( parent_team, master_tid )
              && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                } else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}
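/* Install the master thread and allocate/wake the workers of a newly allocated
   team.  Hot teams are reused without re-forking their workers. */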
static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int i;
    int use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) {  // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1; // index in array of hot teams
        if( master_th->th.th_teams_microtask ) {    // are we inside the teams construct?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level; // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level; // level was not increased in teams construct for team_of_workers before the parallel
            }
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1; // the team is ready to use
            } else {
                use_hot_team = 0; // threads are not allocated yet
                hot_teams[level].hot_team = team; // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        } else {
            use_hot_team = 0;
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level = master_th->th.th_teams_level;
            thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
            { // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif
    }

    KMP_MB();
}
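/* On x86/x86_64, propagateFPControl captures the master's x87 control word and
   MXCSR into the team at fork time; updateHWFPControl restores the hardware
   registers from the team values if they have changed.  Both are no-ops on
   other architectures. */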
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        // Only write to the team structure if the values have changed,
        // to avoid unnecessary cache-line invalidation.
        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
        // Other code in the runtime checks this flag to know whether it should
        // restore the registers, so it must be kept correct.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    } else {
        // Similarly, don't write to this cache line unless we have to.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
    }
}

// Do the opposite: set the hardware registers to the updated values from the team.
static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        // Only reset the fp control regs if they have been changed in the team.
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x)  ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
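/* Execute a parallel region that has been serialized: the master runs it in
   its (possibly nested) serial team of one thread. */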
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ( "__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        // No proc_bind clause was specified, so use the current value
        // of proc-bind-var for this parallel region.
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    // Reset for next parallel region
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used */
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                           ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                           proc_bind,
#endif
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );
        } else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident      = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc      = 1;
        serial_team->t.t_parent     = this_thr->th.th_team;
        serial_team->t.t_sched      = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team        = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        copy_icvs(
            & this_thr->th.th_current_task->td_icvs,
            & this_thr->th.th_current_task->td_parent->td_icvs );

        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }

#if OMP_40_ENABLED
        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];
        }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
        serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
#endif
        this_thr->th.th_info.ds.ds_tid = 0;

        /* set thread cache values */
        this_thr->th.th_team_nproc      = 1;
        this_thr->th.th_team_master     = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level        = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl( serial_team );

        /* check if we need to allocate dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

        KMP_MB();

    } else {
        /* this serialized team is already being used, so just add another nested level */
        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;
        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        /* allocate/push dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        {
            dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
            disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
            serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        KMP_MB();
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );
}
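/* __kmp_fork_call does most of the work of forking a parallel region: it
   decides how many threads to use, allocates or reuses a team, copies the
   outlined-function arguments, and either invokes the microtask on the master
   (Intel entry points) or returns to the caller (GNU entry points).  Returns
   TRUE if the region really went parallel, FALSE if it was serialized. */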
int
__kmp_fork_call(
    ident_t   * loc,
    int         gtid,
    enum fork_context_e  call_context, // Intel, GNU, ...
    kmp_int32   argc,
#if OMPT_SUPPORT
    void       *unwrapped_task,
#endif
    microtask_t microtask,
    launch_t    invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
    va_list   * ap
#else
    va_list     ap
#endif
    )
{
    void          **argv;
    int             i;
    int             master_tid;
    int             master_this_cons;
    kmp_team_t     *team;
    kmp_team_t     *parent_team;
    kmp_info_t     *master_th;
    kmp_root_t     *root;
    int             nthreads;
    int             master_active;
    int             master_set_numthreads;
    int             level;
#if OMP_40_ENABLED
    int             active_level;
    int             teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t **p_hot_teams;
#endif
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
    KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
    if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
        /* Some systems prefer the stack for the root thread(s) to start with
           some gap from the parent stack to prevent false sharing. */
        void *dummy = KMP_ALLOCA(__kmp_stkpadding);
        /* These 2 lines below are so this does not get optimized out */
        if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
            __kmp_stkpadding += (short)((kmp_int64)dummy);
    }

    /* initialize if needed */
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    /* setup current data */
    master_th     = __kmp_threads[ gtid ];
    parent_team   = master_th->th.th_team;
    master_tid    = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root          = master_th->th.th_root;
    master_active = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
    ompt_parallel_id_t ompt_parallel_id;
    ompt_task_id_t ompt_task_id;
    ompt_frame_t *ompt_frame;
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;

    if (ompt_enabled) {
        ompt_parallel_id = __ompt_parallel_id_new(gtid);
        ompt_task_id = __ompt_get_task_id_internal(0);
        ompt_frame = __ompt_get_task_frame_internal(0);
    }
#endif

    // Nested level will be an index in the nested nthreads array
    level         = parent_team->t.t_level;
#if OMP_40_ENABLED
    active_level  = parent_team->t.t_active_level;
    teams_level   = master_th->th.th_teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
    p_hot_teams   = &master_th->th.th_hot_teams;
    if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
        *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
                                sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
        (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
        (*p_hot_teams)[0].hot_team_nth = 1;
    }
#endif

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
        int team_size = master_set_numthreads;

        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
            ompt_task_id, ompt_frame, ompt_parallel_id,
            team_size, unwrapped_task, OMPT_INVOKER(call_context));
    }
#endif

    master_th->th.th_ident = loc;
#if OMP_40_ENABLED
    if ( master_th->th.th_teams_microtask &&
         ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
        // This is the start of a parallel that is nested inside a teams construct.
        // The team is actual (hot), all workers are ready at the fork barrier.
        parent_team->t.t_ident = loc;
        __kmp_alloc_argv_entries( argc, parent_team, TRUE );
        parent_team->t.t_argc  = argc;
        argv = (void**)parent_team->t.t_argv;
        for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
#else
            *argv++ = va_arg( ap, void * );
#endif
        /* Increment our nested depth levels, but not increase the serialization */
        if ( parent_team == master_th->th.th_serial_team ) {
            // we are in serialized parallel
            __kmpc_serialized_parallel(loc, gtid);
            KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
            parent_team->t.t_serialized--; // keep enquiry functions correct; restored at join time

#if OMPT_SUPPORT
            void *dummy;
            void **exit_runtime_p;

            ompt_lw_taskteam_t lw_taskteam;

            if (ompt_enabled) {
                __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                    unwrapped_task, ompt_parallel_id);
                lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                __ompt_lw_taskteam_link(&lw_taskteam, master_th);

#if OMPT_TRACE
                /* OMPT implicit task begin */
                my_task_id = lw_taskteam.ompt_task_info.task_id;
                my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                        my_parallel_id, my_task_id);
                }
#endif

                /* OMPT state */
                master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
            } else {
                exit_runtime_p = &dummy;
            }
#endif

            {
                KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
                KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
                __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                                        , exit_runtime_p
#endif
                                        );
            }

#if OMPT_SUPPORT
            *exit_runtime_p = NULL;
            if (ompt_enabled) {
#if OMPT_TRACE
                lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;

                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                        ompt_parallel_id, ompt_task_id);
                }

                __ompt_lw_taskteam_unlink(master_th);
                // clear the task id only after unlinking the task
                lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
#endif

                if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                        ompt_parallel_id, ompt_task_id,
                        OMPT_INVOKER(call_context));
                }
                master_th->th.ompt_thread_info.state = ompt_state_overhead;
            }
#endif
            return TRUE;
        }

        parent_team->t.t_pkfn  = microtask;
#if OMPT_SUPPORT
        parent_team->t.ompt_team_info.microtask = unwrapped_task;
#endif
        parent_team->t.t_invoke = invoker;
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
        parent_team->t.t_active_level ++;
        parent_team->t.t_level ++;

        /* Change number of threads in the team if requested */
        if ( master_set_numthreads ) {   // The parallel has num_threads clause
            if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
                // only can reduce the number of threads dynamically, cannot increase
                kmp_info_t **other_threads = parent_team->t.t_threads;
                parent_team->t.t_nproc = master_set_numthreads;
                for ( i = 0; i < master_set_numthreads; ++i ) {
                    other_threads[i]->th.th_team_nproc = master_set_numthreads;
                }
                // Keep extra threads hot in the team for possible next parallels
            }
            master_th->th.th_set_nproc = 0;
        }

#if USE_DEBUGGER
        if ( __kmp_debugging ) {    // Let debugger override number of threads.
            int nth = __kmp_omp_num_threads( loc );
            if ( nth > 0 ) {        // 0 means debugger does not want to change it
                master_set_numthreads = nth;
            }
        }
#endif

        KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
        __kmp_internal_fork( loc, gtid, parent_team );
        KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );

        /* Invoke microtask for MASTER thread */
        KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                       gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        {
            KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
            KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
            if (! parent_team->t.t_invoke( gtid )) {
                KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
            }
        }
        KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                       gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
        KMP_MB();       /* Flush all pending memory write invalidates.  */

        KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));

        return TRUE;
    } // Parallel closely nested in teams construct
#endif /* OMP_40_ENABLED */
#if KMP_DEBUG
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
    }
#endif

    if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
        nthreads = 1;
    } else {
#if OMP_40_ENABLED
        int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
#endif
        nthreads = master_set_numthreads ?
            master_set_numthreads : get__nproc_2( parent_team, master_tid );

        // Check if we need to take the forkjoin lock (no need for serialized
        // parallel out of teams construct).
        if ( nthreads > 1 ) {
            if ( ( !get__nested(master_th) && (root->r.r_in_parallel
#if OMP_40_ENABLED
                   && !enter_teams
#endif /* OMP_40_ENABLED */
                 ) ) || ( __kmp_library == library_serial ) ) {
                KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
                                gtid, nthreads ));
                nthreads = 1;
            }
        }
        if ( nthreads > 1 ) {
            /* determine how many new threads we can use */
            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

            nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
#if OMP_40_ENABLED
                                             , enter_teams
#endif /* OMP_40_ENABLED */
                                             );
            if ( nthreads == 1 ) {
                // Free lock for single thread execution here;
                // for multi-thread execution it will be freed later
                // after team of threads created and initialized
                __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            }
        }
    }
    KMP_DEBUG_ASSERT( nthreads > 0 );

    /* If we temporarily changed the set number of threads then restore it now */
    master_th->th.th_set_nproc = 0;
    if ( nthreads == 1 ) {
#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        void *   args[ argc ];
#else
        void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
#endif

        KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));

        __kmpc_serialized_parallel(loc, gtid);

        if ( call_context == fork_context_intel ) {
            master_th->th.th_serial_team->t.t_ident = loc;
#if OMP_40_ENABLED
            if ( !ap ) {
                // revert change made in __kmpc_serialized_parallel()
                master_th->th.th_serial_team->t.t_level--;
                // Get args from parent team for teams construct

#if OMPT_SUPPORT
                void *dummy;
                void **exit_runtime_p;

                ompt_lw_taskteam_t lw_taskteam;

                if (ompt_enabled) {
                    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                        unwrapped_task, ompt_parallel_id);
                    lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                    exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                    __ompt_lw_taskteam_link(&lw_taskteam, master_th);

#if OMPT_TRACE
                    my_task_id = lw_taskteam.ompt_task_info.task_id;
                    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                            ompt_parallel_id, my_task_id);
                    }
#endif

                    /* OMPT state */
                    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
                } else {
                    exit_runtime_p = &dummy;
                }
#endif

                {
                    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
                    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
                    __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                        , exit_runtime_p
#endif
                    );
                }

#if OMPT_SUPPORT
                *exit_runtime_p = NULL;
                if (ompt_enabled) {
#if OMPT_TRACE
                    lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;

                    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                            ompt_parallel_id, ompt_task_id);
                    }

                    __ompt_lw_taskteam_unlink(master_th);
                    // clear the task id only after unlinking the task
                    lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
#endif

                    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                            ompt_parallel_id, ompt_task_id,
                            OMPT_INVOKER(call_context));
                    }
                    master_th->th.ompt_thread_info.state = ompt_state_overhead;
                }
#endif
            }
            else if ( microtask == (microtask_t)__kmp_teams_master ) {
                KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
                team = master_th->th.th_team;
                team->t.t_invoke = invoker;
                __kmp_alloc_argv_entries( argc, team, TRUE );
                team->t.t_argc = argc;
                argv = (void**) team->t.t_argv;
                if ( ap ) {
                    for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                        *argv++ = va_arg( *ap, void * );
#else
                        *argv++ = va_arg( ap, void * );
#endif
                } else {
                    for( i=0; i < argc; ++i )
                        // Get args from parent team for teams construct
                        argv[i] = parent_team->t.t_argv[i];
                }
                // revert change made in __kmpc_serialized_parallel()
                // because initial code in teams should have level=0
                team->t.t_level--;
                // call special invoker for outer "parallel" of the teams construct
                {
                    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
                    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
                    invoker(gtid);
                }
            }
            else {
#endif /* OMP_40_ENABLED */
                argv = args;
                for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );
#else
                    *argv++ = va_arg( ap, void * );
#endif
                KMP_MB();

#if OMPT_SUPPORT
                void *dummy;
                void **exit_runtime_p;

                ompt_lw_taskteam_t lw_taskteam;

                if (ompt_enabled) {
                    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                        unwrapped_task, ompt_parallel_id);
                    lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                    exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                    __ompt_lw_taskteam_link(&lw_taskteam, master_th);

#if OMPT_TRACE
                    /* OMPT implicit task begin */
                    my_task_id = lw_taskteam.ompt_task_info.task_id;
                    my_parallel_id = ompt_parallel_id;
                    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                            my_parallel_id, my_task_id);
                    }
#endif

                    /* OMPT state */
                    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
                } else {
                    exit_runtime_p = &dummy;
                }
#endif

                {
                    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
                    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
                    __kmp_invoke_microtask( microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                        , exit_runtime_p
#endif
                    );
                }

#if OMPT_SUPPORT
                *exit_runtime_p = NULL;
                if (ompt_enabled) {
#if OMPT_TRACE
                    lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;

                    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                            my_parallel_id, my_task_id);
                    }
#endif

                    __ompt_lw_taskteam_unlink(master_th);
                    // clear the task id only after unlinking the task
                    lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                            ompt_parallel_id, ompt_task_id,
                            OMPT_INVOKER(call_context));
                    }
                    master_th->th.ompt_thread_info.state = ompt_state_overhead;
                }
#endif
#if OMP_40_ENABLED
            }
#endif /* OMP_40_ENABLED */
        }
        else if ( call_context == fork_context_gnu ) {
#if OMPT_SUPPORT
            ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
                __kmp_allocate(sizeof(ompt_lw_taskteam_t));
            __ompt_lw_taskteam_init(lwt, master_th, gtid,
                unwrapped_task, ompt_parallel_id);

            lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
            lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
            __ompt_lw_taskteam_link(lwt, master_th);
#endif

            // we were called from GNU native code
            KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
            return FALSE;
        }
        else {
            KMP_ASSERT2( call_context < fork_context_last,
                         "__kmp_fork_call: unknown fork_context parameter" );
        }

        KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
        KMP_MB();
        return FALSE;
    }
    KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
                    parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
                    master_th->th.th_current_task->td_icvs.max_active_levels ) );
    master_th->th.th_current_task->td_flags.executing = 0;

#if OMP_40_ENABLED
    if ( !master_th->th.th_teams_microtask || level > teams_level )
#endif /* OMP_40_ENABLED */
    {
        /* Increment our nested depth level */
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
    }

    // See if we need to make a copy of the ICVs.
    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
        nthreads_icv = __kmp_nested_nth.nth[level+1];
    } else {
        nthreads_icv = 0;  // don't update
    }

#if OMP_40_ENABLED
    // Figure out the proc_bind policy for the new team.
    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
    if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    } else {
        if (proc_bind == proc_bind_default) {
            // No proc_bind clause specified; use current proc-bind-var for this region.
            proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
        }
        // Figure the value of proc-bind-var for the child threads.
        if ((level+1 < __kmp_nested_proc_bind.used)
            && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
            proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
        }
    }

    // Reset for next parallel region
    master_th->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if ((nthreads_icv > 0)
#if OMP_40_ENABLED
        || (proc_bind_icv != proc_bind_default)
#endif /* OMP_40_ENABLED */
        ) {
        kmp_internal_control_t new_icvs;
        copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
        new_icvs.next = NULL;
        if (nthreads_icv > 0) {
            new_icvs.nproc = nthreads_icv;
        }
#if OMP_40_ENABLED
        if (proc_bind_icv != proc_bind_default) {
            new_icvs.proc_bind = proc_bind_icv;
        }
#endif /* OMP_40_ENABLED */

        /* allocate a new parallel team */
        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                                   ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                   proc_bind,
#endif
                                   &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
    } else {
        /* allocate a new parallel team */
        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                                   ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                   proc_bind,
#endif
                                   &master_th->th.th_current_task->td_icvs, argc
                                   USE_NESTED_HOT_ARG(master_th) );
    }
    KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );

    /* setup the new team */
    KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
    KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
    KMP_CHECK_UPDATE(team->t.t_ident, loc);
    KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
    KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
    KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
#endif
    KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
#if OMP_40_ENABLED
    if ( !master_th->th.th_teams_microtask || level > teams_level ) {
#endif /* OMP_40_ENABLED */
        int new_level = parent_team->t.t_level + 1;
        KMP_CHECK_UPDATE(team->t.t_level, new_level);
        new_level = parent_team->t.t_active_level + 1;
        KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
#if OMP_40_ENABLED
    } else {
        // Do not increase parallel level at start of the teams construct.
        int new_level = parent_team->t.t_level;
        KMP_CHECK_UPDATE(team->t.t_level, new_level);
        new_level = parent_team->t.t_active_level;
        KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
    }
#endif /* OMP_40_ENABLED */
    kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
    if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
        team->t.t_sched = new_sched; // set master's schedule as new run-time schedule

#if OMP_40_ENABLED
    KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
#endif

    // Update the floating point rounding in the team if required.
    propagateFPControl(team);
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        // Set master's task team to team's task team. Unless this is hot team, it should be NULL.
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
        KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                        parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );

        if ( active_level || master_th->th.th_task_team ) {
            // Take a memo of master's task_state
            KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
            if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
                kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
                kmp_uint8 *old_stack, *new_stack;
                kmp_uint32 j;
                new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
                for (j=0; j<master_th->th.th_task_state_stack_sz; ++j) {
                    new_stack[j] = master_th->th.th_task_state_memo_stack[j];
                }
                for (j=master_th->th.th_task_state_stack_sz; j<new_size; ++j) { // zero-init rest of stack
                    new_stack[j] = 0;
                }
                old_stack = master_th->th.th_task_state_memo_stack;
                master_th->th.th_task_state_memo_stack = new_stack;
                master_th->th.th_task_state_stack_sz = new_size;
                __kmp_free(old_stack);
            }
            // Store master's task_state on stack
            master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
            master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
            if (team == master_th->th.th_hot_teams[active_level].hot_team) {
                // Restore master's nested state if nested hot team
                master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
            } else {
#endif
                master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
            }
#endif
        }
#if !KMP_NESTED_HOT_TEAMS
        KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
#endif
    }

    KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
                   gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
    KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
                      ( team->t.t_master_tid == 0 &&
                        ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
    /* now, setup the arguments */
    argv = (void**)team->t.t_argv;
#if OMP_40_ENABLED
    if ( ap ) {
#endif /* OMP_40_ENABLED */
        for ( i=argc-1; i >= 0; --i ) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            void *new_argv = va_arg(*ap, void *);
#else
            void *new_argv = va_arg(ap, void *);
#endif
            KMP_CHECK_UPDATE(*argv, new_argv);
            argv++;
        }
#if OMP_40_ENABLED
    } else {
        for ( i=0; i < argc; ++i ) {
            // Get args from parent team for teams construct
            KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
        }
    }
#endif /* OMP_40_ENABLED */

    /* now actually fork the threads */
    KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
    if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
        root->r.r_active = TRUE;

    __kmp_fork_team_threads( root, team, master_th, gtid );
    __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );

#if OMPT_SUPPORT
    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

#if USE_ITT_BUILD
    if ( team->t.t_active_level == 1 // only report frames at level 1
# if OMP_40_ENABLED
         && !master_th->th.th_teams_microtask // not in teams construct
# endif /* OMP_40_ENABLED */
       ) {
#if USE_ITT_NOTIFY
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
        {
            kmp_uint64 tmp_time = 0;
            if ( __itt_get_timestamp_ptr )
                tmp_time = __itt_get_timestamp();
            // Internal fork - report frame begin
            master_th->th.th_frame_time = tmp_time;
            if ( __kmp_forkjoin_frames_mode == 3 )
                team->t.t_region_time = tmp_time;
        } else // only one notification scheme (either "submit" or "forking/joined", not both)
#endif /* USE_ITT_NOTIFY */
        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
        { // Mark start of "parallel" region for VTune.
            __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
        }
    }
#endif /* USE_ITT_BUILD */

    /* now go on and do the work */
    KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
    KMP_MB();
    KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));

#if USE_ITT_BUILD
    if ( __itt_stack_caller_create_ptr ) {
        team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
    }
#endif /* USE_ITT_BUILD */

    __kmp_internal_fork( loc, gtid, team );
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));

    if (call_context == fork_context_gnu) {
        KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
        return TRUE;
    }

    /* Invoke microtask for MASTER thread */
    KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                   gtid, team->t.t_id, team->t.t_pkfn ) );
    {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        if (! team->t.t_invoke( gtid )) {
            KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
        }
    }
    KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                   gtid, team->t.t_id, team->t.t_pkfn ) );
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));

#if OMPT_SUPPORT
    if (ompt_enabled) {
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    return TRUE;
}
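/* __kmp_join_call is the counterpart of __kmp_fork_call: it joins the team at
   the end of a parallel region, restores the master's state, task team and
   ICVs, and frees or recycles the worker threads. */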
#if OMPT_SUPPORT
static inline void
__kmp_join_restore_state(
    kmp_info_t *thread,
    kmp_team_t *team)
{
    // restore state outside the region
    thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
        ompt_state_work_serial : ompt_state_work_parallel);
}

static inline void
__kmp_join_ompt(
    kmp_info_t *thread,
    kmp_team_t *team,
    ompt_parallel_id_t parallel_id,
    fork_context_e fork_context)
{
    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
            parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
    }

    task_info->frame.reenter_runtime_frame = NULL;
    __kmp_join_restore_state(thread, team);
}
#endif

void
__kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
               , enum fork_context_e fork_context
#endif
#if OMP_40_ENABLED
               , int exit_teams
#endif /* OMP_40_ENABLED */
)
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
    kmp_team_t     *team;
    kmp_team_t     *parent_team;
    kmp_info_t     *master_th;
    kmp_root_t     *root;
    int             master_active;
    int             i;

    KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));

    /* setup current data */
    master_th   = __kmp_threads[ gtid ];
    root        = master_th->th.th_root;
    team        = master_th->th.th_team;
    parent_team = team->t.t_parent;

    master_th->th.th_ident = loc;

#if OMPT_SUPPORT
    if (ompt_enabled) {
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

#if KMP_DEBUG
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                        __kmp_gtid_from_thread( master_th ), team,
                        team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
    }
#endif

    if( team->t.t_serialized ) {
#if OMP_40_ENABLED
        if ( master_th->th.th_teams_microtask ) {
            // We are in a teams construct
            int level  = team->t.t_level;
            int tlevel = master_th->th.th_teams_level;
            if ( level == tlevel ) {
                // it was not incremented earlier at start of the teams construct,
                // so do it here - at the end of teams construct
                team->t.t_level++;
            } else if ( level == tlevel + 1 ) {
                // we are exiting parallel inside teams, need to increment serialization
                // in order to restore it in the next call to __kmpc_end_serialized_parallel
                team->t.t_serialized++;
            }
        }
#endif /* OMP_40_ENABLED */
        __kmpc_end_serialized_parallel( loc, gtid );

#if OMPT_SUPPORT
        if (ompt_enabled) {
            __kmp_join_restore_state(master_th, parent_team);
        }
#endif

        return;
    }

    master_active = team->t.t_master_active;

#if OMP_40_ENABLED
    if (!exit_teams)
#endif /* OMP_40_ENABLED */
    {
        // No barrier for internal teams at exit from teams construct,
        // but there is a barrier for the external team (league).
        __kmp_internal_join( loc, gtid, team );
    }
#if OMP_40_ENABLED
    else {
        master_th->th.th_task_state = 0; // no tasking in teams (out of any parallel)
    }
#endif /* OMP_40_ENABLED */

    KMP_MB();

#if OMPT_SUPPORT
    ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
#endif

#if USE_ITT_BUILD
    if ( __itt_stack_caller_create_ptr ) {
        __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
    }

    // Mark end of "parallel" region for VTune.
    if ( team->t.t_active_level == 1
# if OMP_40_ENABLED
         && !master_th->th.th_teams_microtask /* not in teams construct */
# endif /* OMP_40_ENABLED */
       ) {
        master_th->th.th_ident = loc;
        // only one notification scheme (either "submit" or "forking/joined", not both)
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
            __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
                                    0, loc, master_th->th.th_team_nproc, 1 );
        else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
                  ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
            __kmp_itt_region_joined( gtid );
    }
#endif /* USE_ITT_BUILD */

#if OMP_40_ENABLED
    if ( master_th->th.th_teams_microtask &&
         !exit_teams &&
         team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
         team->t.t_level == master_th->th.th_teams_level + 1 ) {
        // We need to leave the team structure intact at the end of a parallel
        // inside the teams construct, so that at the next parallel the same
        // (hot) team works; only adjust nesting levels.

        /* Decrement our nested depth level */
        team->t.t_level --;
        team->t.t_active_level --;
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

        /* Restore number of threads in the team if needed */
        if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
            int old_num = master_th->th.th_team_nproc;
            int new_num = master_th->th.th_teams_size.nth;
            kmp_info_t **other_threads = team->t.t_threads;
            team->t.t_nproc = new_num;
            for ( i = 0; i < old_num; ++i ) {
                other_threads[i]->th.th_team_nproc = new_num;
            }
            // Adjust states of non-used threads of the team
            for ( i = old_num; i < new_num; ++i ) {
                // Re-initialize thread's barrier data.
                int b;
                kmp_balign_t * balign = other_threads[i]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }
                if ( __kmp_tasking_mode != tskm_immediate_exec ) {
                    // Synchronize thread's task state
                    other_threads[i]->th.th_task_state = master_th->th.th_task_state;
                }
            }
        }

#if OMPT_SUPPORT
        if (ompt_enabled) {
            __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
        }
#endif

        return;
    }
#endif /* OMP_40_ENABLED */

    /* do cleanup and restore the parent team */
    master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
    master_th->th.th_local.this_construct = team->t.t_master_this_cons;

    master_th->th.th_dispatch =
        & parent_team->t.t_dispatch[ team->t.t_master_tid ];

    /* The following lock has instructions with REL and ACQ semantics,
       separating the parallel user code called in this parallel region
       from the serial user code called after this function returns. */
    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMP_40_ENABLED
    if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
#endif /* OMP_40_ENABLED */
    {
        /* Decrement our nested depth level */
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
    }
    KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
            ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                parallel_id, task_info->task_id);
        }
        task_info->frame.exit_runtime_frame = NULL;
        task_info->task_id = 0;
    }
#endif

    KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
                   0, master_th, team ) );
    __kmp_pop_current_task_from_thread( master_th );

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    // Restore master thread's partition.
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;
#endif /* OMP_40_ENABLED */

    updateHWFPControl (team);

    if ( root->r.r_active != master_active )
        root->r.r_active = master_active;

    __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads

    /* this race was fun to find.  make sure the following is in the critical
       region otherwise assertions may fail occasionally since the old team may
       be reallocated and the hierarchy appears inconsistent.  it is actually
       safe to run and won't cause any bugs, but will cause those assertion
       failures.  it's only one deref&assign so might as well put this in the
       critical region */
    master_th->th.th_team            = parent_team;
    master_th->th.th_team_nproc      = parent_team->t.t_nproc;
    master_th->th.th_team_master     = parent_team->t.t_threads[0];
    master_th->th.th_team_serialized = parent_team->t.t_serialized;

    /* restore serialized team, if need be */
    if( parent_team->t.t_serialized &&
        parent_team != master_th->th.th_serial_team &&
        parent_team != root->r.r_root_team ) {
        __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
        master_th->th.th_serial_team = parent_team;
    }

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
            KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
            // Remember master's state if we re-use this nested hot team
            master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
            --master_th->th.th_task_state_top; // pop
            // Now restore state at this level
            master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
        }
        // Copy the task team from the parent team to the master thread
        master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
        KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
    }

    master_th->th.th_current_task->td_flags.executing = 1;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
    if (ompt_enabled) {
        __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
    }
#endif

    KMP_MB();
    KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
}
/* If the current team is a nested serialized team, push a copy of the current
   ICVs on the team's control stack so they can be restored when the
   serialized region ends. */
static void
__kmp_save_internal_controls ( kmp_info_t * thread )
{
    if ( thread->th.th_team != thread->th.th_serial_team ) {
        return;
    }
    if (thread->th.th_team->t.t_serialized > 1) {
        int push = 0;

        if (thread->th.th_team->t.t_control_stack_top == NULL) {
            push = 1;
        } else {
            if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
                 thread->th.th_team->t.t_serialized ) {
                push = 1;
            }
        }
        if (push) {  /* push a record on the serial team's stack */
            kmp_internal_control_t * control = (kmp_internal_control_t *)
                __kmp_allocate(sizeof(kmp_internal_control_t));

            copy_icvs( control, & thread->th.th_current_task->td_icvs );

            control->serial_nesting_level = thread->th.th_team->t.t_serialized;

            control->next = thread->th.th_team->t.t_control_stack_top;
            thread->th.th_team->t.t_control_stack_top = control;
        }
    }
}
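/* Implementation of omp_set_num_threads(): validates the value, saves the
   internal controls, updates nproc, and eagerly shrinks the hot team when the
   new value is smaller than the current hot team size. */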
2558 __kmp_set_num_threads(
int new_nth,
int gtid )
2563 KF_TRACE( 10, (
"__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2564 KMP_DEBUG_ASSERT( __kmp_init_serial );
2568 else if (new_nth > __kmp_max_nth)
2569 new_nth = __kmp_max_nth;
2572 thread = __kmp_threads[gtid];
2574 __kmp_save_internal_controls( thread );
2576 set__nproc( thread, new_nth );
2583 root = thread->th.th_root;
2584 if ( __kmp_init_parallel && ( ! root->r.r_active )
2585 && ( root->r.r_hot_team->t.t_nproc > new_nth )
#if KMP_NESTED_HOT_TEAMS
         && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
       ) {
2590 kmp_team_t *hot_team = root->r.r_hot_team;
2593 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2596 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2597 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2598 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2600 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2602 __kmp_free_thread( hot_team->t.t_threads[f] );
2603 hot_team->t.t_threads[f] = NULL;
2605 hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
        if( thread->th.th_hot_teams ) {
2608 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2609 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2613 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2618 for( f=0 ; f < new_nth; f++ ) {
2619 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2620 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2623 hot_team->t.t_size_changed = -1;
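/* Validates and installs a new max_active_levels ICV for the calling thread.
   Negative values and values above KMP_MAX_ACTIVE_LEVELS_LIMIT are rejected or
   clamped with a warning before the ICV is updated. */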
2629 __kmp_set_max_active_levels(
int gtid,
int max_active_levels )
    KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n",
                    gtid, max_active_levels ) );
2634 KMP_DEBUG_ASSERT( __kmp_init_serial );
2637 if( max_active_levels < 0 ) {
2638 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
        KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n",
                        gtid, max_active_levels ) );
2645 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2649 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2650 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n",
                    gtid, max_active_levels ) );
2657 thread = __kmp_threads[ gtid ];
2659 __kmp_save_internal_controls( thread );
2661 set__max_active_levels( thread, max_active_levels );
2667 __kmp_get_max_active_levels(
int gtid )
    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
2672 KMP_DEBUG_ASSERT( __kmp_init_serial );
2674 thread = __kmp_threads[ gtid ];
2675 KMP_DEBUG_ASSERT( thread->th.th_current_task );
    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
                    gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2678 return thread->th.th_current_task->td_icvs.max_active_levels;
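/* Maps the user-visible kmp_sched_t kind (plus optional chunk) onto the internal
   kmp_sch_* scheduling constants via __kmp_sch_map and stores the result in the
   current task's sched ICV. Out-of-range kinds fall back to the default schedule
   with a warning. */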
2683 __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk )
    KF_TRACE( 10, ( "__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                    gtid, (int)kind, chunk ));
2689 KMP_DEBUG_ASSERT( __kmp_init_serial );
2695 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2696 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2701 KMP_MSG( ScheduleKindOutOfRange, kind ),
            KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2705 kind = kmp_sched_default;
2709 thread = __kmp_threads[ gtid ];
2711 __kmp_save_internal_controls( thread );
2713 if ( kind < kmp_sched_upper_std ) {
2714 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2717 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
2719 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
2723 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2724 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2726 if ( kind == kmp_sched_auto ) {
2728 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2730 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2736 __kmp_get_schedule(
int gtid, kmp_sched_t * kind,
int * chunk )
    KF_TRACE( 10, ( "__kmp_get_schedule: thread %d\n", gtid ));
2742 KMP_DEBUG_ASSERT( __kmp_init_serial );
2744 thread = __kmp_threads[ gtid ];
2746 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2748 switch ( th_type ) {
2750 case kmp_sch_static_greedy:
2751 case kmp_sch_static_balanced:
2752 *kind = kmp_sched_static;
2755 case kmp_sch_static_chunked:
2756 *kind = kmp_sched_static;
2758 case kmp_sch_dynamic_chunked:
2759 *kind = kmp_sched_dynamic;
2762 case kmp_sch_guided_iterative_chunked:
2763 case kmp_sch_guided_analytical_chunked:
2764 *kind = kmp_sched_guided;
2767 *kind = kmp_sched_auto;
2769 case kmp_sch_trapezoidal:
2770 *kind = kmp_sched_trapezoidal;
#if KMP_STATIC_STEAL_ENABLED
        *kind = kmp_sched_static_steal;
2778 KMP_FATAL( UnknownSchedulingType, th_type );
2781 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
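/* Returns the thread number of the calling thread's ancestor at the given
   nesting level, walking up the team hierarchy (including serialized teams)
   and accounting for an active teams construct. Returns -1 for an invalid
   level. */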
2785 __kmp_get_ancestor_thread_num(
int gtid,
int level ) {
    KF_TRACE( 10, ( "__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2792 KMP_DEBUG_ASSERT( __kmp_init_serial );
2795 if( level == 0 )
return 0;
2796 if( level < 0 )
return -1;
2797 thr = __kmp_threads[ gtid ];
2798 team = thr->th.th_team;
2799 ii = team->t.t_level;
2800 if( level > ii )
return -1;
2803 if( thr->th.th_teams_microtask ) {
2805 int tlevel = thr->th.th_teams_level;
2806 if( level <= tlevel ) {
2807 KMP_DEBUG_ASSERT( ii >= tlevel );
2809 if ( ii == tlevel ) {
2818 if( ii == level )
return __kmp_tid_from_gtid( gtid );
2820 dd = team->t.t_serialized;
2824 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2827 if( ( team->t.t_serialized ) && ( !dd ) ) {
2828 team = team->t.t_parent;
2832 team = team->t.t_parent;
2833 dd = team->t.t_serialized;
2838 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
2842 __kmp_get_team_size(
int gtid,
int level ) {
    KF_TRACE( 10, ( "__kmp_get_team_size: thread %d %d\n", gtid, level ));
2849 KMP_DEBUG_ASSERT( __kmp_init_serial );
2852 if( level == 0 )
return 1;
2853 if( level < 0 )
return -1;
2854 thr = __kmp_threads[ gtid ];
2855 team = thr->th.th_team;
2856 ii = team->t.t_level;
2857 if( level > ii )
return -1;
2860 if( thr->th.th_teams_microtask ) {
2862 int tlevel = thr->th.th_teams_level;
2863 if( level <= tlevel ) {
2864 KMP_DEBUG_ASSERT( ii >= tlevel );
2866 if ( ii == tlevel ) {
2877 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2880 if( team->t.t_serialized && ( !dd ) ) {
2881 team = team->t.t_parent;
2885 team = team->t.t_parent;
2890 return team->t.t_nproc;
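/* Builds the global run-time schedule (type + chunk) from __kmp_sched /
   __kmp_static / __kmp_guided and __kmp_chunk, substituting KMP_DEFAULT_CHUNK
   when no chunk was specified. */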
2894 __kmp_get_schedule_global() {
2898 kmp_r_sched_t r_sched;
2904 r_sched.r_sched_type = __kmp_static;
2906 r_sched.r_sched_type = __kmp_guided;
2908 r_sched.r_sched_type = __kmp_sched;
2911 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
2912 r_sched.chunk = KMP_DEFAULT_CHUNK;
2914 r_sched.chunk = __kmp_chunk;
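/* Ensures team->t.t_argv can hold argc entries: small argument lists reuse the
   inline buffer t_inline_argv, larger ones get a page-allocated array sized to
   at least KMP_MIN_MALLOC_ARGV_ENTRIES (or 2*argc). */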
2929 __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc )
2932 KMP_DEBUG_ASSERT( team );
2933 if( !realloc || argc > team->t.t_max_argc ) {
        KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
                         team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
2938 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
            __kmp_free( (void *) team->t.t_argv );
2941 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2943 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
            KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
                             team->t.t_id, team->t.t_max_argc ));
2946 team->t.t_argv = &team->t.t_inline_argv[0];
2947 if ( __kmp_storage_map ) {
2948 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2949 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
                                              (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
2951 "team_%d.t_inline_argv",
2956 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
2957 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
            KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
                             team->t.t_id, team->t.t_max_argc ));
            team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
2961 if ( __kmp_storage_map ) {
2962 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
                                              sizeof(void *) * team->t.t_max_argc,
"team_%d.t_argv",
2971 __kmp_allocate_team_arrays(kmp_team_t *team,
int max_nth)
2974 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
    team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
    team->t.t_disp_buffer = (dispatch_shared_info_t*)
        __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
    team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
    team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
2980 team->t.t_max_nproc = max_nth;
2983 for(i = 0 ; i < num_disp_buff; ++i) {
2984 team->t.t_disp_buffer[i].buffer_index = i;
2986 team->t.t_disp_buffer[i].doacross_buf_idx = i;
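/* The two routines below release and (re)build the per-team arrays allocated
   above: thread pointers, dispatch buffers, per-thread dispatch structures and
   implicit task data. Reallocation preserves the existing thread pointers. */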
2992 __kmp_free_team_arrays(kmp_team_t *team) {
2995 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2996 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2997 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2998 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
3001 __kmp_free(team->t.t_threads);
3002 __kmp_free(team->t.t_disp_buffer);
3003 __kmp_free(team->t.t_dispatch);
3004 __kmp_free(team->t.t_implicit_task_taskdata);
3005 team->t.t_threads = NULL;
3006 team->t.t_disp_buffer = NULL;
3007 team->t.t_dispatch = NULL;
3008 team->t.t_implicit_task_taskdata = 0;
3012 __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
3013 kmp_info_t **oldThreads = team->t.t_threads;
3015 __kmp_free(team->t.t_disp_buffer);
3016 __kmp_free(team->t.t_dispatch);
3017 __kmp_free(team->t.t_implicit_task_taskdata);
3018 __kmp_allocate_team_arrays(team, max_nth);
3020 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
3022 __kmp_free(oldThreads);
3025 static kmp_internal_control_t
3026 __kmp_get_global_icvs(
void ) {
3028 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3031 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3034 kmp_internal_control_t g_icvs = {
3036 (kmp_int8)__kmp_dflt_nested,
3037 (kmp_int8)__kmp_global.g.g_dynamic,
3038 (kmp_int8)__kmp_env_blocktime,
3039 __kmp_dflt_blocktime,
3043 __kmp_dflt_team_nth,
3045 __kmp_dflt_max_active_levels,
3048 __kmp_nested_proc_bind.bind_types[0],
3049 __kmp_default_device,
3057 static kmp_internal_control_t
3058 __kmp_get_x_global_icvs(
const kmp_team_t *team ) {
3060 kmp_internal_control_t gx_icvs;
3061 gx_icvs.serial_nesting_level = 0;
3062 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3063 gx_icvs.next = NULL;
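/* One-time setup of a kmp_root_t: initializes its lock and flags, then creates
   the (serialized, single-thread) root team and the hot team that will be
   reused for non-nested parallel regions on this root. */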
3069 __kmp_initialize_root( kmp_root_t *root )
3072 kmp_team_t *root_team;
3073 kmp_team_t *hot_team;
3074 int hot_team_max_nth;
3075 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3076 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3077 KMP_DEBUG_ASSERT( root );
3078 KMP_ASSERT( ! root->r.r_begin );
3081 __kmp_init_lock( &root->r.r_begin_lock );
3082 root->r.r_begin = FALSE;
3083 root->r.r_active = FALSE;
3084 root->r.r_in_parallel = 0;
3085 root->r.r_blocktime = __kmp_dflt_blocktime;
3086 root->r.r_nested = __kmp_dflt_nested;
    KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
3093 __kmp_allocate_team(
3101 __kmp_nested_proc_bind.bind_types[0],
3105 USE_NESTED_HOT_ARG(NULL)
3109 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
    KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3114 root->r.r_root_team = root_team;
3115 root_team->t.t_control_stack_top = NULL;
3118 root_team->t.t_threads[0] = NULL;
3119 root_team->t.t_nproc = 1;
3120 root_team->t.t_serialized = 1;
3122 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3123 root_team->t.t_sched.chunk = r_sched.chunk;
    KA_TRACE( 20, ( "__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
                    root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
    KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
3132 __kmp_allocate_team(
3135 __kmp_dflt_team_nth_ub * 2,
3140 __kmp_nested_proc_bind.bind_types[0],
3144 USE_NESTED_HOT_ARG(NULL)
    KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3148 root->r.r_hot_team = hot_team;
3149 root_team->t.t_control_stack_top = NULL;
3152 hot_team->t.t_parent = root_team;
3155 hot_team_max_nth = hot_team->t.t_max_nproc;
3156 for ( f = 0; f < hot_team_max_nth; ++ f ) {
3157 hot_team->t.t_threads[ f ] = NULL;
3159 hot_team->t.t_nproc = 1;
3161 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3162 hot_team->t.t_sched.chunk = r_sched.chunk;
3163 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
    kmp_team_p const *          entry;
    struct kmp_team_list_item * next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t * kmp_team_list_t;
__kmp_print_structure_team_accum(
    kmp_team_list_t    list,
    kmp_team_p const * team
) {
3189 KMP_DEBUG_ASSERT( list != NULL );
3190 if ( team == NULL ) {
3194 __kmp_print_structure_team_accum( list, team->t.t_parent );
3195 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3199 while ( l->next != NULL && l->entry != team ) {
3202 if ( l->next != NULL ) {
3208 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
    kmp_team_list_item_t * item =
        (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
__kmp_print_structure_team(
    char const *       title,
    kmp_team_p const * team
) {
    __kmp_printf( "%s", title );
    if ( team != NULL ) {
        __kmp_printf( "%2x %p\n", team->t.t_id, team );
    } else {
        __kmp_printf( " - (nil)\n" );
    }
}
__kmp_print_structure_thread(
    char const *       title,
    kmp_info_p const * thread
) {
    __kmp_printf( "%s", title );
    if ( thread != NULL ) {
        __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
    } else {
        __kmp_printf( " - (nil)\n" );
    }
}
void
__kmp_print_structure( void ) {

    kmp_team_list_t list;
    int gtid;

    list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );

    __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
    for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
        __kmp_printf( "%2d", gtid );
        if ( __kmp_threads != NULL ) {
            __kmp_printf( " %p", __kmp_threads[ gtid ] );
        }
        if ( __kmp_root != NULL ) {
            __kmp_printf( " %p", __kmp_root[ gtid ] );
        }
        __kmp_printf( "\n" );
    }

    __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
    if ( __kmp_threads != NULL ) {
        for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
            kmp_info_t const * thread = __kmp_threads[ gtid ];
            if ( thread != NULL ) {
                __kmp_printf( "GTID %2d %p:\n", gtid, thread );
                __kmp_printf( " Our Root: %p\n", thread->th.th_root );
                __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
                __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
                __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
                __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
                __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
                __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
                __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
                __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
                __kmp_printf( "\n" );
                __kmp_print_structure_team_accum( list, thread->th.th_team );
                __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
            }
        }
    } else {
        __kmp_printf( "Threads array is not allocated.\n" );
    }

    __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
    if ( __kmp_root != NULL ) {
        for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
            kmp_root_t const * root = __kmp_root[ gtid ];
            if ( root != NULL ) {
                __kmp_printf( "GTID %2d %p:\n", gtid, root );
                __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
                __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
                __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
                __kmp_printf( " Active?: %2d\n", root->r.r_active );
                __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
                __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
                __kmp_printf( "\n" );
                __kmp_print_structure_team_accum( list, root->r.r_root_team );
                __kmp_print_structure_team_accum( list, root->r.r_hot_team );
            }
        }
    } else {
        __kmp_printf( "Ubers array is not allocated.\n" );
    }

    __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
    while ( list->next != NULL ) {
        kmp_team_p const * team = list->entry;
        int i;
        __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
        __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
        __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
        __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
        __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
        __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
        for ( i = 0; i < team->t.t_nproc; ++ i ) {
            __kmp_printf( " Thread %2d: ", i );
            __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
        }
        __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
        __kmp_printf( "\n" );
        list = list->next;
    }

    __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
    __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
    __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
    __kmp_printf( "\n" );

    while ( list != NULL ) {
        kmp_team_list_item_t * item = list;
        list = list->next;
        KMP_INTERNAL_FREE( item );
    }
}
3370 static const unsigned __kmp_primes[] = {
3371 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3372 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3373 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3374 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3375 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3376 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3377 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3378 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3379 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3380 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3381 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3382 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3383 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3384 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3385 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3386 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3393 __kmp_get_random( kmp_info_t * thread )
3395 unsigned x = thread->th.th_x;
3396 unsigned short r = x>>16;
3398 thread->th.th_x = x*thread->th.th_a+1;
3400 KA_TRACE(30, (
"__kmp_get_random: THREAD: %d, RETURN: %u\n",
3401 thread->th.th_info.ds.ds_tid, r) );
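/* __kmp_get_random above is a per-thread linear congruential generator
   (x = a*x + 1, returning the upper 16 bits of the previous state);
   __kmp_init_random below seeds it from the thread id and picks the multiplier
   th_a from the __kmp_primes table. */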
3409 __kmp_init_random( kmp_info_t * thread )
3411 unsigned seed = thread->th.th_info.ds.ds_tid;
    thread->th.th_a = __kmp_primes[ seed % ( sizeof(__kmp_primes) / sizeof(__kmp_primes[0]) ) ];
3414 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3415 KA_TRACE(30, (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
3422 __kmp_reclaim_dead_roots(
void) {
3425 for(i = 0; i < __kmp_threads_capacity; ++i) {
3426 if( KMP_UBER_GTID( i ) &&
3427 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3428 !__kmp_root[i]->r.r_active ) {
3429 r += __kmp_unregister_root_other_thread(i);
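/* Grows __kmp_threads / __kmp_root in a single allocation (threads array
   followed by the root array). nWish appears to be the preferred growth and
   nNeed the minimum that must succeed; capacity is bounded by __kmp_sys_max_nth,
   or by __kmp_tp_capacity once threadprivate caches exist. */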
3458 __kmp_expand_threads(
int nWish,
int nNeed) {
3461 int __kmp_actual_max_nth;
#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
    added = __kmp_reclaim_dead_roots();
3486 int minimumRequiredCapacity;
3488 kmp_info_t **newThreads;
3489 kmp_root_t **newRoot;
3511 old_tp_cached = __kmp_tp_cached;
3512 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3513 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3517 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3521 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3527 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3534 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3536 newCapacity = __kmp_threads_capacity;
3539 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3540 (newCapacity << 1) :
3541 __kmp_actual_max_nth;
3542 }
while(newCapacity < minimumRequiredCapacity);
        newThreads = (kmp_info_t**) __kmp_allocate( (sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE );
        newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity);
        KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
        KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
        memset(newThreads + __kmp_threads_capacity, 0,
               (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
        memset(newRoot + __kmp_threads_capacity, 0,
               (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3552 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3558 __kmp_free(newThreads);
3561 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3562 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3564 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3565 __kmp_free(newThreads);
        *(kmp_info_t** volatile*)&__kmp_threads = newThreads;
        *(kmp_root_t** volatile*)&__kmp_root = newRoot;
        added += newCapacity - __kmp_threads_capacity;
        *(volatile int*)&__kmp_threads_capacity = newCapacity;
3575 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
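/* Registers the calling thread as a new root (uber) thread: finds a free gtid
   slot (expanding the arrays if needed), allocates and initializes the
   kmp_root_t, its uber kmp_info_t and serial team, and publishes the thread in
   __kmp_threads under the forkjoin lock. */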
3586 __kmp_register_root(
int initial_thread )
3588 kmp_info_t *root_thread;
3592 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
    KA_TRACE( 20, ( "__kmp_register_root: entered\n"));
3611 capacity = __kmp_threads_capacity;
3612 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3617 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3618 if ( __kmp_tp_cached ) {
3621 KMP_MSG( CantRegisterNewThread ),
3622 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3623 KMP_HNT( PossibleSystemLimitOnThreads ),
3630 KMP_MSG( CantRegisterNewThread ),
3631 KMP_HNT( SystemLimitOnThreads ),
3640 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
    KA_TRACE( 1, ( "__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3643 KMP_ASSERT( gtid < __kmp_threads_capacity );
3647 TCW_4(__kmp_nth, __kmp_nth + 1);
3654 if ( __kmp_adjust_gtid_mode ) {
3655 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3656 if ( TCR_4(__kmp_gtid_mode) != 2) {
3657 TCW_4(__kmp_gtid_mode, 2);
3661 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3662 TCW_4(__kmp_gtid_mode, 1);
#ifdef KMP_ADJUST_BLOCKTIME
    if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3671 if ( __kmp_nth > __kmp_avail_proc ) {
3672 __kmp_zero_bt = TRUE;
3678 if( ! ( root = __kmp_root[gtid] )) {
        root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3680 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
#if KMP_STATS_ENABLED
    __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3687 KMP_SET_THREAD_STATE(SERIAL_REGION);
3690 __kmp_initialize_root( root );
3693 if( root->r.r_uber_thread ) {
3694 root_thread = root->r.r_uber_thread;
        root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3697 if ( __kmp_storage_map ) {
3698 __kmp_print_thread_storage_map( root_thread, gtid );
3700 root_thread->th.th_info .ds.ds_gtid = gtid;
3701 root_thread->th.th_root = root;
3702 if( __kmp_env_consistency_check ) {
3703 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
3706 __kmp_initialize_fast_memory( root_thread );
3710 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
3711 __kmp_initialize_bget( root_thread );
3713 __kmp_init_random( root_thread );
3717 if( ! root_thread->th.th_serial_team ) {
3718 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
        KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
3721 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
3729 0 USE_NESTED_HOT_ARG(NULL) );
3731 KMP_ASSERT( root_thread->th.th_serial_team );
    KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
                    root_thread->th.th_serial_team ) );
3736 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3738 root->r.r_root_team->t.t_threads[0] = root_thread;
3739 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3740 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3741 root_thread->th.th_serial_team->t.t_serialized = 0;
3742 root->r.r_uber_thread = root_thread;
3745 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
3746 TCW_4(__kmp_init_gtid, TRUE);
3749 __kmp_gtid_set_specific( gtid );
3752 __kmp_itt_thread_name( gtid );
#ifdef KMP_TDATA_GTID
    __kmp_gtid = gtid;
#endif
    __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3759 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
    KA_TRACE( 20, ( "__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
                    gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
                    root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                    KMP_INIT_BARRIER_STATE ) );
3767 for ( b = 0; b < bs_last_barrier; ++ b ) {
3768 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3770 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3774 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
#if KMP_AFFINITY_SUPPORTED
    root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3779 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3780 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3781 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3784 if ( TCR_4(__kmp_init_middle) ) {
3785 __kmp_affinity_set_init_mask( gtid, TRUE );
3789 __kmp_root_counter ++;
3792 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
#if KMP_NESTED_HOT_TEAMS
__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3802 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3803 if( !hot_teams || !hot_teams[level].hot_team ) {
3806 KMP_DEBUG_ASSERT( level < max_level );
3807 kmp_team_t *team = hot_teams[level].hot_team;
3808 nth = hot_teams[level].hot_team_nth;
3810 if( level < max_level - 1 ) {
3811 for( i = 0; i < nth; ++i ) {
3812 kmp_info_t *th = team->t.t_threads[i];
3813 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3814 if( i > 0 && th->th.th_hot_teams ) {
3815 __kmp_free( th->th.th_hot_teams );
3816 th->th.th_hot_teams = NULL;
3820 __kmp_free_team( root, team, NULL );
3829 __kmp_reset_root(
int gtid, kmp_root_t *root)
3831 kmp_team_t * root_team = root->r.r_root_team;
3832 kmp_team_t * hot_team = root->r.r_hot_team;
3833 int n = hot_team->t.t_nproc;
3836 KMP_DEBUG_ASSERT( ! root->r.r_active );
3838 root->r.r_root_team = NULL;
3839 root->r.r_hot_team = NULL;
3842 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
#if KMP_NESTED_HOT_TEAMS
    if( __kmp_hot_teams_max_level > 0 ) {
3845 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3846 kmp_info_t *th = hot_team->t.t_threads[i];
3847 if( __kmp_hot_teams_max_level > 1 ) {
3848 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3850 if( th->th.th_hot_teams ) {
3851 __kmp_free( th->th.th_hot_teams );
3852 th->th.th_hot_teams = NULL;
3857 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
3863 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3864 __kmp_wait_to_unref_task_teams();
    KA_TRACE( 10, ( "__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
                    (LPVOID)&(root->r.r_uber_thread->th),
                    root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3872 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3877 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3878 int gtid = __kmp_get_gtid();
3879 __ompt_thread_end(ompt_thread_initial, gtid);
3883 TCW_4(__kmp_nth, __kmp_nth - 1);
3884 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3887 root->r.r_uber_thread = NULL;
3889 root->r.r_begin = FALSE;
3895 __kmp_unregister_root_current_thread(
int gtid )
    KA_TRACE( 1, ( "__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
3902 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3903 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3904 KC_TRACE( 10, (
"__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3905 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3908 kmp_root_t *root = __kmp_root[gtid];
3910 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3911 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3912 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3913 KMP_ASSERT( root->r.r_active == FALSE );
3919 kmp_info_t * thread = __kmp_threads[gtid];
3920 kmp_team_t * team = thread->th.th_team;
3921 kmp_task_team_t * task_team = thread->th.th_task_team;
3924 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3927 thread->th.ompt_thread_info.state = ompt_state_undefined;
3929 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3933 __kmp_reset_root(gtid, root);
3936 __kmp_gtid_set_specific( KMP_GTID_DNE );
#ifdef KMP_TDATA_GTID
    __kmp_gtid = KMP_GTID_DNE;
    KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3944 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3953 __kmp_unregister_root_other_thread(
int gtid )
3955 kmp_root_t *root = __kmp_root[gtid];
    KA_TRACE( 1, ( "__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3959 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3960 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3961 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3962 KMP_ASSERT( root->r.r_active == FALSE );
3964 r = __kmp_reset_root(gtid, root);
    KC_TRACE( 10, ( "__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3971 void __kmp_task_info() {
3973 kmp_int32 gtid = __kmp_entry_gtid();
3974 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3975 kmp_info_t *this_thr = __kmp_threads[ gtid ];
3976 kmp_team_t *steam = this_thr->th.th_serial_team;
3977 kmp_team_t *team = this_thr->th.th_team;
    __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
                  gtid, tid, this_thr, team, this_thr->th.th_current_task,
                  team->t.t_implicit_task_taskdata[tid].td_parent );
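/* (Re)binds an existing kmp_info_t to a team slot: sets tid/team/root pointers,
   initializes the implicit task and the per-thread dispatch buffer, and makes
   sure the private-common table and task-state stack exist. */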
3988 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team,
int tid,
int gtid )
3992 kmp_info_t *master = team->t.t_threads[0];
3993 KMP_DEBUG_ASSERT( this_thr != NULL );
3994 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
3995 KMP_DEBUG_ASSERT( team );
3996 KMP_DEBUG_ASSERT( team->t.t_threads );
3997 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3998 KMP_DEBUG_ASSERT( master );
3999 KMP_DEBUG_ASSERT( master->th.th_root );
4003 TCW_SYNC_PTR(this_thr->th.th_team, team);
4005 this_thr->th.th_info.ds.ds_tid = tid;
4006 this_thr->th.th_set_nproc = 0;
4008 this_thr->th.th_set_proc_bind = proc_bind_default;
# if KMP_AFFINITY_SUPPORTED
    this_thr->th.th_new_place = this_thr->th.th_current_place;
4013 this_thr->th.th_root = master->th.th_root;
4016 this_thr->th.th_team_nproc = team->t.t_nproc;
4017 this_thr->th.th_team_master = master;
4018 this_thr->th.th_team_serialized = team->t.t_serialized;
4019 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4021 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
    KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                    tid, gtid, this_thr, this_thr->th.th_current_task ) );
4026 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
    KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                    tid, gtid, this_thr, this_thr->th.th_current_task ) );
4033 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
4035 this_thr->th.th_local.this_construct = 0;
4038 this_thr->th.th_local.tv_data = 0;
4041 if ( ! this_thr->th.th_pri_common ) {
        this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4043 if ( __kmp_storage_map ) {
            __kmp_print_storage_map_gtid(
                gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
                sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
            );
4049 this_thr->th.th_pri_head = NULL;
4054 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
        size_t disp_size = sizeof( dispatch_private_info_t ) *
                           ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
        KD_TRACE( 10, ( "__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4061 KMP_ASSERT( dispatch );
4062 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4063 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4065 dispatch->th_disp_index = 0;
4067 dispatch->th_doacross_buf_idx = 0;
4069 if( ! dispatch->th_disp_buffer ) {
4070 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4072 if ( __kmp_storage_map ) {
4073 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4074 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
4075 disp_size,
"th_%d.th_dispatch.th_disp_buffer " 4076 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4077 gtid, team->t.t_id, gtid );
            memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
4083 dispatch->th_dispatch_pr_current = 0;
4084 dispatch->th_dispatch_sh_current = 0;
4086 dispatch->th_deo_fcn = 0;
4087 dispatch->th_dxo_fcn = 0;
4090 this_thr->th.th_next_pool = NULL;
4092 if (!this_thr->th.th_task_state_memo_stack) {
        this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4 * sizeof(kmp_uint8) );
4095 this_thr->th.th_task_state_top = 0;
4096 this_thr->th.th_task_state_stack_sz = 4;
4097 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i)
4098 this_thr->th.th_task_state_memo_stack[i] = 0;
4101 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4102 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
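/* Returns a worker for slot new_tid of the given team: reuses a thread from
   __kmp_thread_pool when one is available, otherwise picks a fresh gtid,
   allocates a new kmp_info_t plus its serial team, and starts the OS worker
   (and, on the first worker, the monitor thread). */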
4115 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team,
int new_tid )
4117 kmp_team_t *serial_team;
4118 kmp_info_t *new_thr;
    KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4122 KMP_DEBUG_ASSERT( root && team );
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
4129 if ( __kmp_thread_pool ) {
4131 new_thr = (kmp_info_t*)__kmp_thread_pool;
        __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4133 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4134 __kmp_thread_pool_insert_pt = NULL;
4136 TCW_4(new_thr->th.th_in_pool, FALSE);
4141 __kmp_thread_pool_nth--;
        KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d using thread T#%d\n",
                        __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4145 KMP_ASSERT( ! new_thr->th.th_team );
4146 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4147 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4150 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4151 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4153 TCW_4(__kmp_nth, __kmp_nth + 1);
4155 new_thr->th.th_task_state = 0;
4156 new_thr->th.th_task_state_top = 0;
4157 new_thr->th.th_task_state_stack_sz = 4;
#ifdef KMP_ADJUST_BLOCKTIME
        if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4163 if ( __kmp_nth > __kmp_avail_proc ) {
4164 __kmp_zero_bt = TRUE;
4172 kmp_balign_t * balign = new_thr->th.th_bar;
4173 for( b = 0; b < bs_last_barrier; ++ b )
4174 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
        KF_TRACE( 10, ( "__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                        __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4186 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4187 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4194 if ( ! TCR_4( __kmp_init_monitor ) ) {
4195 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4196 if ( ! TCR_4( __kmp_init_monitor ) ) {
            KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
            TCW_4( __kmp_init_monitor, 1 );
            __kmp_create_monitor( & __kmp_monitor );
            KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
4209 while ( TCR_4(__kmp_init_monitor) < 2 ) {
        KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4215 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4220 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4221 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
    new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4227 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4229 if ( __kmp_storage_map ) {
4230 __kmp_print_thread_storage_map( new_thr, new_gtid );
4235 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
    KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
4238 new_thr->th.th_serial_team = serial_team =
4239 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
4247 0 USE_NESTED_HOT_ARG(NULL) );
4249 KMP_ASSERT ( serial_team );
4250 serial_team->t.t_serialized = 0;
4251 serial_team->t.t_threads[0] = new_thr;
    KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", new_thr ) );
4256 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4259 __kmp_initialize_fast_memory( new_thr );
4263 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
4264 __kmp_initialize_bget( new_thr );
4267 __kmp_init_random( new_thr );
    KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
                    __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4274 kmp_balign_t * balign = new_thr->th.th_bar;
4275 for(b=0; b<bs_last_barrier; ++b) {
4276 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4277 balign[b].bb.team = NULL;
4278 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4279 balign[b].bb.use_oncore_barrier = 0;
4282 new_thr->th.th_spin_here = FALSE;
4283 new_thr->th.th_next_waiting = 0;
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4287 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4288 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4289 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4292 TCW_4(new_thr->th.th_in_pool, FALSE);
4293 new_thr->th.th_active_in_pool = FALSE;
4294 TCW_4(new_thr->th.th_active, TRUE);
4305 if ( __kmp_adjust_gtid_mode ) {
4306 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4307 if ( TCR_4(__kmp_gtid_mode) != 2) {
4308 TCW_4(__kmp_gtid_mode, 2);
4312 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4313 TCW_4(__kmp_gtid_mode, 1);
#ifdef KMP_ADJUST_BLOCKTIME
    if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4322 if ( __kmp_nth > __kmp_avail_proc ) {
4323 __kmp_zero_bt = TRUE;
    KF_TRACE( 10, ( "__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4330 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
    KF_TRACE( 10, ( "__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
    KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4348 __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs,
ident_t *loc ) {
    KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                    team->t.t_threads[0], team ) );
4351 KMP_DEBUG_ASSERT( team && new_icvs);
4352 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
4353 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4355 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4358 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
4359 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
    KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                    team->t.t_threads[0], team ) );
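/* __kmp_initialize_team below resets all per-team state (master tid, nproc,
   serialization level, barriers, locks and scheduling fields) and then calls
   __kmp_reinitialize_team above to install the new ICVs and source location. */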
4370 __kmp_initialize_team(
4373 kmp_internal_control_t * new_icvs,
    KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4379 KMP_DEBUG_ASSERT( team );
4380 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4381 KMP_DEBUG_ASSERT( team->t.t_threads );
4384 team->t.t_master_tid = 0;
4386 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4387 team->t.t_nproc = new_nproc;
4390 team->t.t_next_pool = NULL;
4393 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4394 team->t.t_invoke = NULL;
4397 team->t.t_sched = new_icvs->sched;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    team->t.t_fp_control_saved = FALSE;
4401 team->t.t_x87_fpu_control_word = 0;
4402 team->t.t_mxcsr = 0;
4405 team->t.t_construct = 0;
4406 __kmp_init_lock( & team->t.t_single_lock );
4408 team->t.t_ordered .dt.t_value = 0;
4409 team->t.t_master_active = FALSE;
    memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
4414 team->t.t_copypriv_data = NULL;
4416 team->t.t_copyin_counter = 0;
4418 team->t.t_control_stack_top = NULL;
4420 __kmp_reinitialize_team( team, new_icvs, loc );
    KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4431 if ( KMP_AFFINITY_CAPABLE() ) {
4433 if ( old_mask != NULL ) {
4434 status = __kmp_get_system_affinity( old_mask, TRUE );
4436 if ( status != 0 ) {
4439 KMP_MSG( ChangeThreadAffMaskError ),
4445 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
__kmp_partition_places( kmp_team_t *team, int update_master_only )
4464 kmp_info_t *master_th = team->t.t_threads[0];
4465 KMP_DEBUG_ASSERT( master_th != NULL );
4466 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4467 int first_place = master_th->th.th_first_place;
4468 int last_place = master_th->th.th_last_place;
4469 int masters_place = master_th->th.th_current_place;
4470 team->t.t_first_place = first_place;
4471 team->t.t_last_place = last_place;
    KA_TRACE( 20, ( "__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
                    proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
                    masters_place, first_place, last_place ) );
4477 switch ( proc_bind ) {
4479 case proc_bind_default:
4485 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4488 case proc_bind_master:
4491 int n_th = team->t.t_nproc;
4492 for ( f = 1; f < n_th; f++ ) {
4493 kmp_info_t *th = team->t.t_threads[f];
4494 KMP_DEBUG_ASSERT( th != NULL );
4495 th->th.th_first_place = first_place;
4496 th->th.th_last_place = last_place;
4497 th->th.th_new_place = masters_place;
            KA_TRACE( 100, ( "__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
                             __kmp_gtid_from_thread( team->t.t_threads[f] ),
                             team->t.t_id, f, masters_place, first_place, last_place ) );
4506 case proc_bind_close:
4509 int n_th = team->t.t_nproc;
4511 if ( first_place <= last_place ) {
4512 n_places = last_place - first_place + 1;
4515 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4517 if ( n_th <= n_places ) {
4518 int place = masters_place;
4519 for ( f = 1; f < n_th; f++ ) {
4520 kmp_info_t *th = team->t.t_threads[f];
4521 KMP_DEBUG_ASSERT( th != NULL );
4523 if ( place == last_place ) {
4524 place = first_place;
4526 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4532 th->th.th_first_place = first_place;
4533 th->th.th_last_place = last_place;
4534 th->th.th_new_place = place;
4536 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4537 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4538 team->t.t_id, f, place, first_place, last_place ) );
4542 int S, rem, gap, s_count;
4543 S = n_th / n_places;
4545 rem = n_th - ( S * n_places );
4546 gap = rem > 0 ? n_places/rem : n_places;
4547 int place = masters_place;
4549 for ( f = 0; f < n_th; f++ ) {
4550 kmp_info_t *th = team->t.t_threads[f];
4551 KMP_DEBUG_ASSERT( th != NULL );
4553 th->th.th_first_place = first_place;
4554 th->th.th_last_place = last_place;
4555 th->th.th_new_place = place;
4558 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4561 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4563 if ( place == last_place ) {
4564 place = first_place;
4566 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4576 else if (s_count == S) {
4577 if ( place == last_place ) {
4578 place = first_place;
4580 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4590 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4591 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4592 team->t.t_id, f, th->th.th_new_place, first_place,
4595 KMP_DEBUG_ASSERT( place == masters_place );
4600 case proc_bind_spread:
4603 int n_th = team->t.t_nproc;
4606 if ( first_place <= last_place ) {
4607 n_places = last_place - first_place + 1;
4610 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4612 if ( n_th <= n_places ) {
4613 int place = masters_place;
4614 int S = n_places/n_th;
4615 int s_count, rem, gap, gap_ct;
4616 rem = n_places - n_th*S;
4617 gap = rem ? n_th/rem : 1;
4620 if (update_master_only == 1)
4622 for ( f = 0; f < thidx; f++ ) {
4623 kmp_info_t *th = team->t.t_threads[f];
4624 KMP_DEBUG_ASSERT( th != NULL );
4626 th->th.th_first_place = place;
4627 th->th.th_new_place = place;
4629 while (s_count < S) {
4630 if ( place == last_place ) {
4631 place = first_place;
4633 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4641 if (rem && (gap_ct == gap)) {
4642 if ( place == last_place ) {
4643 place = first_place;
4645 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4654 th->th.th_last_place = place;
4657 if ( place == last_place ) {
4658 place = first_place;
4660 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4667 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4668 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4669 team->t.t_id, f, th->th.th_new_place,
4670 th->th.th_first_place, th->th.th_last_place ) );
4672 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
4675 int S, rem, gap, s_count;
4676 S = n_th / n_places;
4678 rem = n_th - ( S * n_places );
4679 gap = rem > 0 ? n_places/rem : n_places;
4680 int place = masters_place;
4683 if (update_master_only == 1)
4685 for ( f = 0; f < thidx; f++ ) {
4686 kmp_info_t *th = team->t.t_threads[f];
4687 KMP_DEBUG_ASSERT( th != NULL );
4689 th->th.th_first_place = place;
4690 th->th.th_last_place = place;
4691 th->th.th_new_place = place;
4694 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4697 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4699 if ( place == last_place ) {
4700 place = first_place;
4702 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4712 else if (s_count == S) {
4713 if ( place == last_place ) {
4714 place = first_place;
4716 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4726 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4727 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4728 team->t.t_id, f, th->th.th_new_place,
4729 th->th.th_first_place, th->th.th_last_place) );
4731 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
4740 KA_TRACE( 20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4747 __kmp_allocate_team( kmp_root_t *root,
int new_nproc,
int max_nproc,
4749 ompt_parallel_id_t ompt_parallel_id,
4752 kmp_proc_bind_t new_proc_bind,
4754 kmp_internal_control_t *new_icvs,
4755 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
4757 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
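    /* Allocates (or reuses) a team for a parallel region. The hot team of a
       non-active root is reused and resized up or down to new_nproc; otherwise
       a team with sufficient t_max_nproc is taken from __kmp_team_pool, and
       only as a last resort is a brand new kmp_team_t allocated and
       initialized. */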
4760 int use_hot_team = ! root->r.r_active;
    KA_TRACE( 20, ( "__kmp_allocate_team: called\n"));
4764 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4765 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t *hot_teams;
4771 team = master->th.th_team;
4772 level = team->t.t_active_level;
4773 if( master->th.th_teams_microtask ) {
4774 if( master->th.th_teams_size.nteams > 1 && (
4775 team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
4776 master->th.th_teams_level < team->t.t_level ) ) {
4780 hot_teams = master->th.th_hot_teams;
4781 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4790 if( use_hot_team && new_nproc > 1 ) {
4791 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
#if KMP_NESTED_HOT_TEAMS
        team = hot_teams[level].hot_team;
4795 team = root->r.r_hot_team;
4798 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4799 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4800 team->t.t_task_team[0], team->t.t_task_team[1] ));
4807 if (team->t.t_nproc == new_nproc) {
4808 KA_TRACE( 20, (
"__kmp_allocate_team: reusing hot team\n" ));
4811 if ( team->t.t_size_changed == -1 ) {
4812 team->t.t_size_changed = 1;
4814 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4818 kmp_r_sched_t new_sched = new_icvs->sched;
4819 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4820 team->t.t_sched.chunk != new_sched.chunk)
4821 team->t.t_sched = new_sched;
4823 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4825 KF_TRACE( 10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4826 0, team->t.t_threads[0], team ) );
4827 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
# if KMP_AFFINITY_SUPPORTED
            if ( ( team->t.t_size_changed == 0 )
4832 && ( team->t.t_proc_bind == new_proc_bind ) ) {
4833 if (new_proc_bind == proc_bind_spread) {
4834 __kmp_partition_places(team, 1);
                KA_TRACE( 200, ( "__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
                                 team->t.t_id, new_proc_bind, team->t.t_first_place,
                                 team->t.t_last_place ) );
4841 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4842 __kmp_partition_places( team );
4845 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4849 else if( team->t.t_nproc > new_nproc ) {
        KA_TRACE( 20, ( "__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4852 team->t.t_size_changed = 1;
#if KMP_NESTED_HOT_TEAMS
        if( __kmp_hot_teams_mode == 0 ) {
4857 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4858 hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS
        for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4862 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
4863 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4865 team->t.t_threads[f]->th.th_task_team = NULL;
4867 __kmp_free_thread( team->t.t_threads[ f ] );
4868 team->t.t_threads[ f ] = NULL;
#if KMP_NESTED_HOT_TEAMS
            for (f=new_nproc; f<team->t.t_nproc; ++f) {
4875 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4876 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
                for ( int b = 0; b < bs_last_barrier; ++b ) {
4878 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4879 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4881 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
#endif // KMP_NESTED_HOT_TEAMS
        team->t.t_nproc = new_nproc;
4888 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4889 team->t.t_sched.chunk != new_icvs->sched.chunk)
4890 team->t.t_sched = new_icvs->sched;
4891 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4894 for(f = 0; f < new_nproc; ++f) {
4895 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4898 KF_TRACE( 10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4899 0, team->t.t_threads[0], team ) );
4901 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4904 for ( f = 0; f < team->t.t_nproc; f++ ) {
4905 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4906 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4911 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
# if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
        kmp_affin_mask_t *old_mask;
4920 if ( KMP_AFFINITY_CAPABLE() ) {
4921 KMP_CPU_ALLOC(old_mask);
        KA_TRACE( 20, ( "__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4927 team->t.t_size_changed = 1;
#if KMP_NESTED_HOT_TEAMS
        int avail_threads = hot_teams[level].hot_team_nth;
4931 if( new_nproc < avail_threads )
4932 avail_threads = new_nproc;
4933 kmp_info_t **other_threads = team->t.t_threads;
4934 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4938 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4939 for ( b = 0; b < bs_last_barrier; ++ b ) {
4940 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4941 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4943 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4947 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4950 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4951 team->t.t_nproc = new_nproc;
4954 team->t.t_nproc = hot_teams[level].hot_team_nth;
4955 hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS
        if(team->t.t_max_nproc < new_nproc) {
4959 __kmp_reallocate_team_arrays(team, new_nproc);
4960 __kmp_reinitialize_team( team, new_icvs, NULL );
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
        __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4974 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4975 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4976 KMP_DEBUG_ASSERT( new_worker );
4977 team->t.t_threads[ f ] = new_worker;
            KA_TRACE( 20, ( "__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
                            team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
                            team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                            team->t.t_bar[bs_plain_barrier].b_arrived ) );
4986 kmp_balign_t * balign = new_worker->th.th_bar;
4987 for( b = 0; b < bs_last_barrier; ++ b ) {
4988 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
4989 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4991 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
        if ( KMP_AFFINITY_CAPABLE() ) {
5000 __kmp_set_system_affinity( old_mask, TRUE );
5001 KMP_CPU_FREE(old_mask);
#if KMP_NESTED_HOT_TEAMS
#endif // KMP_NESTED_HOT_TEAMS
        int old_nproc = team->t.t_nproc;
5009 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
5012 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5013 for (f=0; f < team->t.t_nproc; ++f)
5014 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5019 for (f=old_nproc; f < team->t.t_nproc; ++f)
5020 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5023 int old_state = team->t.t_threads[0]->th.th_task_state;
5024 for (f=old_nproc; f < team->t.t_nproc; ++f)
5025 team->t.t_threads[f]->th.th_task_state = old_state;
5029 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5030 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5031 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5036 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
# if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
5044 kmp_info_t *master = team->t.t_threads[0];
5045 if( master->th.th_teams_microtask ) {
5046 for( f = 1; f < new_nproc; ++f ) {
5048 kmp_info_t *thr = team->t.t_threads[f];
5049 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5050 thr->th.th_teams_level = master->th.th_teams_level;
5051 thr->th.th_teams_size = master->th.th_teams_size;
#if KMP_NESTED_HOT_TEAMS
        for( f = 1; f < new_nproc; ++f ) {
5059 kmp_info_t *thr = team->t.t_threads[f];
5061 kmp_balign_t * balign = thr->th.th_bar;
5062 for( b = 0; b < bs_last_barrier; ++ b ) {
5063 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5064 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5066 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif // KMP_NESTED_HOT_TEAMS
        __kmp_alloc_argv_entries( argc, team, TRUE );
5075 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5081 KF_TRACE( 10, (
" hot_team = %p\n", team ) );
5084 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5085 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5086 team->t.t_task_team[0], team->t.t_task_team[1] ));
5091 __ompt_team_assign_id(team, ompt_parallel_id);
5101 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5104 if ( team->t.t_max_nproc >= max_nproc ) {
5106 __kmp_team_pool = team->t.t_next_pool;
5109 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5111 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5112 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5113 team->t.t_task_team[0] = NULL;
5114 team->t.t_task_team[1] = NULL;
5117 __kmp_alloc_argv_entries( argc, team, TRUE );
5118 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5120 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5121 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5124 for ( b = 0; b < bs_last_barrier; ++ b) {
5125 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5127 team->t.t_bar[ b ].b_master_arrived = 0;
5128 team->t.t_bar[ b ].b_team_arrived = 0;
5134 team->t.t_proc_bind = new_proc_bind;
5137 KA_TRACE( 20, (
"__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
5140 __ompt_team_assign_id(team, ompt_parallel_id);
5151 team = __kmp_reap_team( team );
5152 __kmp_team_pool = team;
5157 team = (kmp_team_t*) __kmp_allocate(
sizeof( kmp_team_t ) );
5160 team->t.t_max_nproc = max_nproc;
5164 __kmp_allocate_team_arrays( team, max_nproc );
5166 KA_TRACE( 20, (
"__kmp_allocate_team: making a new team\n" ) );
5167 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5169 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5170 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5171 team->t.t_task_team[0] = NULL;
5172 team->t.t_task_team[1] = NULL;
5174 if ( __kmp_storage_map ) {
5175 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc );
5179 __kmp_alloc_argv_entries( argc, team, FALSE );
5180 team->t.t_argc = argc;
5182 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5183 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5186 for ( b = 0; b < bs_last_barrier; ++ b ) {
5187 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5189 team->t.t_bar[ b ].b_master_arrived = 0;
5190 team->t.t_bar[ b ].b_team_arrived = 0;
5196 team->t.t_proc_bind = new_proc_bind;
5200 __ompt_team_assign_id(team, ompt_parallel_id);
5201 team->t.ompt_serialized_team_info = NULL;
5206 KA_TRACE( 20, (
"__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5217 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
5220 KA_TRACE( 20, (
"__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5223 KMP_DEBUG_ASSERT( root );
5224 KMP_DEBUG_ASSERT( team );
5225 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5226 KMP_DEBUG_ASSERT( team->t.t_threads );
5228 int use_hot_team = team == root->r.r_hot_team;
5229 #if KMP_NESTED_HOT_TEAMS
5231 kmp_hot_team_ptr_t *hot_teams;
5233 level = team->t.t_active_level - 1;
5234 if( master->th.th_teams_microtask ) {
5235 if( master->th.th_teams_size.nteams > 1 ) {
5238 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5239 master->th.th_teams_level == team->t.t_level ) {
5243 hot_teams = master->th.th_hot_teams;
5244 if( level < __kmp_hot_teams_max_level ) {
5245 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5249 #endif // KMP_NESTED_HOT_TEAMS
5252 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5253 team->t.t_copyin_counter = 0;
5257 if( ! use_hot_team ) {
5258 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5261 for (tt_idx=0; tt_idx<2; ++tt_idx) {
5262 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5263 if ( task_team != NULL ) {
5264 for (f=0; f<team->t.t_nproc; ++f) {
5265 team->t.t_threads[f]->th.th_task_team = NULL;
5267 KA_TRACE( 20, (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
5268 #if KMP_NESTED_HOT_TEAMS
5269 __kmp_free_task_team( master, task_team );
5271 team->t.t_task_team[tt_idx] = NULL;
5277 team->t.t_parent = NULL;
5278 team->t.t_level = 0;
5279 team->t.t_active_level = 0;
5282 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5283 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5284 __kmp_free_thread( team->t.t_threads[ f ] );
5285 team->t.t_threads[ f ] = NULL;
5290 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
5291 __kmp_team_pool = (
volatile kmp_team_t*) team;
5300 __kmp_reap_team( kmp_team_t *team )
5302 kmp_team_t *next_pool = team->t.t_next_pool;
5304 KMP_DEBUG_ASSERT( team );
5305 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5306 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5307 KMP_DEBUG_ASSERT( team->t.t_threads );
5308 KMP_DEBUG_ASSERT( team->t.t_argv );
5314 __kmp_free_team_arrays( team );
5315 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5316 __kmp_free( (
void*) team->t.t_argv );
5351 __kmp_free_thread( kmp_info_t *this_th )
5356 KA_TRACE( 20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5357 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5359 KMP_DEBUG_ASSERT( this_th );
5363 kmp_balign_t *balign = this_th->th.th_bar;
5364 for (b=0; b<bs_last_barrier; ++b) {
5365 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5366 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5367 balign[b].bb.team = NULL;
5368 balign[b].bb.leaf_kids = 0;
5370 this_th->th.th_task_state = 0;
5373 TCW_PTR(this_th->th.th_team, NULL);
5374 TCW_PTR(this_th->th.th_root, NULL);
5375 TCW_PTR(this_th->th.th_dispatch, NULL);
5381 gtid = this_th->th.th_info.ds.ds_gtid;
5382 if ( __kmp_thread_pool_insert_pt != NULL ) {
5383 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5384 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5385 __kmp_thread_pool_insert_pt = NULL;
5396 if ( __kmp_thread_pool_insert_pt != NULL ) {
5397 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5400 scan = (kmp_info_t **)&__kmp_thread_pool;
5402 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5403 scan = &( (*scan)->th.th_next_pool ) );
5409 TCW_PTR(this_th->th.th_next_pool, *scan);
5410 __kmp_thread_pool_insert_pt = *scan = this_th;
5411 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5412 || ( this_th->th.th_info.ds.ds_gtid
5413 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5414 TCW_4(this_th->th.th_in_pool, TRUE);
5415 __kmp_thread_pool_nth++;
5417 TCW_4(__kmp_nth, __kmp_nth - 1);
5419 #ifdef KMP_ADJUST_BLOCKTIME
5422 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5423 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5424 if ( __kmp_nth <= __kmp_avail_proc ) {
5425 __kmp_zero_bt = FALSE;
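/*
 * Illustrative sketch only: the gtid-ordered insertion used by __kmp_free_thread
 * above, reduced to a self-contained singly-linked list kept sorted by ascending
 * id.  toy_thread_t and toy_pool_insert are hypothetical names; the real code also
 * caches an insertion point (__kmp_thread_pool_insert_pt) so repeated frees do not
 * rescan the pool from its head.
 */
typedef struct toy_thread {
    int                gtid;        /* global thread id, the sort key       */
    struct toy_thread *next_pool;   /* next thread in the free pool         */
} toy_thread_t;

static void toy_pool_insert( toy_thread_t **pool, toy_thread_t *th ) {
    toy_thread_t **scan = pool;
    while ( *scan != NULL && (*scan)->gtid < th->gtid )   /* find the first entry with a larger id */
        scan = &(*scan)->next_pool;
    th->next_pool = *scan;                                /* splice the freed thread in front of it */
    *scan = th;
}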
5437 __kmp_launch_thread( kmp_info_t *this_thr )
5439 int gtid = this_thr->th.th_info.ds.ds_gtid;
5441 kmp_team_t *(*
volatile pteam);
5444 KA_TRACE( 10, (
"__kmp_launch_thread: T#%d start\n", gtid ) );
5446 if( __kmp_env_consistency_check ) {
5447 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid );
5452 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5453 this_thr->th.ompt_thread_info.wait_id = 0;
5454 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5455 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5456 __ompt_thread_begin(ompt_thread_worker, gtid);
5462 while( ! TCR_4(__kmp_global.g.g_done) ) {
5463 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5467 KA_TRACE( 20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5471 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5476 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5480 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5484 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5487 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
5489 ompt_task_info_t *task_info;
5490 ompt_parallel_id_t my_parallel_id;
5492 task_info = __ompt_get_taskinfo(0);
5493 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
5497 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5499 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5500 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5502 updateHWFPControl (*pteam);
5506 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5508 int tid = __kmp_tid_from_gtid(gtid);
5509 task_info->task_id = __ompt_task_id_new(tid);
5514 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5515 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5516 rc = (*pteam)->t.t_invoke( gtid );
5523 task_info->frame.exit_runtime_frame = NULL;
5525 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5529 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5530 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5533 __kmp_join_barrier( gtid );
5534 #if OMPT_SUPPORT && OMPT_TRACE
5536 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
5539 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5540 my_parallel_id, task_info->task_id);
5542 task_info->frame.exit_runtime_frame = NULL;
5543 task_info->task_id = 0;
5548 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5552 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5553 __ompt_thread_end(ompt_thread_worker, gtid);
5557 this_thr->th.th_task_team = NULL;
5559 __kmp_common_destroy_gtid( gtid );
5561 KA_TRACE( 10, (
"__kmp_launch_thread: T#%d done\n", gtid ) );
5570 __kmp_internal_end_dest(
void *specific_gtid )
5572 #if KMP_COMPILER_ICC
5573 #pragma warning( push )
5574 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5577 int gtid = (kmp_intptr_t)specific_gtid - 1;
5578 #if KMP_COMPILER_ICC
5579 #pragma warning( pop )
5582 KA_TRACE( 30, ( "__kmp_internal_end_dest: T#%d\n", gtid));
5596 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5597 __kmp_gtid_set_specific( gtid );
5598 #ifdef KMP_TDATA_GTID
5601 __kmp_internal_end_thread( gtid );
5604 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5610 __attribute__(( destructor ))
5612 __kmp_internal_end_dtor(
void )
5614 __kmp_internal_end_atexit();
5618 __kmp_internal_end_fini(
void )
5620 __kmp_internal_end_atexit();
5627 __kmp_internal_end_atexit(
void )
5629 KA_TRACE( 30, (
"__kmp_internal_end_atexit\n" ) );
5651 __kmp_internal_end_library( -1 );
5653 __kmp_close_console();
5659 __kmp_reap_thread( kmp_info_t * thread, int is_root )
5667 KMP_DEBUG_ASSERT( thread != NULL );
5669 gtid = thread->th.th_info.ds.ds_gtid;
5673 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5675 KA_TRACE( 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5677 ANNOTATE_HAPPENS_BEFORE(thread);
5678 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5679 __kmp_release_64(&flag);
5683 __kmp_reap_worker( thread );
5698 if ( thread->th.th_active_in_pool ) {
5699 thread->th.th_active_in_pool = FALSE;
5700 KMP_TEST_THEN_DEC32(
5701 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5702 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5706 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5707 --__kmp_thread_pool_nth;
5710 __kmp_free_implicit_task(thread);
5714 __kmp_free_fast_memory( thread );
5717 __kmp_suspend_uninitialize_thread( thread );
5719 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5720 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5725 #ifdef KMP_ADJUST_BLOCKTIME
5728 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5729 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5730 if ( __kmp_nth <= __kmp_avail_proc ) {
5731 __kmp_zero_bt = FALSE;
5737 if( __kmp_env_consistency_check ) {
5738 if ( thread->th.th_cons ) {
5739 __kmp_free_cons_stack( thread->th.th_cons );
5740 thread->th.th_cons = NULL;
5744 if ( thread->th.th_pri_common != NULL ) {
5745 __kmp_free( thread->th.th_pri_common );
5746 thread->th.th_pri_common = NULL;
5749 if (thread->th.th_task_state_memo_stack != NULL) {
5750 __kmp_free(thread->th.th_task_state_memo_stack);
5751 thread->th.th_task_state_memo_stack = NULL;
5755 if ( thread->th.th_local.bget_data != NULL ) {
5756 __kmp_finalize_bget( thread );
5760 #if KMP_AFFINITY_SUPPORTED
5761 if ( thread->th.th_affin_mask != NULL ) {
5762 KMP_CPU_FREE( thread->th.th_affin_mask );
5763 thread->th.th_affin_mask = NULL;
5767 __kmp_reap_team( thread->th.th_serial_team );
5768 thread->th.th_serial_team = NULL;
5769 __kmp_free( thread );
5776 __kmp_internal_end(
void)
5781 __kmp_unregister_library();
5789 __kmp_reclaim_dead_roots();
5792 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5794 if( __kmp_root[i]->r.r_active )
5797 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5799 if ( i < __kmp_threads_capacity ) {
5815 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5816 if ( TCR_4( __kmp_init_monitor ) ) {
5817 __kmp_reap_monitor( & __kmp_monitor );
5818 TCW_4( __kmp_init_monitor, 0 );
5820 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5821 KA_TRACE( 10, (
"__kmp_internal_end: monitor reaped\n" ) );
5822 #endif // KMP_USE_MONITOR
5827 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5828 if( __kmp_root[i] ) {
5830 KMP_ASSERT( ! __kmp_root[i]->r.r_active );
5839 while ( __kmp_thread_pool != NULL ) {
5841 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5842 __kmp_thread_pool = thread->th.th_next_pool;
5844 thread->th.th_next_pool = NULL;
5845 thread->th.th_in_pool = FALSE;
5846 __kmp_reap_thread( thread, 0 );
5848 __kmp_thread_pool_insert_pt = NULL;
5851 while ( __kmp_team_pool != NULL ) {
5853 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5854 __kmp_team_pool = team->t.t_next_pool;
5856 team->t.t_next_pool = NULL;
5857 __kmp_reap_team( team );
5860 __kmp_reap_task_teams( );
5862 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5869 TCW_SYNC_4(__kmp_init_common, FALSE);
5871 KA_TRACE( 10, (
"__kmp_internal_end: all workers reaped\n" ) );
5881 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5882 if ( TCR_4( __kmp_init_monitor ) ) {
5883 __kmp_reap_monitor( & __kmp_monitor );
5884 TCW_4( __kmp_init_monitor, 0 );
5886 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5887 KA_TRACE( 10, (
"__kmp_internal_end: monitor reaped\n" ) );
5890 TCW_4(__kmp_init_gtid, FALSE);
5900 __kmp_internal_end_library(
int gtid_req )
5908 if( __kmp_global.g.g_abort ) {
5909 KA_TRACE( 11, (
"__kmp_internal_end_library: abort, exiting\n" ));
5913 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5914 KA_TRACE( 10, (
"__kmp_internal_end_library: already finished\n" ));
5923 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5924 KA_TRACE( 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5925 if( gtid == KMP_GTID_SHUTDOWN ) {
5926 KA_TRACE( 10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5928 }
else if( gtid == KMP_GTID_MONITOR ) {
5929 KA_TRACE( 10, (
"__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5931 }
else if( gtid == KMP_GTID_DNE ) {
5932 KA_TRACE( 10, (
"__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5934 }
else if( KMP_UBER_GTID( gtid )) {
5936 if( __kmp_root[gtid]->r.r_active ) {
5937 __kmp_global.g.g_abort = -1;
5938 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5939 KA_TRACE( 10, (
"__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5942 KA_TRACE( 10, (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5943 __kmp_unregister_root_current_thread( gtid );
5950 #ifdef DUMP_DEBUG_ON_EXIT
5951 if ( __kmp_debug_buf )
5952 __kmp_dump_debug_buffer( );
5958 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5961 if( __kmp_global.g.g_abort ) {
5962 KA_TRACE( 10, (
"__kmp_internal_end_library: abort, exiting\n" ));
5964 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5967 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5968 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5978 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5981 __kmp_internal_end();
5983 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5984 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5986 KA_TRACE( 10, (
"__kmp_internal_end_library: exit\n" ) );
5988 #ifdef DUMP_DEBUG_ON_EXIT
5989 if ( __kmp_debug_buf )
5990 __kmp_dump_debug_buffer();
5994 __kmp_close_console();
5997 __kmp_fini_allocator();
6002 __kmp_internal_end_thread(
int gtid_req )
6012 if( __kmp_global.g.g_abort ) {
6013 KA_TRACE( 11, (
"__kmp_internal_end_thread: abort, exiting\n" ));
6017 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6018 KA_TRACE( 10, (
"__kmp_internal_end_thread: already finished\n" ));
6026 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6027 KA_TRACE( 10, (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6028 if( gtid == KMP_GTID_SHUTDOWN ) {
6029 KA_TRACE( 10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6031 }
else if( gtid == KMP_GTID_MONITOR ) {
6032 KA_TRACE( 10, (
"__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6034 }
else if( gtid == KMP_GTID_DNE ) {
6035 KA_TRACE( 10, (
"__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6038 }
else if( KMP_UBER_GTID( gtid )) {
6040 if( __kmp_root[gtid]->r.r_active ) {
6041 __kmp_global.g.g_abort = -1;
6042 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6043 KA_TRACE( 10, (
"__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6046 KA_TRACE( 10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6047 __kmp_unregister_root_current_thread( gtid );
6051 KA_TRACE( 10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6054 __kmp_threads[gtid]->th.th_task_team = NULL;
6057 KA_TRACE( 10, (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6061 #if defined KMP_DYNAMIC_LIB
6069 KA_TRACE( 10, ( "__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
6073 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6076 if( __kmp_global.g.g_abort ) {
6077 KA_TRACE( 10, (
"__kmp_internal_end_thread: abort, exiting\n" ));
6079 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6082 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6083 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6095 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6097 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6098 if ( KMP_UBER_GTID( i ) ) {
6099 KA_TRACE( 10, (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6100 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6101 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6108 __kmp_internal_end();
6110 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6111 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6113 KA_TRACE( 10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
6115 #ifdef DUMP_DEBUG_ON_EXIT
6116 if ( __kmp_debug_buf )
6117 __kmp_dump_debug_buffer();
6124 static long __kmp_registration_flag = 0;
6126 static char * __kmp_registration_str = NULL;
6132 __kmp_reg_status_name() {
6138 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d", (
int) getpid() );
6143 __kmp_register_library_startup(
6147 char * name = __kmp_reg_status_name();
6153 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6154 __kmp_initialize_system_tick();
6156 __kmp_read_system_time( & time.dtime );
6157 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6158 __kmp_registration_str =
6161 & __kmp_registration_flag,
6162 __kmp_registration_flag,
6166 KA_TRACE( 50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6170 char * value = NULL;
6173 __kmp_env_set( name, __kmp_registration_str, 0 );
6175 value = __kmp_env_get( name );
6176 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6185 char * tail = value;
6186 char * flag_addr_str = NULL;
6187 char * flag_val_str = NULL;
6188 char const * file_name = NULL;
6189 __kmp_str_split( tail,
'-', & flag_addr_str, & tail );
6190 __kmp_str_split( tail,
'-', & flag_val_str, & tail );
6192 if ( tail != NULL ) {
6193 long * flag_addr = 0;
6195 KMP_SSCANF( flag_addr_str,
"%p", & flag_addr );
6196 KMP_SSCANF( flag_val_str,
"%lx", & flag_val );
6197 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name,
"" ) != 0 ) {
6201 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6209 switch ( neighbor ) {
6214 file_name =
"unknown library";
6218 char * duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK" );
6219 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6223 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6224 KMP_HNT( DuplicateLibrary ),
6228 KMP_INTERNAL_FREE( duplicate_ok );
6229 __kmp_duplicate_library_ok = 1;
6234 __kmp_env_unset( name );
6237 KMP_DEBUG_ASSERT( 0 );
6242 KMP_INTERNAL_FREE( (
void *) value );
6245 KMP_INTERNAL_FREE( (
void *) name );
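/*
 * Illustrative sketch only: building and parsing the registration value used by
 * the duplicate-library check above.  The exact format string is not expanded in
 * this listing; judging from the '-'-separated parse above, it is assumed to be
 * "<flag address>-<flag value>-<library file>".  The toy_* names are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

static char *toy_make_registration_value( long *flag_addr, unsigned long flag_val,
                                           const char *file ) {
    char *buf = (char *)malloc( 128 );
    /* value to be stored in the __KMP_REGISTERED_LIB_<pid> environment variable */
    snprintf( buf, 128, "%p-%lx-%s", (void *)flag_addr, flag_val, file );
    return buf;
}

static int toy_parse_registration_value( const char *value, void **addr, unsigned long *val ) {
    /* mirrors the KMP_SSCANF( "%p" ) and KMP_SSCANF( "%lx" ) calls above */
    return sscanf( value, "%p-%lx", addr, val ) == 2;
}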
6251 __kmp_unregister_library(
void ) {
6253 char * name = __kmp_reg_status_name();
6254 char * value = __kmp_env_get( name );
6256 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6257 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6258 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6260 __kmp_env_unset( name );
6263 KMP_INTERNAL_FREE( __kmp_registration_str );
6264 KMP_INTERNAL_FREE( value );
6265 KMP_INTERNAL_FREE( name );
6267 __kmp_registration_flag = 0;
6268 __kmp_registration_str = NULL;
6276 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6278 static void __kmp_check_mic_type()
6280 kmp_cpuid_t cpuid_state = {0};
6281 kmp_cpuid_t * cs_p = &cpuid_state;
6282 __kmp_x86_cpuid(1, 0, cs_p);
6284 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6285 __kmp_mic_type = mic2;
6286 }
else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6287 __kmp_mic_type = mic3;
6289 __kmp_mic_type = non_mic;
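/*
 * Illustrative sketch only: reading CPUID leaf 1 with the GCC/Clang <cpuid.h>
 * helper and applying the same EAX masks as __kmp_check_mic_type above.  Assumes
 * an x86 compiler that ships <cpuid.h>; toy_detect_mic is a hypothetical name.
 */
#if defined(__x86_64__) || defined(__i386__)
#include <cpuid.h>

static const char *toy_detect_mic( void ) {
    unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
    if ( !__get_cpuid( 1, &eax, &ebx, &ecx, &edx ) )
        return "unknown";
    if ( ( eax & 0xff0 ) == 0xB10 )          /* same mask/value as the mic2 test above */
        return "mic2";
    if ( ( eax & 0xf0ff0 ) == 0x50670 )      /* same mask/value as the mic3 test above */
        return "mic3";
    return "non_mic";
}
#endif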
6296 __kmp_do_serial_initialize(
void )
6301 KA_TRACE( 10, (
"__kmp_do_serial_initialize: enter\n" ) );
6303 KMP_DEBUG_ASSERT(
sizeof( kmp_int32 ) == 4 );
6304 KMP_DEBUG_ASSERT(
sizeof( kmp_uint32 ) == 4 );
6305 KMP_DEBUG_ASSERT(
sizeof( kmp_int64 ) == 8 );
6306 KMP_DEBUG_ASSERT(
sizeof( kmp_uint64 ) == 8 );
6307 KMP_DEBUG_ASSERT(
sizeof( kmp_intptr_t ) ==
sizeof(
void * ) );
6313 __kmp_validate_locks();
6316 __kmp_init_allocator();
6322 __kmp_register_library_startup( );
6325 if( TCR_4(__kmp_global.g.g_done) ) {
6326 KA_TRACE( 10, (
"__kmp_do_serial_initialize: reinitialization of library\n" ) );
6329 __kmp_global.g.g_abort = 0;
6330 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6333 #if KMP_USE_ADAPTIVE_LOCKS
6334 #if KMP_DEBUG_ADAPTIVE_LOCKS
6335 __kmp_init_speculative_stats();
6338 #if KMP_STATS_ENABLED
6341 __kmp_init_lock( & __kmp_global_lock );
6342 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6343 __kmp_init_lock( & __kmp_debug_lock );
6344 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6345 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6346 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6347 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6348 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6349 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6350 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6351 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6352 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6353 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6354 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6355 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6356 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6357 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6358 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6360 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6362 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6366 __kmp_runtime_initialize();
6368 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6369 __kmp_check_mic_type();
6376 __kmp_abort_delay = 0;
6380 __kmp_dflt_team_nth_ub = __kmp_xproc;
6381 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6382 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6384 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6385 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6387 __kmp_max_nth = __kmp_sys_max_nth;
6390 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6392 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6393 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6396 __kmp_library = library_throughput;
6398 __kmp_static = kmp_sch_static_balanced;
6404 #if KMP_FAST_REDUCTION_BARRIER
6405 #define kmp_reduction_barrier_gather_bb ((int)1)
6406 #define kmp_reduction_barrier_release_bb ((int)1)
6407 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6408 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6409 #endif // KMP_FAST_REDUCTION_BARRIER
6410 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6411 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6412 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6413 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6414 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6415 #if KMP_FAST_REDUCTION_BARRIER
6416 if( i == bs_reduction_barrier ) {
6417 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6418 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6419 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6420 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6422 #endif // KMP_FAST_REDUCTION_BARRIER
6424 #if KMP_FAST_REDUCTION_BARRIER
6425 #undef kmp_reduction_barrier_release_pat
6426 #undef kmp_reduction_barrier_gather_pat
6427 #undef kmp_reduction_barrier_release_bb
6428 #undef kmp_reduction_barrier_gather_bb
6429 #endif // KMP_FAST_REDUCTION_BARRIER
6430 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6431 if (__kmp_mic_type == mic2) {
6433 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;
6434 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;
6435 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6436 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6438 #if KMP_FAST_REDUCTION_BARRIER
6439 if (__kmp_mic_type == mic2) {
6440 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6441 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6448 __kmp_env_checks = TRUE;
6450 __kmp_env_checks = FALSE;
6454 __kmp_foreign_tp = TRUE;
6456 __kmp_global.g.g_dynamic = FALSE;
6457 __kmp_global.g.g_dynamic_mode = dynamic_default;
6459 __kmp_env_initialize( NULL );
6463 char const * val = __kmp_env_get(
"KMP_DUMP_CATALOG" );
6464 if ( __kmp_str_match_true( val ) ) {
6465 kmp_str_buf_t buffer;
6466 __kmp_str_buf_init( & buffer );
6467 __kmp_i18n_dump_catalog( & buffer );
6468 __kmp_printf(
"%s", buffer.str );
6469 __kmp_str_buf_free( & buffer );
6471 __kmp_env_free( & val );
6474 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
6476 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6480 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6481 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6482 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6483 __kmp_thread_pool = NULL;
6484 __kmp_thread_pool_insert_pt = NULL;
6485 __kmp_team_pool = NULL;
6490 size = (
sizeof(kmp_info_t*) +
sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6491 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6492 __kmp_root = (kmp_root_t**) ((
char*)__kmp_threads +
sizeof(kmp_info_t*) * __kmp_threads_capacity );
6495 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
6496 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
6501 gtid = __kmp_register_root( TRUE );
6502 KA_TRACE( 10, (
"__kmp_do_serial_initialize T#%d\n", gtid ));
6503 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6504 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6508 __kmp_common_initialize();
6512 __kmp_register_atfork();
6515 #if ! defined KMP_DYNAMIC_LIB
6520 int rc = atexit( __kmp_internal_end_atexit );
6522 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError,
"atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6527 #if KMP_HANDLE_SIGNALS
6534 __kmp_install_signals( FALSE );
6537 __kmp_install_signals( TRUE );
6542 __kmp_init_counter ++;
6544 __kmp_init_serial = TRUE;
6546 if (__kmp_settings) {
6551 if (__kmp_display_env || __kmp_display_env_verbose) {
6552 __kmp_env_print_2();
6554 #endif // OMP_40_ENABLED
6562 KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
6566 __kmp_serial_initialize(
void )
6568 if ( __kmp_init_serial ) {
6571 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6572 if ( __kmp_init_serial ) {
6573 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6576 __kmp_do_serial_initialize();
6577 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
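/*
 * Illustrative sketch only: the "check, lock, re-check" shape shared by
 * __kmp_serial_initialize, __kmp_middle_initialize and __kmp_parallel_initialize
 * above, written against C11 <threads.h>.  The toy_* names are hypothetical; the
 * runtime itself uses its bootstrap locks and the TCR_4/TCW_SYNC_4 accessors.
 */
#include <threads.h>
#include <stdatomic.h>

static atomic_int toy_init_done = 0;
static mtx_t      toy_init_lock;            /* assumed to be mtx_init()'d once at startup */

static void toy_do_initialize( void ) { /* ... one-time setup ... */ }

static void toy_ensure_initialized( void ) {
    if ( atomic_load( &toy_init_done ) )    /* fast path: already initialized              */
        return;
    mtx_lock( &toy_init_lock );
    if ( !atomic_load( &toy_init_done ) ) { /* re-check under the lock: another thread may */
        toy_do_initialize();                /* have completed initialization meanwhile     */
        atomic_store( &toy_init_done, 1 );
    }
    mtx_unlock( &toy_init_lock );
}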
6581 __kmp_do_middle_initialize(
void )
6584 int prev_dflt_team_nth;
6586 if( !__kmp_init_serial ) {
6587 __kmp_do_serial_initialize();
6590 KA_TRACE( 10, (
"__kmp_middle_initialize: enter\n" ) );
6596 prev_dflt_team_nth = __kmp_dflt_team_nth;
6598 #if KMP_AFFINITY_SUPPORTED
6603 __kmp_affinity_initialize();
6609 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6610 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6611 __kmp_affinity_set_init_mask( i, TRUE );
6616 KMP_ASSERT( __kmp_xproc > 0 );
6617 if ( __kmp_avail_proc == 0 ) {
6618 __kmp_avail_proc = __kmp_xproc;
6623 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
6624 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6628 if ( __kmp_dflt_team_nth == 0 ) {
6629 #ifdef KMP_DFLT_NTH_CORES
6633 __kmp_dflt_team_nth = __kmp_ncores;
6634 KA_TRACE( 20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6635 __kmp_dflt_team_nth ) );
6640 __kmp_dflt_team_nth = __kmp_avail_proc;
6641 KA_TRACE( 20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6642 __kmp_dflt_team_nth ) );
6646 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6647 __kmp_dflt_team_nth = KMP_MIN_NTH;
6649 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6650 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6657 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6659 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6666 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6667 kmp_info_t *thread = __kmp_threads[ i ];
6668 if ( thread == NULL )
continue;
6669 if ( thread->th.th_current_task->td_icvs.nproc != 0 )
continue;
6671 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
6674 KA_TRACE( 20, (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6675 __kmp_dflt_team_nth) );
6677 #ifdef KMP_ADJUST_BLOCKTIME
6680 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6681 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6682 if ( __kmp_nth > __kmp_avail_proc ) {
6683 __kmp_zero_bt = TRUE;
6689 TCW_SYNC_4(__kmp_init_middle, TRUE);
6691 KA_TRACE( 10, (
"__kmp_do_middle_initialize: exit\n" ) );
6695 __kmp_middle_initialize(
void )
6697 if ( __kmp_init_middle ) {
6700 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6701 if ( __kmp_init_middle ) {
6702 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6705 __kmp_do_middle_initialize();
6706 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6710 __kmp_parallel_initialize(
void )
6712 int gtid = __kmp_entry_gtid();
6715 if( TCR_4(__kmp_init_parallel) )
return;
6716 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6717 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock );
return; }
6720 if( TCR_4(__kmp_global.g.g_done) ) {
6721 KA_TRACE( 10, (
"__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6722 __kmp_infinite_loop();
6728 if( !__kmp_init_middle ) {
6729 __kmp_do_middle_initialize();
6733 KA_TRACE( 10, (
"__kmp_parallel_initialize: enter\n" ) );
6734 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6736 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6741 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6742 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6743 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6747 # if KMP_HANDLE_SIGNALS
6749 __kmp_install_signals( TRUE );
6753 __kmp_suspend_initialize();
6755 #if defined(USE_LOAD_BALANCE)
6756 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6757 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6760 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6761 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6765 if ( __kmp_version ) {
6766 __kmp_print_version_2();
6770 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6773 KA_TRACE( 10, (
"__kmp_parallel_initialize: exit\n" ) );
6775 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6782 __kmp_run_before_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
6785 kmp_disp_t *dispatch;
6790 this_thr->th.th_local.this_construct = 0;
6791 #if KMP_CACHE_MANAGE
6792 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
6794 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6795 KMP_DEBUG_ASSERT( dispatch );
6796 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6799 dispatch->th_disp_index = 0;
6801 dispatch->th_doacross_buf_idx = 0;
6803 if( __kmp_env_consistency_check )
6804 __kmp_push_parallel( gtid, team->t.t_ident );
6810 __kmp_run_after_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
6813 if( __kmp_env_consistency_check )
6814 __kmp_pop_parallel( gtid, team->t.t_ident );
6816 __kmp_finish_implicit_task(this_thr);
6820 __kmp_invoke_task_func(
int gtid )
6823 int tid = __kmp_tid_from_gtid( gtid );
6824 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6825 kmp_team_t *team = this_thr->th.th_team;
6827 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6829 if ( __itt_stack_caller_create_ptr ) {
6830 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
6833 #if INCLUDE_SSC_MARKS
6834 SSC_MARK_INVOKING();
6839 void **exit_runtime_p;
6840 ompt_task_id_t my_task_id;
6841 ompt_parallel_id_t my_parallel_id;
6844 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6845 ompt_task_info.frame.exit_runtime_frame);
6847 exit_runtime_p = &dummy;
6851 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6852 my_parallel_id = team->t.ompt_team_info.parallel_id;
6854 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6855 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6856 my_parallel_id, my_task_id);
6862 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6863 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6864 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6865 gtid, tid, (
int) team->t.t_argc, (
void **) team->t.t_argv
6871 *exit_runtime_p = NULL;
6876 if ( __itt_stack_caller_create_ptr ) {
6877 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
6880 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6887 __kmp_teams_master(
int gtid )
6890 kmp_info_t *thr = __kmp_threads[ gtid ];
6891 kmp_team_t *team = thr->th.th_team;
6892 ident_t *loc = team->t.t_ident;
6893 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6894 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6895 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
6896 KA_TRACE( 20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
6897 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
6900 #if INCLUDE_SSC_MARKS
6903 __kmp_fork_call( loc, gtid, fork_context_intel,
6906 (
void *)thr->th.th_teams_microtask,
6908 (microtask_t)thr->th.th_teams_microtask,
6909 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6911 #if INCLUDE_SSC_MARKS
6917 __kmp_join_call( loc, gtid
6919 , fork_context_intel
6925 __kmp_invoke_teams_master(
int gtid )
6927 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6928 kmp_team_t *team = this_thr->th.th_team;
6930 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6931 KMP_DEBUG_ASSERT( (
void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (
void*)__kmp_teams_master );
6933 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6934 __kmp_teams_master( gtid );
6935 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
6946 __kmp_push_num_threads(
ident_t *
id,
int gtid,
int num_threads )
6948 kmp_info_t *thr = __kmp_threads[gtid];
6950 if( num_threads > 0 )
6951 thr->th.th_set_nproc = num_threads;
6959 __kmp_push_num_teams(
ident_t *
id,
int gtid,
int num_teams,
int num_threads )
6961 kmp_info_t *thr = __kmp_threads[gtid];
6962 KMP_DEBUG_ASSERT(num_teams >= 0);
6963 KMP_DEBUG_ASSERT(num_threads >= 0);
6965 if( num_teams == 0 )
6967 if( num_teams > __kmp_max_nth ) {
6968 if ( !__kmp_reserve_warn ) {
6969 __kmp_reserve_warn = 1;
6972 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6973 KMP_HNT( Unset_ALL_THREADS ),
6977 num_teams = __kmp_max_nth;
6980 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6983 if( num_threads == 0 ) {
6984 if( !TCR_4(__kmp_init_middle) )
6985 __kmp_middle_initialize();
6986 num_threads = __kmp_avail_proc / num_teams;
6987 if( num_teams * num_threads > __kmp_max_nth ) {
6989 num_threads = __kmp_max_nth / num_teams;
6992 if( num_teams * num_threads > __kmp_max_nth ) {
6993 int new_threads = __kmp_max_nth / num_teams;
6994 if ( !__kmp_reserve_warn ) {
6995 __kmp_reserve_warn = 1;
6998 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6999 KMP_HNT( Unset_ALL_THREADS ),
7003 num_threads = new_threads;
7006 thr->th.th_teams_size.nth = num_threads;
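/*
 * Illustrative sketch only: the clamping arithmetic of __kmp_push_num_teams above
 * as a pure function.  toy_clamp_teams_threads is a hypothetical name; avail_proc
 * and max_nth stand in for __kmp_avail_proc and __kmp_max_nth, and the warning
 * messages and ICV updates of the real code are omitted.
 */
static void toy_clamp_teams_threads( int avail_proc, int max_nth,
                                     int *num_teams, int *num_threads ) {
    if ( *num_teams <= 0 )                         /* the real code also defaults num_teams */
        *num_teams = 1;
    if ( *num_teams > max_nth )                    /* too many teams: cap at the thread limit */
        *num_teams = max_nth;
    if ( *num_threads == 0 ) {                     /* no explicit size: divide what is available */
        *num_threads = avail_proc / *num_teams;
        if ( *num_teams * *num_threads > max_nth )
            *num_threads = max_nth / *num_teams;
    } else if ( *num_teams * *num_threads > max_nth ) {
        *num_threads = max_nth / *num_teams;       /* product exceeds the limit: shrink each team */
    }
}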
7014 __kmp_push_proc_bind(
ident_t *
id,
int gtid, kmp_proc_bind_t proc_bind )
7016 kmp_info_t *thr = __kmp_threads[gtid];
7017 thr->th.th_set_proc_bind = proc_bind;
7025 __kmp_internal_fork(
ident_t *
id,
int gtid, kmp_team_t *team )
7027 kmp_info_t *this_thr = __kmp_threads[gtid];
7033 KMP_DEBUG_ASSERT( team );
7034 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7035 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7038 team->t.t_construct = 0;
7039 team->t.t_ordered.dt.t_value = 0;
7042 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
7043 if ( team->t.t_max_nproc > 1 ) {
7045 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7046 team->t.t_disp_buffer[ i ].buffer_index = i;
7048 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7052 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
7054 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7059 KMP_ASSERT( this_thr->th.th_team == team );
7062 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7063 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7064 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7069 __kmp_fork_barrier( gtid, 0 );
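/*
 * Illustrative sketch only: the rotating dispatch-buffer scheme initialized above,
 * reduced to index arithmetic.  It is assumed here that the k-th worksharing
 * construct in a region uses buffer k % nbuf, and that the stored buffer_index
 * lets threads detect when a slot has been recycled for a later construct.  The
 * toy_* names are hypothetical.
 */
typedef struct toy_disp_buffer {
    int buffer_index;                  /* construct number this slot currently serves */
    /* ... per-construct dispatch state would live here ... */
} toy_disp_buffer_t;

static toy_disp_buffer_t *toy_buffer_for_construct( toy_disp_buffer_t *buf, int nbuf,
                                                    int construct_index ) {
    return &buf[ construct_index % nbuf ];   /* ring of nbuf reusable dispatch slots */
}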
7074 __kmp_internal_join(
ident_t *
id,
int gtid, kmp_team_t *team )
7076 kmp_info_t *this_thr = __kmp_threads[gtid];
7078 KMP_DEBUG_ASSERT( team );
7079 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7080 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7086 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7087 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7088 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7089 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7090 __kmp_print_structure();
7092 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7093 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7096 __kmp_join_barrier( gtid );
7099 KMP_ASSERT( this_thr->th.th_team == team );
7106 #ifdef USE_LOAD_BALANCE
7113 __kmp_active_hot_team_nproc( kmp_root_t *root )
7117 kmp_team_t *hot_team;
7119 if ( root->r.r_active ) {
7122 hot_team = root->r.r_hot_team;
7123 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7124 return hot_team->t.t_nproc - 1;
7131 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7132 if ( hot_team->t.t_threads[i]->th.th_active ) {
7144 __kmp_load_balance_nproc( kmp_root_t *root,
int set_nproc )
7148 int hot_team_active;
7149 int team_curr_active;
7152 KB_TRACE( 20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7153 root, set_nproc ) );
7154 KMP_DEBUG_ASSERT( root );
7155 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
7156 KMP_DEBUG_ASSERT( set_nproc > 1 );
7158 if ( set_nproc == 1) {
7159 KB_TRACE( 20, (
"__kmp_load_balance_nproc: serial execution.\n" ) );
7170 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7171 hot_team_active = __kmp_active_hot_team_nproc( root );
7172 team_curr_active = pool_active + hot_team_active + 1;
7177 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7178 KB_TRACE( 30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7179 system_active, pool_active, hot_team_active ) );
7181 if ( system_active < 0 ) {
7188 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7189 KMP_WARNING( CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit" );
7194 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7195 : root->r.r_hot_team->t.t_nproc);
7196 if ( retval > set_nproc ) {
7199 if ( retval < KMP_MIN_NTH ) {
7200 retval = KMP_MIN_NTH;
7203 KB_TRACE( 20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7213 if ( system_active < team_curr_active ) {
7214 system_active = team_curr_active;
7216 retval = __kmp_avail_proc - system_active + team_curr_active;
7217 if ( retval > set_nproc ) {
7220 if ( retval < KMP_MIN_NTH ) {
7221 retval = KMP_MIN_NTH;
7224 KB_TRACE( 20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7235 __kmp_cleanup(
void )
7239 KA_TRACE( 10, (
"__kmp_cleanup: enter\n" ) );
7241 if (TCR_4(__kmp_init_parallel)) {
7242 #if KMP_HANDLE_SIGNALS
7243 __kmp_remove_signals();
7245 TCW_4(__kmp_init_parallel, FALSE);
7248 if (TCR_4(__kmp_init_middle)) {
7249 #if KMP_AFFINITY_SUPPORTED
7250 __kmp_affinity_uninitialize();
7252 __kmp_cleanup_hierarchy();
7253 TCW_4(__kmp_init_middle, FALSE);
7256 KA_TRACE( 10, (
"__kmp_cleanup: go serial cleanup\n" ) );
7258 if (__kmp_init_serial) {
7259 __kmp_runtime_destroy();
7260 __kmp_init_serial = FALSE;
7263 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7264 if ( __kmp_root[ f ] != NULL ) {
7265 __kmp_free( __kmp_root[ f ] );
7266 __kmp_root[ f ] = NULL;
7269 __kmp_free( __kmp_threads );
7272 __kmp_threads = NULL;
7274 __kmp_threads_capacity = 0;
7276 #if KMP_USE_DYNAMIC_LOCK
7277 __kmp_cleanup_indirect_user_locks();
7279 __kmp_cleanup_user_locks();
7282 #if KMP_AFFINITY_SUPPORTED
7283 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7284 __kmp_cpuinfo_file = NULL;
7287 #if KMP_USE_ADAPTIVE_LOCKS
7288 #if KMP_DEBUG_ADAPTIVE_LOCKS
7289 __kmp_print_speculative_stats();
7292 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7293 __kmp_nested_nth.nth = NULL;
7294 __kmp_nested_nth.size = 0;
7295 __kmp_nested_nth.used = 0;
7296 KMP_INTERNAL_FREE( __kmp_nested_proc_bind.bind_types );
7297 __kmp_nested_proc_bind.bind_types = NULL;
7298 __kmp_nested_proc_bind.size = 0;
7299 __kmp_nested_proc_bind.used = 0;
7301 __kmp_i18n_catclose();
7303 #if KMP_STATS_ENABLED
7307 KA_TRACE( 10, ( "__kmp_cleanup: exit\n" ) );
7314 __kmp_ignore_mppbeg(
void )
7318 if ((env = getenv(
"KMP_IGNORE_MPPBEG" )) != NULL) {
7319 if (__kmp_str_match_false( env ))
7327 __kmp_ignore_mppend(
void )
7331 if ((env = getenv(
"KMP_IGNORE_MPPEND" )) != NULL) {
7332 if (__kmp_str_match_false( env ))
7340 __kmp_internal_begin(
void )
7347 gtid = __kmp_entry_gtid();
7348 root = __kmp_threads[ gtid ]->th.th_root;
7349 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7351 if( root->r.r_begin )
return;
7352 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7353 if( root->r.r_begin ) {
7354 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7358 root->r.r_begin = TRUE;
7360 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7368 __kmp_user_set_library (
enum library_type arg)
7376 gtid = __kmp_entry_gtid();
7377 thread = __kmp_threads[ gtid ];
7379 root = thread->th.th_root;
7381 KA_TRACE( 20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7382 if (root->r.r_in_parallel) {
7383 KMP_WARNING( SetLibraryIncorrectCall );
7388 case library_serial :
7389 thread->th.th_set_nproc = 0;
7390 set__nproc( thread, 1 );
7392 case library_turnaround :
7393 thread->th.th_set_nproc = 0;
7394 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7396 case library_throughput :
7397 thread->th.th_set_nproc = 0;
7398 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7401 KMP_FATAL( UnknownLibraryType, arg );
7404 __kmp_aux_set_library ( arg );
7408 __kmp_aux_set_stacksize(
size_t arg )
7410 if (! __kmp_init_serial)
7411 __kmp_serial_initialize();
7414 if (arg & (0x1000 - 1)) {
7415 arg &= ~(0x1000 - 1);
7420 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7423 if (! TCR_4(__kmp_init_parallel)) {
7426 if (value < __kmp_sys_min_stksize )
7427 value = __kmp_sys_min_stksize ;
7428 else if (value > KMP_MAX_STKSIZE)
7429 value = KMP_MAX_STKSIZE;
7431 __kmp_stksize = value;
7433 __kmp_env_stksize = TRUE;
7436 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7442 __kmp_aux_set_library (
enum library_type arg)
7444 __kmp_library = arg;
7446 switch ( __kmp_library ) {
7447 case library_serial :
7449 KMP_INFORM( LibraryIsSerial );
7450 (void) __kmp_change_library( TRUE );
7453 case library_turnaround :
7454 (void) __kmp_change_library( TRUE );
7456 case library_throughput :
7457 (void) __kmp_change_library( FALSE );
7460 KMP_FATAL( UnknownLibraryType, arg );
7468 __kmp_aux_set_blocktime (
int arg, kmp_info_t *thread,
int tid)
7470 int blocktime = arg;
7476 __kmp_save_internal_controls( thread );
7479 if (blocktime < KMP_MIN_BLOCKTIME)
7480 blocktime = KMP_MIN_BLOCKTIME;
7481 else if (blocktime > KMP_MAX_BLOCKTIME)
7482 blocktime = KMP_MAX_BLOCKTIME;
7484 set__blocktime_team( thread->th.th_team, tid, blocktime );
7485 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
7489 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7491 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7492 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
7498 set__bt_set_team( thread->th.th_team, tid, bt_set );
7499 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
7501 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " 7502 "bt_intervals=%d, monitor_updates=%d\n",
7503 __kmp_gtid_from_tid(tid, thread->th.th_team),
7504 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7505 __kmp_monitor_wakeups));
7507 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7508 __kmp_gtid_from_tid(tid, thread->th.th_team),
7509 thread->th.th_team->t.t_id, tid, blocktime));
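/*
 * Illustrative sketch only: the blocktime handling above as pure functions.  The
 * clamp to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] follows the code above; the
 * interval conversion is an assumption, since KMP_INTERVALS_FROM_BLOCKTIME is not
 * expanded in this listing -- it is taken here to mean "how many monitor wakeup
 * periods cover blocktime milliseconds".  The toy_* names are hypothetical.
 */
static int toy_clamp_blocktime( int blocktime, int min_bt, int max_bt ) {
    if ( blocktime < min_bt ) return min_bt;
    if ( blocktime > max_bt ) return max_bt;
    return blocktime;
}

static int toy_bt_intervals( int blocktime_ms, int monitor_wakeups_per_sec ) {
    if ( monitor_wakeups_per_sec <= 0 )
        monitor_wakeups_per_sec = 1;
    int period_ms = 1000 / monitor_wakeups_per_sec;        /* assumed monitor period        */
    return ( blocktime_ms + period_ms - 1 ) / period_ms;   /* ceil(blocktime / period)      */
}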
7514 __kmp_aux_set_defaults(
7518 if ( ! __kmp_init_serial ) {
7519 __kmp_serial_initialize();
7521 __kmp_env_initialize( str );
7525 || __kmp_display_env || __kmp_display_env_verbose
7538 PACKED_REDUCTION_METHOD_T
7539 __kmp_determine_reduction_method(
ident_t *loc, kmp_int32 global_tid,
7540 kmp_int32 num_vars,
size_t reduce_size,
void *reduce_data,
void (*reduce_func)(
void *lhs_data,
void *rhs_data),
7541 kmp_critical_name *lck )
7549 PACKED_REDUCTION_METHOD_T retval;
7553 KMP_DEBUG_ASSERT( loc );
7554 KMP_DEBUG_ASSERT( lck );
7556 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7557 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7559 retval = critical_reduce_block;
7561 team_size = __kmp_get_team_num_threads( global_tid );
7563 if( team_size == 1 ) {
7565 retval = empty_reduce_block;
7569 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7570 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7572 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
7574 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7576 int teamsize_cutoff = 4;
7578 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7579 if( __kmp_mic_type != non_mic ) {
7580 teamsize_cutoff = 8;
7583 if( tree_available ) {
7584 if( team_size <= teamsize_cutoff ) {
7585 if ( atomic_available ) {
7586 retval = atomic_reduce_block;
7589 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7591 }
else if ( atomic_available ) {
7592 retval = atomic_reduce_block;
7595 #error "Unknown or unsupported OS"
7596 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7598 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7600 #if KMP_OS_LINUX || KMP_OS_WINDOWS
7604 if( atomic_available ) {
7605 if( num_vars <= 2 ) {
7606 retval = atomic_reduce_block;
7612 if( atomic_available && ( num_vars <= 3 ) ) {
7613 retval = atomic_reduce_block;
7614 }
else if( tree_available ) {
7615 if( ( reduce_size > ( 9 *
sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 *
sizeof( kmp_real64 ) ) ) ) {
7616 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7621 #error "Unknown or unsupported OS"
7625 #error "Unknown or unsupported architecture"
7634 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
7636 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
7638 int atomic_available, tree_available;
7640 switch( ( forced_retval = __kmp_force_reduction_method ) )
7642 case critical_reduce_block:
7646 case atomic_reduce_block:
7647 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7648 if( ! atomic_available ) {
7649 KMP_WARNING(RedMethodNotSupported,
"atomic");
7650 forced_retval = critical_reduce_block;
7654 case tree_reduce_block:
7655 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7656 if( ! tree_available ) {
7657 KMP_WARNING(RedMethodNotSupported,
"tree");
7658 forced_retval = critical_reduce_block;
7660 #if KMP_FAST_REDUCTION_BARRIER
7661 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7670 retval = forced_retval;
7673 KA_TRACE(10, (
"reduction method selected=%08x\n", retval ) );
7675 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7676 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7683 __kmp_get_reduce_method( void ) {
7684 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
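/*
 * Illustrative sketch only: the packing implied by the ">> 8" above -- the
 * reduction method id is kept in the upper bits of the packed word, leaving the
 * low byte for other per-construct data.  The exact layout of that low byte is
 * not shown in this listing; the toy_* names are hypothetical.
 */
typedef int toy_packed_reduction_t;

static toy_packed_reduction_t toy_pack_reduction_method( int method, int low_byte ) {
    return ( method << 8 ) | ( low_byte & 0xFF );
}

static int toy_unpack_reduction_method( toy_packed_reduction_t packed ) {
    return packed >> 8;                      /* same shift as __kmp_get_reduce_method above */
}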