#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "ompt-specific.h"

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0
#define KMP_USE_POOLED_ALLOC 0

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: ";

char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs,
                                   ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int __kmp_expand_threads( int nWish, int nNeed );
static int __kmp_unregister_root_other_thread( int gtid );
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
/* ------------------------------------------------------------------------ */

int
__kmp_get_global_thread_id( )
{
    int             i;
    kmp_info_t    **other_threads;
    size_t          stack_data;
    char           *stack_addr;
    size_t          stack_size;
    char           *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    /* scan the stacks of the registered threads to find the one that contains
       the address of the local variable above */
    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if ( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */
        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated stack size
                   is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /* if we haven't been assigned a gtid, then return code */
    if( i < 0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
          other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}
int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
    }

    KMP_DEBUG_ASSERT( gtid >= 0 );

    return gtid;
}
/* caller must hold forkjoin_lock */
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
    int f;
    char *stack_beg = NULL;
    char *stack_end = NULL;
    int gtid;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    /* No point in checking ubermaster threads since they use refinement and cannot overlap */
    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                            (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                            "th_%d stack (overlapped)",
                            __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }
    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}
/* ------------------------------------------------------------------------ */

void
__kmp_infinite_loop( void )
{
    static int done = FALSE;

    while ( !done ) {
        KMP_YIELD( 1 );
    }
}
#define MAX_MESSAGE 512

void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];
    va_list ap;

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    int node;
    if(gtid >= 0) {
        if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
            if( __kmp_storage_map_verbose ) {
                node = __kmp_get_host_node(p1);
                if(node < 0)  /* doesn't work, so don't try this next time */
                    __kmp_storage_map_verbose = FALSE;
                else {
                    char *last;
                    int lastNode;
                    int localProc = __kmp_get_cpu_from_gtid(gtid);

                    p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
                    p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
                    if(localProc >= 0)
                        __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid, localProc>>1);
                    else
                        __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
                    /* The more elaborate format collates adjacent pages with the same host node. */
                    do {
                        last = p1;
                        lastNode = node;
                        do {
                            (char*)p1 += PAGE_SIZE;
                        } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
                                             (char*)p1 - 1, lastNode);
                    } while(p1 <= p2);
#else
                    __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                                         (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
                    if(p1 < p2) {
                        __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                             (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
                    }
#endif
                }
            }
        } else
            __kmp_printf_no_lock("  %s\n", KMP_I18N_STR( StorageMapWarning ) );
    }
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}
void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}
void
__kmp_abort_process()
{
    // Later threads may stall here, but that's ok because abort() will kill them.
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }

    if ( KMP_OS_WINDOWS ) {
        // Let other threads know of abnormal termination and prevent deadlock
        // if abort happened during library initialization or shutdown.
        __kmp_global.g.g_abort = SIGABRT;
    }

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

} // __kmp_abort_process
void
__kmp_abort_thread( void )
{
    // In case of abort just park the calling thread; abort() will kill the process.
    __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}
static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    //TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // PROCESS_DETACH is expected to be called by a thread that executes
    // ProcessExit() or FreeLibrary(); wait until the other registered threads
    // are gone before breaking the bootstrap locks.
    while ( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Assume that I'm alone.  Now it might be safe to check and reset locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
    __kmp_reset_lock( &__kmp_stdio_lock );
}
BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_ATTACH\n" ));

            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
            {
                // lpReserved is used for telling the difference:
                //   lpReserved == NULL when FreeLibrary() was called,
                //   lpReserved != NULL when the process terminates.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ( "DllMain: THREAD_ATTACH\n" ));

            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ( "DllMain: THREAD_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

#endif /* KMP_DYNAMIC_LIB */
/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  // check the old throughput/turnaround setting

    if ( status ) {
        __kmp_yield_init |= 1;          // throughput => yielding mode
    }
    else {
        __kmp_yield_init &= ~1;         // turnaround => non-yielding mode
    }

    return old_status;  // return previous setting
}
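/* Illustrative sketch (not part of the original file): as the code above shows,
   the library mode is encoded in the low bit of __kmp_yield_init, so querying it
   is a single bit test.  Hypothetical helper. */
static inline int
__kmp_sketch_is_throughput_mode( int yield_init )
{
    return ( yield_init & 1 ) != 0;   /* low bit set => throughput (yielding) mode */
}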
/* __kmp_parallel_deo -- wait until it's our turn in an ordered section */
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
    }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* __kmp_parallel_dxo -- signal the next task in an ordered section */
void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        /* use the tid of the next thread in this team */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif
    }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* The BARRIER for a SINGLE process section is always explicit */

int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the single block */
        status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                             th->th.th_local.this_construct);
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
            th->th.th_teams_microtask == NULL &&
            team->t.t_active_level == 1 )
        {   // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
    return status;
}
void
__kmp_exit_single( int gtid )
{
    __kmp_itt_single_end( gtid );
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}
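/* Illustrative usage sketch (not part of the original file): a lowering of
   "#pragma omp single" would typically bracket the chosen thread's work with
   the two routines above.  Hypothetical, simplified control flow. */
static void
__kmp_sketch_single_region( int gtid, ident_t *loc, void (*body)( void ) )
{
    if ( __kmp_enter_single( gtid, loc, TRUE ) ) {   /* only one thread wins the single */
        body();
        __kmp_exit_single( gtid );
    }
    /* a barrier normally follows unless a nowait clause was given */
}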
/*
 * Determine how many threads we can use.
 * set_nthreads is the number of threads requested for the team.
 * Returns the number of threads to use (>= 1).
 * The forkjoin lock is held by the caller.
 */
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads )
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    //
    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    //
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
          : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
        else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                  master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                  master_tid, new_nthreads ));
            }
        }
    }

    //
    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    //
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        //
        // If dyn-var is false, emit a 1-time warning.
        //
        if ( ! get__dynamic_2( parent_team, master_tid )
          && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
          master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    //
    // Check if the threads array is large enough, or needs expanding.
    //
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > capacity ) {
        //
        // Expand the threads array.
        //
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            //
            // The threads array was not expanded enough.
            //
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            //
            // If dyn-var is false, emit a 1-time warning.
            //
            if ( ! get__dynamic_2( parent_team, master_tid )
              && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
                else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}
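/* Illustrative sketch (not part of the original file): the capping logic above
   repeatedly applies the same arithmetic -- the fork may add at most
   "limit - threads_in_use + reusable" threads, where "reusable" is the master
   itself (active root) or the whole hot team (inactive root).  Hypothetical
   helper restating that formula. */
static inline int
__kmp_sketch_cap_new_threads( int requested, int limit, int nth_in_use,
                              int root_active, int hot_team_nproc )
{
    int reusable = root_active ? 1 : hot_team_nproc;
    int headroom = limit - nth_in_use + reusable;
    return ( requested > headroom ) ? headroom : requested;
}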
/* Set up the threads of a newly allocated team. */
static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int i;
    int use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) {   // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1;  // index in array of hot teams
        if( master_th->th.th_teams_microtask ) {            // are we inside the teams construct?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level;    // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level;    // level was not increased in teams construct for team_of_workers
            }
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1;
            } else {
                use_hot_team = 0;                      // threads are not allocated yet
                hot_teams[level].hot_team = team;      // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;
            {   // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
                }
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif
    }
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Propagate any changes to the floating point control registers out to the team.
   Avoid unnecessary writes to the team structure if nothing changed. */
inline static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            team->t.t_x87_fpu_control_word = x87_fpu_control_word;
        }
        if ( team->t.t_mxcsr != mxcsr ) {
            team->t.t_mxcsr = mxcsr;
        }
        // Other code in the runtime checks this flag to decide whether to restore the registers.
        if (!team->t.t_fp_control_saved) {
            team->t.t_fp_control_saved = TRUE;
        }
    }
    else {
        // Don't write to this cache line of the team structure unless we have to.
        if (team->t.t_fp_control_saved)
            team->t.t_fp_control_saved = FALSE;
    }
}

/* Do the opposite: set the hardware registers to the updated values from the team. */
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        // Only reset the fp control regs if they have been changed in the team.
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x)  ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
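/* Illustrative sketch (not part of the original file): propagateFPControl above
   compares only the control bits of MXCSR by masking with KMP_X86_MXCSR_MASK
   (assumed here to clear the sticky status flags in the low bits).  A
   hypothetical user-level snapshot with the SSE intrinsic looks like this. */
#if 0   /* sketch only; the runtime uses its own __kmp_store_/__kmp_load_ helpers */
#include <xmmintrin.h>
static unsigned int __kmp_sketch_snapshot_mxcsr( void )
{
    /* keep rounding/exception-mask control bits, drop the status flags */
    return _mm_getcsr() & 0xffffffc0u;
}
#endif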
/* Run a parallel region that has been serialized, so it runs only in a team of
   the single master thread. */
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ( "__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        // No proc_bind clause was specified, so use the current value of proc-bind-var.
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    // Reset for next parallel region
    this_thr->th.th_set_proc_bind = proc_bind_default;

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used */
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                                           ompt_parallel_id,
                                           proc_bind,
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );
        } else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident      = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc      = 1;
        serial_team->t.t_parent     = this_thr->th.th_team;
        serial_team->t.t_sched      = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team        = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        copy_icvs(
            & this_thr->th.th_current_task->td_icvs,
            & this_thr->th.th_current_task->td_parent->td_icvs );

        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }

        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];
        }

        serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
        this_thr->th.th_info.ds.ds_tid = 0;

        /* set thread cache values */
        this_thr->th.th_team_nproc      = 1;
        this_thr->th.th_team_master     = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level        = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl (serial_team);

        /* check if we need to allocate dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);

    } else {
        /* this serialized team is already being used, so just add another nested level */
        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;
        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        /* allocate/push dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        {
            dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
            disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
            serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

    // Mark the start of the "parallel" region for VTune; only one notification scheme is used.
    if ( serial_team->t.t_level == 1
        && this_thr->th.th_teams_microtask == NULL )
    {
        if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
        {
            serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
        } else
        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode )
        {
            this_thr->th.th_ident = loc;
            // 0 - no barriers; 1 - serialized parallel
            __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
        }
    }
}
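/* Illustrative sketch (not part of the original file): each nested serialized
   region bumps serial_team->t.t_serialized (see above), and the matching "end
   serialized parallel" path decrements it, so the counter doubles as the
   serialized nesting depth.  Hypothetical accessor. */
static inline int
__kmp_sketch_serialized_depth( kmp_team_t const *serial_team )
{
    return serial_team->t.t_serialized;   /* 0 => not serialized, N => N nested serialized regions */
}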
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int
__kmp_fork_call(
    ident_t   * loc,
    int         gtid,
    enum fork_context_e  call_context, // Intel, GNU, ...
    kmp_int32   argc,
    void       *unwrapped_task,
    microtask_t microtask,
    launch_t    invoker,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
    va_list   * ap
#else
    va_list     ap
#endif
    )
{
    void          **argv;
    int             i;
    int             master_tid;
    int             master_this_cons;
    kmp_team_t     *team;
    kmp_team_t     *parent_team;
    kmp_info_t     *master_th;
    kmp_root_t     *root;
    int             nthreads;
    int             master_active;
    int             master_set_numthreads;
    int             level;
    int             active_level;
    int             teams_level;
#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t **p_hot_teams;
#endif

    KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);

    KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
    if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
        /* Some systems prefer the stack for the root thread(s) to start with */
        /* some gap from the parent stack to prevent false sharing. */
        void *dummy = KMP_ALLOCA(__kmp_stkpadding);
        /* These 2 lines below are so this does not get optimized out */
        if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
            __kmp_stkpadding += (short)((kmp_int64)dummy);
    }

    /* initialize if needed */
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    /* setup current data */
    master_th        = __kmp_threads[ gtid ];
    parent_team      = master_th->th.th_team;
    master_tid       = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root             = master_th->th.th_root;
    master_active    = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

    ompt_parallel_id_t ompt_parallel_id;
    ompt_task_id_t ompt_task_id;
    ompt_frame_t *ompt_frame;
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;

    if (ompt_enabled) {
        ompt_parallel_id = __ompt_parallel_id_new(gtid);
        ompt_task_id = __ompt_get_task_id_internal(0);
        ompt_frame = __ompt_get_task_frame_internal(0);
    }

    // Nested level will be an index in the nested nthreads array
    level        = parent_team->t.t_level;
    active_level = parent_team->t.t_active_level;   // used to launch non-serial teams even if nested is not allowed
    teams_level  = master_th->th.th_teams_level;    // needed to check nesting inside the teams construct
#if KMP_NESTED_HOT_TEAMS
    p_hot_teams  = &master_th->th.th_hot_teams;
    if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
        *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
                                sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
        (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
        (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
    }
#endif

    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
        int team_size = master_set_numthreads;

        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
            ompt_task_id, ompt_frame, ompt_parallel_id,
            team_size, unwrapped_task, OMPT_INVOKER(call_context));
    }

    master_th->th.th_ident = loc;

    if ( master_th->th.th_teams_microtask &&
         ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
        // This is the start of a parallel region nested inside a teams construct.
        // The team is actual (hot), all workers are ready at the fork barrier.
        // No lock is needed to initialize the team a bit, then free workers.
        parent_team->t.t_ident = loc;
        parent_team->t.t_argc  = argc;
        argv = (void**)parent_team->t.t_argv;
        for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
#else
            *argv++ = va_arg( ap, void * );
#endif
        /* Increment our nested depth levels, but not increase the serialization */
        if ( parent_team == master_th->th.th_serial_team ) {
            // we are in a serialized parallel
            __kmpc_serialized_parallel(loc, gtid);
            KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
            parent_team->t.t_serialized--;  // so that enquiry functions work correctly;
                                            // restored at join time

            void *dummy;
            void **exit_runtime_p;

            ompt_lw_taskteam_t lw_taskteam;

            if (ompt_enabled) {
                __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                    unwrapped_task, ompt_parallel_id);
                lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                /* OMPT implicit task begin */
                my_task_id = lw_taskteam.ompt_task_info.task_id;
                my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                        my_parallel_id, my_task_id);
                }

                /* OMPT state */
                master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
            } else {
                exit_runtime_p = &dummy;
            }

            __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
                                  , exit_runtime_p );

            if (ompt_enabled) {
                lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                        ompt_parallel_id, ompt_task_id);
                }

                __ompt_lw_taskteam_unlink(master_th);
                // reset the task id only after unlinking the task
                lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                        ompt_parallel_id, ompt_task_id,
                        OMPT_INVOKER(call_context));
                }
                master_th->th.ompt_thread_info.state = ompt_state_overhead;
            }
            return TRUE;
        }

        parent_team->t.t_pkfn   = microtask;
        parent_team->t.ompt_team_info.microtask = unwrapped_task;
        parent_team->t.t_invoke = invoker;
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
        parent_team->t.t_active_level ++;
        parent_team->t.t_level ++;

        /* Change number of threads in the team if requested */
        if ( master_set_numthreads ) {   // The # of threads is specified explicitly.
            if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
                // AC: only can reduce the number of threads dynamically, cannot increase
                kmp_info_t **other_threads = parent_team->t.t_threads;
                parent_team->t.t_nproc = master_set_numthreads;
                for ( i = 0; i < master_set_numthreads; ++i ) {
                    other_threads[i]->th.th_team_nproc = master_set_numthreads;
                }
                // Keep extra threads hot in the team for a possible next parallel.
            }
            master_th->th.th_set_nproc = 0;
        }

        if ( __kmp_debugging ) {    // Let the debugger override the number of threads.
            int nth = __kmp_omp_num_threads( loc );
            if ( nth > 0 ) {        // 0 means the debugger doesn't want to change the number of threads.
                master_set_numthreads = nth;
            }
        }

        KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
        __kmp_internal_fork( loc, gtid, parent_team );
        KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );

        /* Invoke microtask for MASTER thread */
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                    gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        if (! parent_team->t.t_invoke( gtid )) {
            KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
        }
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
            gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
        KMP_MB();       /* Flush all pending memory write invalidates.  */

        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));

        return TRUE;
    } // parallel closely nested in teams construct

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
    }

    if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
        nthreads = 1;
    } else {
        int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
        nthreads = master_set_numthreads ?
            master_set_numthreads : get__nproc_2( parent_team, master_tid );

        // Check if we need to take the forkjoin lock (no need for a serialized parallel out of the teams construct).
        if ( nthreads > 1 ) {
            if ( ( !get__nested(master_th) && (root->r.r_in_parallel
                && !enter_teams ) ) || ( __kmp_library == library_serial ) ) {
                KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
                                gtid, nthreads ));
                nthreads = 1;
            }
        }
        if ( nthreads > 1 ) {
            /* determine how many new threads we can use */
            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

            nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads );
            if ( nthreads == 1 ) {
                // Free the lock for single-thread execution here; for multi-thread
                // execution it is freed later, after the team of threads has been
                // created and initialized.
                __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            }
        }
    }
    KMP_DEBUG_ASSERT( nthreads > 0 );

    /* If we temporarily changed the set number of threads then restore it now */
    master_th->th.th_set_nproc = 0;

    /* create a serialized parallel region? */
    if ( nthreads == 1 ) {
#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        void *  args[ argc ];
#else
        void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
#endif

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serializing parallel region\n", gtid ));

        __kmpc_serialized_parallel(loc, gtid);

        if ( call_context == fork_context_intel ) {
            /* TODO this sucks, use the compiler itself to pass args! :) */
            master_th->th.th_serial_team->t.t_ident = loc;
            if ( !ap ) {
                // revert change made in __kmpc_serialized_parallel()
                master_th->th.th_serial_team->t.t_level--;

                void *dummy;
                void **exit_runtime_p;

                ompt_lw_taskteam_t lw_taskteam;

                if (ompt_enabled) {
                    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                        unwrapped_task, ompt_parallel_id);
                    lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                    exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                    __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                    /* OMPT implicit task begin */
                    my_task_id = lw_taskteam.ompt_task_info.task_id;
                    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                            ompt_parallel_id, my_task_id);
                    }

                    /* OMPT state */
                    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
                } else {
                    exit_runtime_p = &dummy;
                }

                __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
                                      , exit_runtime_p );

                if (ompt_enabled) {
                    lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                            ompt_parallel_id, ompt_task_id);
                    }

                    __ompt_lw_taskteam_unlink(master_th);
                    // reset the task id only after unlinking the task
                    lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                            ompt_parallel_id, ompt_task_id,
                            OMPT_INVOKER(call_context));
                    }
                    master_th->th.ompt_thread_info.state = ompt_state_overhead;
                }
            } else if ( microtask == (microtask_t)__kmp_teams_master ) {
                KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
                team = master_th->th.th_team;
                team->t.t_invoke = invoker;
                __kmp_alloc_argv_entries( argc, team, TRUE );
                team->t.t_argc = argc;
                argv = (void**) team->t.t_argv;
                if ( ap ) {
                    for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                        *argv++ = va_arg( *ap, void * );
#else
                        *argv++ = va_arg( ap, void * );
#endif
                } else {
                    for( i=0; i < argc; ++i )
                        // Get args from parent team for teams construct
                        argv[i] = parent_team->t.t_argv[i];
                }
                // revert change made in __kmpc_serialized_parallel()
                // because the initial code in teams should have level=0
                team->t.t_level--;
                // call the special invoker for the outer "parallel" of the teams construct
                invoker( gtid );
            } else {
                argv = args;
                for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );
#else
                    *argv++ = va_arg( ap, void * );
#endif
                KMP_MB();

                void *dummy;
                void **exit_runtime_p;

                ompt_lw_taskteam_t lw_taskteam;

                if (ompt_enabled) {
                    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                        unwrapped_task, ompt_parallel_id);
                    lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                    exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                    __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                    /* OMPT implicit task begin */
                    my_task_id = lw_taskteam.ompt_task_info.task_id;
                    my_parallel_id = ompt_parallel_id;
                    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                            my_parallel_id, my_task_id);
                    }

                    /* OMPT state */
                    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
                } else {
                    exit_runtime_p = &dummy;
                }

                __kmp_invoke_microtask( microtask, gtid, 0, argc, args
                                      , exit_runtime_p );

                if (ompt_enabled) {
                    lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                    if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                            my_parallel_id, my_task_id);
                    }

                    __ompt_lw_taskteam_unlink(master_th);
                    // reset the task id only after unlinking the task
                    lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                            ompt_parallel_id, ompt_task_id,
                            OMPT_INVOKER(call_context));
                    }
                    master_th->th.ompt_thread_info.state = ompt_state_overhead;
                }
            }
        }
        else if ( call_context == fork_context_gnu ) {
            ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
                __kmp_allocate( sizeof(ompt_lw_taskteam_t) );
            __ompt_lw_taskteam_init(lwt, master_th, gtid,
                unwrapped_task, ompt_parallel_id);

            lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
            lwt->ompt_task_info.frame.exit_runtime_frame = 0;
            __ompt_lw_taskteam_link(lwt, master_th);

            // we were called from GNU native code
            KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));
            return FALSE;
        }
        else {
            KMP_ASSERT2( call_context < fork_context_last,
                "__kmp_fork_call: unknown fork_context parameter" );
        }

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));
        KMP_MB();
        return FALSE;
    }

    // Only modify the executing flag in the case when not serialized;
    // the serialized case is handled in __kmpc_serialized_parallel.
    KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
                    parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
                    master_th->th.th_current_task->td_icvs.max_active_levels ) );
    master_th->th.th_current_task->td_flags.executing = 0;

    if ( !master_th->th.th_teams_microtask || level > teams_level )
    {
        /* Increment our nested depth level */
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
    }

    // See if we need to make a copy of the ICVs.
    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
        nthreads_icv = __kmp_nested_nth.nth[level+1];
    }
    else {
        nthreads_icv = 0;  // don't update
    }

    // Figure out the proc_bind policy for the new team.
    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;   // proc_bind_default means don't update
    if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else {
        if (proc_bind == proc_bind_default) {
            // No proc_bind clause specified; use the current value of proc-bind-var.
            proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
        }
        // Figure out the value of proc-bind-var for the child threads.
        if ((level+1 < __kmp_nested_proc_bind.used)
            && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
            proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
        }
    }

    // Reset for next parallel region
    master_th->th.th_set_proc_bind = proc_bind_default;

    if ((nthreads_icv > 0)
        || (proc_bind_icv != proc_bind_default)
        ) {
        kmp_internal_control_t new_icvs;
        copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
        new_icvs.next = NULL;
        if (nthreads_icv > 0) {
            new_icvs.nproc = nthreads_icv;
        }
        if (proc_bind_icv != proc_bind_default) {
            new_icvs.proc_bind = proc_bind_icv;
        }

        /* allocate a new parallel team */
        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   ompt_parallel_id,
                                   proc_bind,
                                   &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
    } else {
        /* allocate a new parallel team */
        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   ompt_parallel_id,
                                   proc_bind,
                                   &master_th->th.th_current_task->td_icvs, argc
                                   USE_NESTED_HOT_ARG(master_th) );
    }
    KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );

    /* setup the new team */
    team->t.t_master_tid = master_tid;
    team->t.t_master_this_cons = master_this_cons;
    team->t.t_ident  = loc;
    team->t.t_parent = parent_team;
    TCW_SYNC_PTR(team->t.t_pkfn, microtask);
    TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task);
    team->t.t_invoke = invoker;  /* TODO move this to root, maybe */

    if ( !master_th->th.th_teams_microtask || level > teams_level ) {
        team->t.t_level        = parent_team->t.t_level + 1;
        team->t.t_active_level = parent_team->t.t_active_level + 1;
    } else {
        // AC: Do not increase parallel level at start of the teams construct
        team->t.t_level        = parent_team->t.t_level;
        team->t.t_active_level = parent_team->t.t_active_level;
    }
    team->t.t_sched = get__sched_2(parent_team, master_tid); // set master's schedule as new run-time schedule

    team->t.t_cancel_request = cancel_noreq;

    // Update the floating point rounding in the team if required.
    propagateFPControl(team);

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        // Set master's task team to team's task team. Unless this is a hot team, it should be NULL.
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
        KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
                      __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                      parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );

        if (level) {
            // Take a memo of master's task_state
            KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
            if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
                kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
                kmp_uint8 *old_stack, *new_stack;

                new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
                for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
                    new_stack[i] = master_th->th.th_task_state_memo_stack[i];
                }
                for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
                    new_stack[i] = 0;
                }
                old_stack = master_th->th.th_task_state_memo_stack;
                master_th->th.th_task_state_memo_stack = new_stack;
                master_th->th.th_task_state_stack_sz = new_size;
                __kmp_free(old_stack);
            }
            // Store master's task_state on the stack
            master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
            master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
            if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team
                master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
            }
            else {
#endif
                master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
            }
#endif
        }
#if !KMP_NESTED_HOT_TEAMS
        KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
#endif
    }

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
                gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
    KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
                      ( team->t.t_master_tid == 0 &&
                        ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));

    /* now, setup the arguments */
    argv = (void**)team->t.t_argv;
    if ( ap ) {
        for ( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
#else
            *argv++ = va_arg( ap, void * );
#endif
    } else {
        for ( i=0; i < argc; ++i )
            // Get args from parent team for teams construct
            argv[i] = team->t.t_parent->t.t_argv[i];
    }

    /* now actually fork the threads */
    team->t.t_master_active = master_active;
    if (!root->r.r_active)  // Only do assignment if it prevents cache ping-pong
        root->r.r_active = TRUE;

    __kmp_fork_team_threads( root, team, master_th, gtid );
    __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );

    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( team->t.t_active_level == 1          // only report frames at level 1
        && !master_th->th.th_teams_microtask  // not in teams construct
        ) {
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
        {
            kmp_uint64 tmp_time = 0;
            if ( __itt_get_timestamp_ptr )
                tmp_time = __itt_get_timestamp();
            // Internal fork - report frame begin
            master_th->th.th_frame_time = tmp_time;
            if ( __kmp_forkjoin_frames_mode == 3 )
                team->t.t_region_time = tmp_time;
        } else
        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
        { // Mark start of "parallel" region for VTune.
            __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
        }
    }

    /* now go on and do the work */
    KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
    KMP_MB();
    KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));

    if ( __itt_stack_caller_create_ptr ) {
        team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
    }

    if ( ap )   // skip __kmp_internal_fork at teams construct, let only master threads execute
    {
        __kmp_internal_fork( loc, gtid, team );
        KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
                      root, team, master_th, gtid));
    }

    if (call_context == fork_context_gnu) {
        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));
        return TRUE;
    }

    /* Invoke microtask for MASTER thread */
    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                gtid, team->t.t_id, team->t.t_pkfn ) );

    if (! team->t.t_invoke( gtid )) {
        KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
    }
    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
        gtid, team->t.t_id, team->t.t_pkfn ) );
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));

    if (ompt_enabled) {
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }

    return TRUE;
}
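/* Illustrative sketch (not part of the original file): the microtask passed to
   __kmp_fork_call is the compiler-outlined body of the parallel region; its
   first two parameters receive the global and the team-local thread id, and
   the remaining parameters are the shared-variable pointers packed as varargs
   above.  Hypothetical outlined function for a region with one shared variable. */
static void
__kmp_sketch_outlined_body( int *gtid, int *tid, void *shared_x )
{
    (void)gtid; (void)tid; (void)shared_x;   /* a real body indexes shared data with these */
}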
static inline void
__kmp_join_restore_state( kmp_info_t *thread, kmp_team_t *team )
{
    // restore state outside the region
    thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
        ompt_state_work_serial : ompt_state_work_parallel);
}

static inline void
__kmp_join_ompt( kmp_info_t *thread, kmp_team_t *team,
                 ompt_parallel_id_t parallel_id, fork_context_e fork_context )
{
    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
            parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
    }

    __kmp_join_restore_state(thread, team);
}
void
__kmp_join_call( ident_t *loc, int gtid
               , enum fork_context_e fork_context
               , int exit_teams )
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
    kmp_team_t     *team;
    kmp_team_t     *parent_team;
    kmp_info_t     *master_th;
    kmp_root_t     *root;
    int             master_active;
    int             i;

    KA_TRACE( 20, ( "__kmp_join_call: enter T#%d\n", gtid ));

    /* setup current data */
    master_th   = __kmp_threads[ gtid ];
    root        = master_th->th.th_root;
    team        = master_th->th.th_team;
    parent_team = team->t.t_parent;

    master_th->th.th_ident = loc;

    if (ompt_enabled) {
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                         __kmp_gtid_from_thread( master_th ), team,
                         team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
    }

    if( team->t.t_serialized ) {
        if ( master_th->th.th_teams_microtask ) {
            // We are in a teams construct
            int level  = team->t.t_level;
            int tlevel = master_th->th.th_teams_level;
            if ( level == tlevel ) {
                // AC: we haven't incremented it earlier at the start of the teams construct,
                //     so do it here - at the end of the teams construct
                team->t.t_level++;
            } else if ( level == tlevel + 1 ) {
                // AC: we are exiting a parallel inside teams; increment serialization so it
                //     can be restored at the next call to __kmpc_end_serialized_parallel
                team->t.t_serialized++;
            }
        }
        __kmpc_end_serialized_parallel( loc, gtid );

        if (ompt_enabled) {
            __kmp_join_restore_state(master_th, parent_team);
        }

        return;
    }

    master_active = team->t.t_master_active;

    if (!exit_teams)
    {
        // AC: No barrier for internal teams at exit from the teams construct.
        //     But there is a barrier for the external team (league).
        __kmp_internal_join( loc, gtid, team );
    }
    else {
        master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
    }

    KMP_MB();

    ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;

    if ( __itt_stack_caller_create_ptr ) {
        __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
    }

    // Mark end of "parallel" region for VTune.
    if ( team->t.t_active_level == 1
        && !master_th->th.th_teams_microtask /* not in teams construct */
        ) {
        master_th->th.th_ident = loc;
        // only one notification scheme (either "submit" or "forking/joined", not both)
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
            __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
                                    0, loc, master_th->th.th_team_nproc, 1 );
        else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
            ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
            __kmp_itt_region_joined( gtid );
    } // active_level == 1

    if ( master_th->th.th_teams_microtask &&
         !exit_teams &&
         team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
         team->t.t_level == master_th->th.th_teams_level + 1 ) {
        // AC: We need to leave the team structure intact at the end of a parallel inside
        //     the teams construct, so that at the next parallel the same (hot) team works;
        //     only adjust nesting levels.

        /* Decrement our nested depth level */
        team->t.t_level --;
        team->t.t_active_level --;
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

        /* Restore number of threads in the team if needed */
        if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
            int old_num = master_th->th.th_team_nproc;
            int new_num = master_th->th.th_teams_size.nth;
            kmp_info_t **other_threads = team->t.t_threads;
            team->t.t_nproc = new_num;
            for ( i = 0; i < old_num; ++i ) {
                other_threads[i]->th.th_team_nproc = new_num;
            }
            // Adjust states of the non-used threads of the team
            for ( i = old_num; i < new_num; ++i ) {
                // Re-initialize thread's barrier data.
                int b;
                kmp_balign_t * balign = other_threads[i]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
                }
                if ( __kmp_tasking_mode != tskm_immediate_exec ) {
                    // Synchronize thread's task state
                    other_threads[i]->th.th_task_state = master_th->th.th_task_state;
                }
            }
        }

        if (ompt_enabled) {
            __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
        }

        return;
    }

    /* do cleanup and restore the parent team */
    master_th->th.th_info .ds.ds_tid = team->t.t_master_tid;
    master_th->th.th_local.this_construct = team->t.t_master_this_cons;

    master_th->th.th_dispatch =
                & parent_team->t.t_dispatch[ team->t.t_master_tid ];

    /* The following lock has instructions with REL and ACQ semantics, separating
       the parallel user code called in this parallel region from the serial user
       code called after this function returns. */
    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
    {
        /* Decrement our nested depth level */
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
    }
    KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );

    KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
                   0, master_th, team ) );
    __kmp_pop_current_task_from_thread( master_th );

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    // Restore master thread's partition.
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place  = team->t.t_last_place;
#endif

    updateHWFPControl (team);

    if ( root->r.r_active != master_active )
        root->r.r_active = master_active;

    __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads

    /* This race was fun to find.  Make sure the following is in the critical region,
       otherwise assertions may fail occasionally since the old team may be reallocated
       and the hierarchy appears inconsistent.  It is actually safe to run and won't
       cause any bugs, but it will cause those assertion failures.  It's only one
       deref&assign, so might as well put it in the critical region. */
    master_th->th.th_team            = parent_team;
    master_th->th.th_team_nproc      = parent_team->t.t_nproc;
    master_th->th.th_team_master     = parent_team->t.t_threads[0];
    master_th->th.th_team_serialized = parent_team->t.t_serialized;

    /* restore serialized team, if need be */
    if( parent_team->t.t_serialized &&
        parent_team != master_th->th.th_serial_team &&
        parent_team != root->r.r_root_team ) {
            __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
            master_th->th.th_serial_team = parent_team;
    }

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        if (master_th->th.th_task_state_top > 0) { // Restore task state from the memo stack
            KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
            // Remember master's state if we re-use this same team
            master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
            --master_th->th.th_task_state_top; // pop
            // Now restore state at this level
            master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
        }
        // Copy the task team from the parent team to the master thread
        master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
        KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
    }

    master_th->th.th_current_task->td_flags.executing = 1;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    if (ompt_enabled) {
        __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
    }

    KMP_MB();
    KA_TRACE( 20, ( "__kmp_join_call: exit T#%d\n", gtid ));
}
/* Check whether we should push an internal control record onto the
   serial team stack.  If so, do it.  */
void
__kmp_save_internal_controls ( kmp_info_t * thread )
{
    if ( thread->th.th_team != thread->th.th_serial_team ) {
        return;
    }
    if (thread->th.th_team->t.t_serialized > 1) {
        int push = 0;

        if (thread->th.th_team->t.t_control_stack_top == NULL) {
            push = 1;
        } else {
            if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
                 thread->th.th_team->t.t_serialized ) {
                push = 1;
            }
        }
        if (push) {  /* push a record on the serial team's stack */
            kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));

            copy_icvs( control, & thread->th.th_current_task->td_icvs );

            control->serial_nesting_level = thread->th.th_team->t.t_serialized;

            control->next = thread->th.th_team->t.t_control_stack_top;
            thread->th.th_team->t.t_control_stack_top = control;
        }
    }
}
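/* Illustrative sketch (not part of the original file): the control stack saved
   above is a singly linked list keyed by the serialized nesting level, so
   restoring ICVs when leaving a serialized region amounts to finding the entry
   whose serial_nesting_level matches.  Hypothetical lookup helper. */
static kmp_internal_control_t *
__kmp_sketch_find_saved_icvs( kmp_internal_control_t *top, int nesting_level )
{
    while ( top != NULL && top->serial_nesting_level != nesting_level )
        top = top->next;
    return top;   /* NULL if nothing was saved at this level */
}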
/* Changes set_nproc */
void
__kmp_set_num_threads( int new_nth, int gtid )
{
    kmp_info_t *thread;
    kmp_root_t *root;

    KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if (new_nth < 1)
        new_nth = 1;
    else if (new_nth > __kmp_max_nth)
        new_nth = __kmp_max_nth;

    thread = __kmp_threads[gtid];

    __kmp_save_internal_controls( thread );

    set__nproc( thread, new_nth );

    //
    // If this omp_set_num_threads() call will cause the hot team size to be
    // reduced (in the absence of a num_threads clause), then reduce it now,
    // rather than waiting for the next parallel region.
    //
    root = thread->th.th_root;
    if ( __kmp_init_parallel && ( ! root->r.r_active )
      && ( root->r.r_hot_team->t.t_nproc > new_nth )
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
    ) {
        kmp_team_t *hot_team = root->r.r_hot_team;
        int f;

        __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

        // Release the extra threads we don't need any more.
        for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            if ( __kmp_tasking_mode != tskm_immediate_exec) {
                // When decreasing team size, threads no longer in the team should unref the task team.
                hot_team->t.t_threads[f]->th.th_task_team = NULL;
            }
            __kmp_free_thread( hot_team->t.t_threads[f] );
            hot_team->t.t_threads[f] = NULL;
        }
        hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
        if( thread->th.th_hot_teams ) {
            KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
            thread->th.th_hot_teams[0].hot_team_nth = new_nth;
        }
#endif

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

        //
        // Update the t_nproc field in the threads that are still active.
        //
        for( f=0 ; f < new_nth; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
        }
        // Special flag in case omp_set_num_threads() call
        hot_team->t.t_size_changed = -1;
    }
}
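/* Illustrative usage sketch (not part of the original file): the user-level
   entry point omp_set_num_threads() is expected to land here with the caller's
   gtid, so an ordinary OpenMP program exercises this path as follows.
   Hypothetical user code, kept out of the build. */
#if 0
#include <omp.h>
int main( void )
{
    omp_set_num_threads( 4 );        /* eventually reaches __kmp_set_num_threads( 4, gtid ) */
    #pragma omp parallel
    {
        /* at most 4 threads execute here */
    }
    return 0;
}
#endif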
/* Changes max_active_levels */
void
__kmp_set_max_active_levels( int gtid, int max_active_levels )
{
    kmp_info_t *thread;

    KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    // validate max_active_levels
    if( max_active_levels < 0 ) {
        KMP_WARNING( ActiveLevelsNegative, max_active_levels );
        // We ignore this call if the user has specified a negative value.
        // The current setting won't be changed; the last valid setting will be used.
        // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
        KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
        return;
    }
    if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
        // it's OK, max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
        // We allow a zero value. (implementation defined behavior)
    } else {
        KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
        max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
        // If the input exceeds the upper limit, correct it to the upper limit. (implementation defined behavior)
    }
    KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );

    thread = __kmp_threads[ gtid ];

    __kmp_save_internal_controls( thread );

    set__max_active_levels( thread, max_active_levels );
}
2696 __kmp_get_max_active_levels(
int gtid )
2700 KF_TRACE( 10, (
"__kmp_get_max_active_levels: thread %d\n", gtid ) );
2701 KMP_DEBUG_ASSERT( __kmp_init_serial );
2703 thread = __kmp_threads[ gtid ];
2704 KMP_DEBUG_ASSERT( thread->th.th_current_task );
2705 KF_TRACE( 10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
2706 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2707 return thread->th.th_current_task->td_icvs.max_active_levels;
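/*
 * __kmp_set_schedule: runtime side of omp_set_schedule().
 * Validates the (kind, chunk) pair, falling back to the default schedule
 * with a warning when the kind is out of range, then maps the OpenMP
 * schedule kind onto the internal kmp_sch_* enumeration via __kmp_sch_map
 * and stores it, together with the chunk size, in the current task's ICVs.
 */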
2712 __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk )
2717 KF_TRACE( 10, ( "__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
2718 KMP_DEBUG_ASSERT( __kmp_init_serial );
2724 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2725 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2730 KMP_MSG( ScheduleKindOutOfRange, kind ),
2731 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
2734 kind = kmp_sched_default;
2738 thread = __kmp_threads[ gtid ];
2740 __kmp_save_internal_controls( thread );
2742 if ( kind < kmp_sched_upper_std ) {
2743 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2746 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2748 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
2752 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2753 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2755 if ( kind == kmp_sched_auto ) {
2757 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2759 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2765 __kmp_get_schedule(
int gtid, kmp_sched_t * kind,
int * chunk )
2770 KF_TRACE( 10, (
"__kmp_get_schedule: thread %d\n", gtid ));
2771 KMP_DEBUG_ASSERT( __kmp_init_serial );
2773 thread = __kmp_threads[ gtid ];
2776 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2778 switch ( th_type ) {
2780 case kmp_sch_static_greedy:
2781 case kmp_sch_static_balanced:
2782 *kind = kmp_sched_static;
2785 case kmp_sch_static_chunked:
2786 *kind = kmp_sched_static;
2788 case kmp_sch_dynamic_chunked:
2789 *kind = kmp_sched_dynamic;
2792 case kmp_sch_guided_iterative_chunked:
2793 case kmp_sch_guided_analytical_chunked:
2794 *kind = kmp_sched_guided;
2797 *kind = kmp_sched_auto;
2799 case kmp_sch_trapezoidal:
2800 *kind = kmp_sched_trapezoidal;
2808 KMP_FATAL( UnknownSchedulingType, th_type );
2812 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
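/*
 * __kmp_get_ancestor_thread_num: runtime side of
 * omp_get_ancestor_thread_num(level).  Returns 0 for level 0, -1 for an
 * invalid level, and otherwise walks up the team hierarchy (skipping
 * serialized levels and accounting for teams constructs) to find the
 * thread number the caller had at the requested nesting level.
 */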
2816 __kmp_get_ancestor_thread_num(
int gtid,
int level ) {
2822 KF_TRACE( 10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2823 KMP_DEBUG_ASSERT( __kmp_init_serial );
2826 if( level == 0 ) return 0;
2827 if( level < 0 ) return -1;
2828 thr = __kmp_threads[ gtid ];
2829 team = thr->th.th_team;
2830 ii = team->t.t_level;
2831 if( level > ii ) return -1;
2834 if( thr->th.th_teams_microtask ) {
2836 int tlevel = thr->th.th_teams_level;
2837 if( level <= tlevel ) {
2838 KMP_DEBUG_ASSERT( ii >= tlevel );
2840 if ( ii == tlevel ) {
2849 if( ii == level ) return __kmp_tid_from_gtid( gtid );
2851 dd = team->t.t_serialized;
2855 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2858 if( ( team->t.t_serialized ) && ( !dd ) ) {
2859 team = team->t.t_parent;
2863 team = team->t.t_parent;
2864 dd = team->t.t_serialized;
2869 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
2873 __kmp_get_team_size(
int gtid,
int level ) {
2879 KF_TRACE( 10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level ));
2880 KMP_DEBUG_ASSERT( __kmp_init_serial );
2883 if( level == 0 ) return 1;
2884 if( level < 0 ) return -1;
2885 thr = __kmp_threads[ gtid ];
2886 team = thr->th.th_team;
2887 ii = team->t.t_level;
2888 if( level > ii ) return -1;
2891 if( thr->th.th_teams_microtask ) {
2893 int tlevel = thr->th.th_teams_level;
2894 if( level <= tlevel ) {
2895 KMP_DEBUG_ASSERT( ii >= tlevel );
2897 if ( ii == tlevel ) {
2908 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2911 if( team->t.t_serialized && ( !dd ) ) {
2912 team = team->t.t_parent;
2916 team = team->t.t_parent;
2921 return team->t.t_nproc;
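/*
 * __kmp_get_schedule_global:
 * Builds the default kmp_r_sched_t from the global settings: the schedule
 * kind comes from __kmp_sched (with __kmp_static / __kmp_guided supplying
 * the concrete static or guided variant) and the chunk size from
 * __kmp_chunk, falling back to KMP_DEFAULT_CHUNK when unset.
 */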
2925 __kmp_get_schedule_global() {
2929 kmp_r_sched_t r_sched;
2935 r_sched.r_sched_type = __kmp_static;
2937 r_sched.r_sched_type = __kmp_guided;
2939 r_sched.r_sched_type = __kmp_sched;
2942 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
2943 r_sched.chunk = KMP_DEFAULT_CHUNK;
2945 r_sched.chunk = __kmp_chunk;
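/*
 * __kmp_alloc_argv_entries:
 * Ensures team->t.t_argv can hold argc microtask arguments.  Small argument
 * lists reuse the inline array t_inline_argv; larger ones get a page-aligned
 * heap allocation sized to at least KMP_MIN_MALLOC_ARGV_ENTRIES (or 2*argc).
 * With realloc set, an existing heap-allocated array is freed first.
 */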
2960 __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc )
2963 KMP_DEBUG_ASSERT( team );
2964 if( !realloc || argc > team->t.t_max_argc ) {
2966 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2967 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
2969 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2970 __kmp_free( (void *) team->t.t_argv );
2972 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2974 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
2975 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2976 team->t.t_id, team->t.t_max_argc ));
2977 team->t.t_argv = &team->t.t_inline_argv[0];
2978 if ( __kmp_storage_map ) {
2979 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2980 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2981 ( sizeof(void *) * KMP_INLINE_ARGV_ENTRIES ),
2982 "team_%d.t_inline_argv",
2987 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
2988 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2989 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2990 team->t.t_id, team->t.t_max_argc ));
2991 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
2992 if ( __kmp_storage_map ) {
2993 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2994 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
3002 __kmp_allocate_team_arrays(kmp_team_t *team,
int max_nth)
3005 int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
3006 #if KMP_USE_POOLED_ALLOC
3008 char *ptr = __kmp_allocate(max_nth *
3009 ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buff
3010 + sizeof(kmp_disp_t) + sizeof(int)*6
3012 + sizeof(kmp_r_sched_t)
3013 + sizeof(kmp_taskdata_t) ) );
3015 team->t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth;
3016 team->t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
3017 ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
3018 team->t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth;
3019 team->t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth;
3020 team->t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth;
3021 team->t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth;
3022 team->t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth;
3023 team->t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth;
3024 team->t.t_set_bt_set = (int*) ptr;
3025 ptr += sizeof(int) * max_nth;
3027 team->t.t_set_sched = (kmp_r_sched_t*) ptr;
3028 ptr += sizeof(kmp_r_sched_t) * max_nth;
3029 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
3030 ptr += sizeof(kmp_taskdata_t) * max_nth;
#else
3033 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
3034 team->t.t_disp_buffer = (dispatch_shared_info_t*)
3035 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
3036 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
3039 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
#endif
3041 team->t.t_max_nproc = max_nth;
3044 for(i = 0 ; i < num_disp_buff; ++i)
3045 team->t.t_disp_buffer[i].buffer_index = i;
3049 __kmp_free_team_arrays(kmp_team_t *team) {
3052 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
3053 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
3054 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
3055 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
3058 __kmp_free(team->t.t_threads);
3059 #if !KMP_USE_POOLED_ALLOC
3060 __kmp_free(team->t.t_disp_buffer);
3061 __kmp_free(team->t.t_dispatch);
3064 __kmp_free(team->t.t_implicit_task_taskdata);
3066 team->t.t_threads = NULL;
3067 team->t.t_disp_buffer = NULL;
3068 team->t.t_dispatch = NULL;
3071 team->t.t_implicit_task_taskdata = 0;
3075 __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
3076 kmp_info_t **oldThreads = team->t.t_threads;
3078 #if !KMP_USE_POOLED_ALLOC
3079 __kmp_free(team->t.t_disp_buffer);
3080 __kmp_free(team->t.t_dispatch);
3083 __kmp_free(team->t.t_implicit_task_taskdata);
3085 __kmp_allocate_team_arrays(team, max_nth);
3087 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
3089 __kmp_free(oldThreads);
3092 static kmp_internal_control_t
3093 __kmp_get_global_icvs( void ) {
3095 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3098 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3101 kmp_internal_control_t g_icvs = {
3103 (kmp_int8)__kmp_dflt_nested,
3104 (kmp_int8)__kmp_global.g.g_dynamic,
3105 (kmp_int8)__kmp_env_blocktime,
3106 __kmp_dflt_blocktime,
3108 __kmp_dflt_team_nth,
3110 __kmp_dflt_max_active_levels,
3113 __kmp_nested_proc_bind.bind_types[0],
3121 static kmp_internal_control_t
3122 __kmp_get_x_global_icvs( const kmp_team_t *team ) {
3124 kmp_internal_control_t gx_icvs;
3125 gx_icvs.serial_nesting_level = 0;
3126 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3127 gx_icvs.next = NULL;
3133 __kmp_initialize_root( kmp_root_t *root )
3136 kmp_team_t *root_team;
3137 kmp_team_t *hot_team;
3138 int hot_team_max_nth;
3139 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3140 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3141 KMP_DEBUG_ASSERT( root );
3142 KMP_ASSERT( ! root->r.r_begin );
3145 __kmp_init_lock( &root->r.r_begin_lock );
3146 root->r.r_begin = FALSE;
3147 root->r.r_active = FALSE;
3148 root->r.r_in_parallel = 0;
3149 root->r.r_blocktime = __kmp_dflt_blocktime;
3150 root->r.r_nested = __kmp_dflt_nested;
3154 KF_TRACE( 10, (
"__kmp_initialize_root: before root_team\n" ) );
3157 __kmp_allocate_team(
3165 __kmp_nested_proc_bind.bind_types[0],
3169 USE_NESTED_HOT_ARG(NULL)
3173 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3176 KF_TRACE( 10, (
"__kmp_initialize_root: after root_team = %p\n", root_team ) );
3178 root->r.r_root_team = root_team;
3179 root_team->t.t_control_stack_top = NULL;
3182 root_team->t.t_threads[0] = NULL;
3183 root_team->t.t_nproc = 1;
3184 root_team->t.t_serialized = 1;
3186 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3187 root_team->t.t_sched.chunk = r_sched.chunk;
3188 KA_TRACE( 20, (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3189 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3193 KF_TRACE( 10, (
"__kmp_initialize_root: before hot_team\n" ) );
3196 __kmp_allocate_team(
3199 __kmp_dflt_team_nth_ub * 2,
3204 __kmp_nested_proc_bind.bind_types[0],
3208 USE_NESTED_HOT_ARG(NULL)
3210 KF_TRACE( 10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3212 root->r.r_hot_team = hot_team;
3213 root_team->t.t_control_stack_top = NULL;
3216 hot_team->t.t_parent = root_team;
3219 hot_team_max_nth = hot_team->t.t_max_nproc;
3220 for ( f = 0; f < hot_team_max_nth; ++ f ) {
3221 hot_team->t.t_threads[ f ] = NULL;
3223 hot_team->t.t_nproc = 1;
3225 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3226 hot_team->t.t_sched.chunk = r_sched.chunk;
3227 hot_team->t.t_size_changed = 0;
3233 typedef struct kmp_team_list_item {
3234 kmp_team_p const * entry;
3235 struct kmp_team_list_item * next;
3236 } kmp_team_list_item_t;
3237 typedef kmp_team_list_item_t * kmp_team_list_t;
3241 __kmp_print_structure_team_accum(
3242 kmp_team_list_t list,
3243 kmp_team_p const * team
3253 KMP_DEBUG_ASSERT( list != NULL );
3254 if ( team == NULL ) {
3258 __kmp_print_structure_team_accum( list, team->t.t_parent );
3259 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3263 while ( l->next != NULL && l->entry != team ) {
3266 if ( l->next != NULL ) {
3272 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3278 kmp_team_list_item_t * item =
3279 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3288 __kmp_print_structure_team(
3290 kmp_team_p const * team
3293 __kmp_printf(
"%s", title );
3294 if ( team != NULL ) {
3295 __kmp_printf(
"%2x %p\n", team->t.t_id, team );
3297 __kmp_printf(
" - (nil)\n" );
3302 __kmp_print_structure_thread(
3304 kmp_info_p const * thread
3307 __kmp_printf(
"%s", title );
3308 if ( thread != NULL ) {
3309 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3311 __kmp_printf(
" - (nil)\n" );
3316 __kmp_print_structure(
3320 kmp_team_list_t list;
3323 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3327 __kmp_printf(
"\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3330 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3331 __kmp_printf(
"%2d", gtid );
3332 if ( __kmp_threads != NULL ) {
3333 __kmp_printf(
" %p", __kmp_threads[ gtid ] );
3335 if ( __kmp_root != NULL ) {
3336 __kmp_printf(
" %p", __kmp_root[ gtid ] );
3338 __kmp_printf(
"\n" );
3343 __kmp_printf(
"\n------------------------------\nThreads\n------------------------------\n" );
3344 if ( __kmp_threads != NULL ) {
3346 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3347 kmp_info_t const * thread = __kmp_threads[ gtid ];
3348 if ( thread != NULL ) {
3349 __kmp_printf(
"GTID %2d %p:\n", gtid, thread );
3350 __kmp_printf(
" Our Root: %p\n", thread->th.th_root );
3351 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team );
3352 __kmp_print_structure_team(
" Serial Team: ", thread->th.th_serial_team );
3353 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc );
3354 __kmp_print_structure_thread(
" Master: ", thread->th.th_team_master );
3355 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized );
3356 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc );
3358 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3360 __kmp_print_structure_thread(
" Next in pool: ", thread->th.th_next_pool );
3361 __kmp_printf(
"\n" );
3362 __kmp_print_structure_team_accum( list, thread->th.th_team );
3363 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3367 __kmp_printf(
"Threads array is not allocated.\n" );
3371 __kmp_printf(
"\n------------------------------\nUbers\n------------------------------\n" );
3372 if ( __kmp_root != NULL ) {
3374 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3375 kmp_root_t const * root = __kmp_root[ gtid ];
3376 if ( root != NULL ) {
3377 __kmp_printf(
"GTID %2d %p:\n", gtid, root );
3378 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team );
3379 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team );
3380 __kmp_print_structure_thread(
" Uber Thread: ", root->r.r_uber_thread );
3381 __kmp_printf(
" Active?: %2d\n", root->r.r_active );
3382 __kmp_printf(
" Nested?: %2d\n", root->r.r_nested );
3383 __kmp_printf(
" In Parallel: %2d\n", root->r.r_in_parallel );
3384 __kmp_printf(
"\n" );
3385 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3386 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3390 __kmp_printf(
"Ubers array is not allocated.\n" );
3393 __kmp_printf(
"\n------------------------------\nTeams\n------------------------------\n" );
3394 while ( list->next != NULL ) {
3395 kmp_team_p const * team = list->entry;
3397 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team );
3398 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent );
3399 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid );
3400 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc );
3401 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized );
3402 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc );
3403 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3404 __kmp_printf(
" Thread %2d: ", i );
3405 __kmp_print_structure_thread(
"", team->t.t_threads[ i ] );
3407 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool );
3408 __kmp_printf(
"\n" );
3413 __kmp_printf(
"\n------------------------------\nPools\n------------------------------\n" );
3414 __kmp_print_structure_thread(
"Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3415 __kmp_print_structure_team(
"Team pool: ", (kmp_team_t *)__kmp_team_pool );
3416 __kmp_printf(
"\n" );
3419 while ( list != NULL ) {
3420 kmp_team_list_item_t * item = list;
3422 KMP_INTERNAL_FREE( item );
3434 static const unsigned __kmp_primes[] = {
3435 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3436 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3437 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3438 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3439 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3440 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3441 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3442 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3443 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3444 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3445 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3446 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3447 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3448 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3449 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3450 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
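/*
 * __kmp_get_random / __kmp_init_random:
 * Per-thread linear congruential generator.  Each thread is seeded from its
 * tid with a multiplier chosen from __kmp_primes; the update is
 *     x_{n+1} = a * x_n + 1   (mod 2^32, via unsigned wraparound)
 * and the high 16 bits of x are returned as the pseudo-random value.
 */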
3457 __kmp_get_random( kmp_info_t * thread )
3459 unsigned x = thread->th.th_x;
3460 unsigned short r = x>>16;
3462 thread->th.th_x = x*thread->th.th_a+1;
3464 KA_TRACE(30, (
"__kmp_get_random: THREAD: %d, RETURN: %u\n",
3465 thread->th.th_info.ds.ds_tid, r) );
3473 __kmp_init_random( kmp_info_t * thread )
3475 unsigned seed = thread->th.th_info.ds.ds_tid;
3477 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3478 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3479 KA_TRACE(30, (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
3486 __kmp_reclaim_dead_roots(void) {
3489 for(i = 0; i < __kmp_threads_capacity; ++i) {
3490 if( KMP_UBER_GTID( i ) &&
3491 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3492 !__kmp_root[i]->r.r_active ) {
3493 r += __kmp_unregister_root_other_thread(i);
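/*
 * __kmp_expand_threads:
 * Grows __kmp_threads / __kmp_root toward nWish (but at least nNeed) slots,
 * doubling the capacity until it covers the request or hits the effective
 * maximum (__kmp_tp_capacity when threadprivate caches exist, otherwise
 * __kmp_sys_max_nth).  The new arrays are allocated as one block, the old
 * contents copied over, and the globals swapped in place; returns the
 * number of slots added.
 */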
3522 __kmp_expand_threads(
int nWish,
int nNeed) {
3525 int __kmp_actual_max_nth;
3529 #if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
3532 added = __kmp_reclaim_dead_roots();
3550 int minimumRequiredCapacity;
3552 kmp_info_t **newThreads;
3553 kmp_root_t **newRoot;
3575 old_tp_cached = __kmp_tp_cached;
3576 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3577 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3581 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3585 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3591 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3598 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3600 newCapacity = __kmp_threads_capacity;
3603 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3604 (newCapacity << 1) :
3605 __kmp_actual_max_nth;
3606 } while(newCapacity < minimumRequiredCapacity);
3607 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3608 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
3609 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3610 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
3611 memset(newThreads + __kmp_threads_capacity, 0,
3612 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3613 memset(newRoot + __kmp_threads_capacity, 0,
3614 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3616 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3622 __kmp_free(newThreads);
3625 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3626 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3628 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3629 __kmp_free(newThreads);
3635 *(kmp_info_t** volatile*)&__kmp_threads = newThreads;
3636 *(kmp_root_t** volatile*)&__kmp_root = newRoot;
3637 added += newCapacity - __kmp_threads_capacity;
3638 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3639 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
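/*
 * __kmp_register_root:
 * Registers the calling (initial or foreign) thread as a root: finds or
 * expands a free slot in __kmp_threads, allocates the kmp_root_t and its
 * root/hot/serial teams on first use, initializes the thread descriptor,
 * TLS gtid and barrier state, and optionally applies the initial affinity
 * mask.  Hands back the new global thread id.
 */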
3650 __kmp_register_root(
int initial_thread )
3652 kmp_info_t *root_thread;
3656 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3657 KA_TRACE( 20, (
"__kmp_register_root: entered\n"));
3675 capacity = __kmp_threads_capacity;
3676 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3681 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3682 if ( __kmp_tp_cached ) {
3685 KMP_MSG( CantRegisterNewThread ),
3686 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3687 KMP_HNT( PossibleSystemLimitOnThreads ),
3694 KMP_MSG( CantRegisterNewThread ),
3695 KMP_HNT( SystemLimitOnThreads ),
3704 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3706 KA_TRACE( 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3707 KMP_ASSERT( gtid < __kmp_threads_capacity );
3711 TCW_4(__kmp_nth, __kmp_nth + 1);
3718 if ( __kmp_adjust_gtid_mode ) {
3719 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3720 if ( TCR_4(__kmp_gtid_mode) != 2) {
3721 TCW_4(__kmp_gtid_mode, 2);
3725 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3726 TCW_4(__kmp_gtid_mode, 1);
3731 #ifdef KMP_ADJUST_BLOCKTIME
3734 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3735 if ( __kmp_nth > __kmp_avail_proc ) {
3736 __kmp_zero_bt = TRUE;
3742 if( ! ( root = __kmp_root[gtid] )) {
3743 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3744 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3747 __kmp_initialize_root( root );
3750 if( root->r.r_uber_thread ) {
3751 root_thread = root->r.r_uber_thread;
3753 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3754 if ( __kmp_storage_map ) {
3755 __kmp_print_thread_storage_map( root_thread, gtid );
3757 root_thread->th.th_info .ds.ds_gtid = gtid;
3758 root_thread->th.th_root = root;
3759 if( __kmp_env_consistency_check ) {
3760 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
3763 __kmp_initialize_fast_memory( root_thread );
3767 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
3768 __kmp_initialize_bget( root_thread );
3770 __kmp_init_random( root_thread );
3774 if( ! root_thread->th.th_serial_team ) {
3775 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3776 KF_TRACE( 10, (
"__kmp_register_root: before serial_team\n" ) );
3778 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
3786 0 USE_NESTED_HOT_ARG(NULL) );
3788 KMP_ASSERT( root_thread->th.th_serial_team );
3789 KF_TRACE( 10, (
"__kmp_register_root: after serial_team = %p\n",
3790 root_thread->th.th_serial_team ) );
3793 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3795 root->r.r_root_team->t.t_threads[0] = root_thread;
3796 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3797 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3798 root_thread->th.th_serial_team->t.t_serialized = 0;
3799 root->r.r_uber_thread = root_thread;
3802 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
3805 __kmp_gtid_set_specific( gtid );
3807 __kmp_itt_thread_name( gtid );
3809 #ifdef KMP_TDATA_GTID
3810 __kmp_gtid = gtid;
3811 #endif
3812 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3813 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
3814 TCW_4(__kmp_init_gtid, TRUE);
3816 KA_TRACE( 20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3817 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
3818 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3819 KMP_INIT_BARRIER_STATE ) );
3822 for ( b = 0; b < bs_last_barrier; ++ b ) {
3823 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3825 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3829 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3831 #if KMP_AFFINITY_SUPPORTED
3832 if ( TCR_4(__kmp_init_middle) ) {
3833 __kmp_affinity_set_init_mask( gtid, TRUE );
3837 __kmp_root_counter ++;
3840 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3845 #if KMP_NESTED_HOT_TEAMS
3847 __kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3850 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3851 if( !hot_teams || !hot_teams[level].hot_team ) {
3854 KMP_DEBUG_ASSERT( level < max_level );
3855 kmp_team_t *team = hot_teams[level].hot_team;
3856 nth = hot_teams[level].hot_team_nth;
3858 if( level < max_level - 1 ) {
3859 for( i = 0; i < nth; ++i ) {
3860 kmp_info_t *th = team->t.t_threads[i];
3861 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3862 if( i > 0 && th->th.th_hot_teams ) {
3863 __kmp_free( th->th.th_hot_teams );
3864 th->th.th_hot_teams = NULL;
3868 __kmp_free_team( root, team, NULL );
3877 __kmp_reset_root(
int gtid, kmp_root_t *root)
3879 kmp_team_t * root_team = root->r.r_root_team;
3880 kmp_team_t * hot_team = root->r.r_hot_team;
3881 int n = hot_team->t.t_nproc;
3884 KMP_DEBUG_ASSERT( ! root->r.r_active );
3886 root->r.r_root_team = NULL;
3887 root->r.r_hot_team = NULL;
3890 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3891 #if KMP_NESTED_HOT_TEAMS
3892 if( __kmp_hot_teams_max_level > 1 ) {
3893 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3894 kmp_info_t *th = hot_team->t.t_threads[i];
3895 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3896 if( th->th.th_hot_teams ) {
3897 __kmp_free( th->th.th_hot_teams );
3898 th->th.th_hot_teams = NULL;
3903 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
3909 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3910 __kmp_wait_to_unref_task_teams();
3915 KA_TRACE( 10, ( "__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3916 (LPVOID)&(root->r.r_uber_thread->th),
3917 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3918 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3923 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3924 int gtid = __kmp_get_gtid();
3925 __ompt_thread_end(ompt_thread_initial, gtid);
3929 TCW_4(__kmp_nth, __kmp_nth - 1);
3930 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3933 root->r.r_uber_thread = NULL;
3935 root->r.r_begin = FALSE;
3941 __kmp_unregister_root_current_thread(
int gtid )
3943 KA_TRACE( 1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
3948 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3949 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3950 KC_TRACE( 10, (
"__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3951 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3954 kmp_root_t *root = __kmp_root[gtid];
3956 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3957 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3958 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3959 KMP_ASSERT( root->r.r_active == FALSE );
3965 kmp_info_t * thread = __kmp_threads[gtid];
3966 kmp_team_t * team = thread->th.th_team;
3967 kmp_task_team_t * task_team = thread->th.th_task_team;
3970 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3973 thread->th.ompt_thread_info.state = ompt_state_undefined;
3975 __kmp_task_team_wait(thread, team, NULL );
3979 __kmp_reset_root(gtid, root);
3982 __kmp_gtid_set_specific( KMP_GTID_DNE );
3983 #ifdef KMP_TDATA_GTID
3984 __kmp_gtid = KMP_GTID_DNE;
3985 #endif
3988 KC_TRACE( 10, (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3990 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3999 __kmp_unregister_root_other_thread(
int gtid )
4001 kmp_root_t *root = __kmp_root[gtid];
4004 KA_TRACE( 1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
4005 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
4006 KMP_ASSERT( KMP_UBER_GTID( gtid ));
4007 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
4008 KMP_ASSERT( root->r.r_active == FALSE );
4010 r = __kmp_reset_root(gtid, root);
4011 KC_TRACE( 10, (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
4017 void __kmp_task_info() {
4019 kmp_int32 gtid = __kmp_entry_gtid();
4020 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
4021 kmp_info_t *this_thr = __kmp_threads[ gtid ];
4022 kmp_team_t *steam = this_thr->th.th_serial_team;
4023 kmp_team_t *team = this_thr->th.th_team;
4025 __kmp_printf(
"__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
4026 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
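/*
 * __kmp_initialize_info:
 * (Re)binds a thread descriptor to a team slot: sets the tid/team/root
 * links, refreshes the cached team size and serialization state, sets up
 * the implicit task, the thread's private common block, its dispatch
 * buffers, and the task-state memo stack used by the tasking layer.
 */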
4034 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team,
int tid,
int gtid )
4038 kmp_info_t *master = team->t.t_threads[0];
4039 KMP_DEBUG_ASSERT( this_thr != NULL );
4040 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
4041 KMP_DEBUG_ASSERT( team );
4042 KMP_DEBUG_ASSERT( team->t.t_threads );
4043 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4044 KMP_DEBUG_ASSERT( master );
4045 KMP_DEBUG_ASSERT( master->th.th_root );
4049 TCW_SYNC_PTR(this_thr->th.th_team, team);
4051 this_thr->th.th_info.ds.ds_tid = tid;
4052 this_thr->th.th_set_nproc = 0;
4054 this_thr->th.th_set_proc_bind = proc_bind_default;
4055 # if KMP_AFFINITY_SUPPORTED
4056 this_thr->th.th_new_place = this_thr->th.th_current_place;
4059 this_thr->th.th_root = master->th.th_root;
4062 this_thr->th.th_team_nproc = team->t.t_nproc;
4063 this_thr->th.th_team_master = master;
4064 this_thr->th.th_team_serialized = team->t.t_serialized;
4065 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4067 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
4069 KF_TRACE( 10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4070 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4072 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4074 KF_TRACE( 10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4075 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4079 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
4081 this_thr->th.th_local.this_construct = 0;
4084 this_thr->th.th_local.tv_data = 0;
4087 if ( ! this_thr->th.th_pri_common ) {
4088 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4089 if ( __kmp_storage_map ) {
4090 __kmp_print_storage_map_gtid(
4091 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4092 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4095 this_thr->th.th_pri_head = NULL;
4100 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4104 size_t disp_size = sizeof( dispatch_private_info_t ) *
4105 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
4106 KD_TRACE( 10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4107 KMP_ASSERT( dispatch );
4108 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4109 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4111 dispatch->th_disp_index = 0;
4113 if( ! dispatch->th_disp_buffer ) {
4114 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4116 if ( __kmp_storage_map ) {
4117 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4118 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
4119 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4120 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4121 gtid, team->t.t_id, gtid );
4124 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
4127 dispatch->th_dispatch_pr_current = 0;
4128 dispatch->th_dispatch_sh_current = 0;
4130 dispatch->th_deo_fcn = 0;
4131 dispatch->th_dxo_fcn = 0;
4134 this_thr->th.th_next_pool = NULL;
4136 if (!this_thr->th.th_task_state_memo_stack) {
4138 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
4139 this_thr->th.th_task_state_top = 0;
4140 this_thr->th.th_task_state_stack_sz = 4;
4141 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i)
4142 this_thr->th.th_task_state_memo_stack[i] = 0;
4145 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4146 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
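/*
 * __kmp_allocate_thread:
 * Supplies a worker for slot new_tid of the given team, preferring a thread
 * parked in __kmp_thread_pool; otherwise it claims a fresh gtid, allocates
 * and initializes a new kmp_info_t plus its serial team, starts the monitor
 * thread on first use, and finally creates the underlying OS worker via
 * __kmp_create_worker.
 */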
4159 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team,
int new_tid )
4161 kmp_team_t *serial_team;
4162 kmp_info_t *new_thr;
4165 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4166 KMP_DEBUG_ASSERT( root && team );
4167 #if !KMP_NESTED_HOT_TEAMS
4168 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
4173 if ( __kmp_thread_pool ) {
4175 new_thr = (kmp_info_t*)__kmp_thread_pool;
4176 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4177 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4178 __kmp_thread_pool_insert_pt = NULL;
4180 TCW_4(new_thr->th.th_in_pool, FALSE);
4186 __kmp_thread_pool_nth--;
4188 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4189 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4190 KMP_ASSERT( ! new_thr->th.th_team );
4191 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4192 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4195 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4196 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4198 TCW_4(__kmp_nth, __kmp_nth + 1);
4200 new_thr->th.th_task_state = 0;
4201 new_thr->th.th_task_state_top = 0;
4202 new_thr->th.th_task_state_stack_sz = 4;
4204 #ifdef KMP_ADJUST_BLOCKTIME
4207 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4208 if ( __kmp_nth > __kmp_avail_proc ) {
4209 __kmp_zero_bt = TRUE;
4217 kmp_balign_t * balign = new_thr->th.th_bar;
4218 for( b = 0; b < bs_last_barrier; ++ b )
4219 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4222 KF_TRACE( 10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4223 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4231 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4232 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4238 if ( ! TCR_4( __kmp_init_monitor ) ) {
4239 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4240 if ( ! TCR_4( __kmp_init_monitor ) ) {
4241 KF_TRACE( 10, (
"before __kmp_create_monitor\n" ) );
4242 TCW_4( __kmp_init_monitor, 1 );
4243 __kmp_create_monitor( & __kmp_monitor );
4244 KF_TRACE( 10, (
"after __kmp_create_monitor\n" ) );
4253 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4256 KF_TRACE( 10, (
"after monitor thread has started\n" ) );
4259 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4263 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4264 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4268 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4270 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4272 if ( __kmp_storage_map ) {
4273 __kmp_print_thread_storage_map( new_thr, new_gtid );
4278 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
4279 KF_TRACE( 10, (
"__kmp_allocate_thread: before th_serial/serial_team\n" ) );
4281 new_thr->th.th_serial_team = serial_team =
4282 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
4290 0 USE_NESTED_HOT_ARG(NULL) );
4292 KMP_ASSERT ( serial_team );
4293 serial_team->t.t_serialized = 0;
4294 serial_team->t.t_threads[0] = new_thr;
4295 KF_TRACE( 10, (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4299 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4302 __kmp_initialize_fast_memory( new_thr );
4306 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
4307 __kmp_initialize_bget( new_thr );
4310 __kmp_init_random( new_thr );
4313 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4314 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4317 kmp_balign_t * balign = new_thr->th.th_bar;
4318 for(b=0; b<bs_last_barrier; ++b) {
4319 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4320 balign[b].bb.team = NULL;
4321 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4322 balign[b].bb.use_oncore_barrier = 0;
4325 new_thr->th.th_spin_here = FALSE;
4326 new_thr->th.th_next_waiting = 0;
4328 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4329 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4330 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4331 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4332 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4335 TCW_4(new_thr->th.th_in_pool, FALSE);
4336 new_thr->th.th_active_in_pool = FALSE;
4337 TCW_4(new_thr->th.th_active, TRUE);
4348 if ( __kmp_adjust_gtid_mode ) {
4349 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4350 if ( TCR_4(__kmp_gtid_mode) != 2) {
4351 TCW_4(__kmp_gtid_mode, 2);
4355 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4356 TCW_4(__kmp_gtid_mode, 1);
4361 #ifdef KMP_ADJUST_BLOCKTIME
4364 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4365 if ( __kmp_nth > __kmp_avail_proc ) {
4366 __kmp_zero_bt = TRUE;
4372 KF_TRACE( 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4373 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4374 KF_TRACE( 10, (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4377 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4392 __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs,
ident_t *loc ) {
4393 KF_TRACE( 10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4394 team->t.t_threads[0], team ) );
4395 KMP_DEBUG_ASSERT( team && new_icvs);
4396 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
4397 team->t.t_ident = loc;
4399 team->t.t_id = KMP_GEN_TEAM_ID();
4402 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
4403 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4405 KF_TRACE( 10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4406 team->t.t_threads[0], team ) );
4414 __kmp_initialize_team(
4417 kmp_internal_control_t * new_icvs,
4420 KF_TRACE( 10, (
"__kmp_initialize_team: enter: team=%p\n", team ) );
4423 KMP_DEBUG_ASSERT( team );
4424 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4425 KMP_DEBUG_ASSERT( team->t.t_threads );
4428 team->t.t_master_tid = 0;
4430 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4431 team->t.t_nproc = new_nproc;
4434 team->t.t_next_pool = NULL;
4437 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4438 team->t.t_invoke = NULL;
4441 team->t.t_sched = new_icvs->sched;
4443 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4444 team->t.t_fp_control_saved = FALSE;
4445 team->t.t_x87_fpu_control_word = 0;
4446 team->t.t_mxcsr = 0;
4449 team->t.t_construct = 0;
4450 __kmp_init_lock( & team->t.t_single_lock );
4452 team->t.t_ordered .dt.t_value = 0;
4453 team->t.t_master_active = FALSE;
4455 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
4458 team->t.t_copypriv_data = NULL;
4460 team->t.t_copyin_counter = 0;
4462 team->t.t_control_stack_top = NULL;
4464 __kmp_reinitialize_team( team, new_icvs, loc );
4467 KF_TRACE( 10, (
"__kmp_initialize_team: exit: team=%p\n", team ) );
4470 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4473 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4475 if ( KMP_AFFINITY_CAPABLE() ) {
4477 if ( old_mask != NULL ) {
4478 status = __kmp_get_system_affinity( old_mask, TRUE );
4480 if ( status != 0 ) {
4483 KMP_MSG( ChangeThreadAffMaskError ),
4489 __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
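/*
 * __kmp_partition_places:
 * Assigns an affinity place (and place partition) to every thread of the
 * team according to the team's proc_bind policy: master keeps all threads
 * on the master's place, close packs threads onto consecutive places
 * starting from the master, and spread distributes them evenly across the
 * partition, narrowing each thread's sub-partition.
 */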
4494 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4503 __kmp_partition_places( kmp_team_t *team )
4508 kmp_info_t *master_th = team->t.t_threads[0];
4509 KMP_DEBUG_ASSERT( master_th != NULL );
4510 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4511 int first_place = master_th->th.th_first_place;
4512 int last_place = master_th->th.th_last_place;
4513 int masters_place = master_th->th.th_current_place;
4514 team->t.t_first_place = first_place;
4515 team->t.t_last_place = last_place;
4517 KA_TRACE( 20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4518 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4519 masters_place, first_place, last_place ) );
4521 switch ( proc_bind ) {
4523 case proc_bind_default:
4529 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4532 case proc_bind_master:
4535 int n_th = team->t.t_nproc;
4536 for ( f = 1; f < n_th; f++ ) {
4537 kmp_info_t *th = team->t.t_threads[f];
4538 KMP_DEBUG_ASSERT( th != NULL );
4539 th->th.th_first_place = first_place;
4540 th->th.th_last_place = last_place;
4541 th->th.th_new_place = masters_place;
4543 KA_TRACE( 100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4544 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4545 team->t.t_id, f, masters_place, first_place, last_place ) );
4550 case proc_bind_close:
4553 int n_th = team->t.t_nproc;
4555 if ( first_place <= last_place ) {
4556 n_places = last_place - first_place + 1;
4559 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4561 if ( n_th <= n_places ) {
4562 int place = masters_place;
4563 for ( f = 1; f < n_th; f++ ) {
4564 kmp_info_t *th = team->t.t_threads[f];
4565 KMP_DEBUG_ASSERT( th != NULL );
4567 if ( place == last_place ) {
4568 place = first_place;
4570 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4576 th->th.th_first_place = first_place;
4577 th->th.th_last_place = last_place;
4578 th->th.th_new_place = place;
4580 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4581 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4582 team->t.t_id, f, place, first_place, last_place ) );
4586 int S, rem, gap, s_count;
4587 S = n_th / n_places;
4589 rem = n_th - ( S * n_places );
4590 gap = rem > 0 ? n_places/rem : n_places;
4591 int place = masters_place;
4593 for ( f = 0; f < n_th; f++ ) {
4594 kmp_info_t *th = team->t.t_threads[f];
4595 KMP_DEBUG_ASSERT( th != NULL );
4597 th->th.th_first_place = first_place;
4598 th->th.th_last_place = last_place;
4599 th->th.th_new_place = place;
4602 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4605 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4607 if ( place == last_place ) {
4608 place = first_place;
4610 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4620 else if (s_count == S) {
4621 if ( place == last_place ) {
4622 place = first_place;
4624 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4634 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4635 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4636 team->t.t_id, f, th->th.th_new_place, first_place,
4639 KMP_DEBUG_ASSERT( place == masters_place );
4644 case proc_bind_spread:
4647 int n_th = team->t.t_nproc;
4649 if ( first_place <= last_place ) {
4650 n_places = last_place - first_place + 1;
4653 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4655 if ( n_th <= n_places ) {
4656 int place = masters_place;
4657 int S = n_places/n_th;
4658 int s_count, rem, gap, gap_ct;
4659 rem = n_places - n_th*S;
4660 gap = rem ? n_th/rem : 1;
4662 for ( f = 0; f < n_th; f++ ) {
4663 kmp_info_t *th = team->t.t_threads[f];
4664 KMP_DEBUG_ASSERT( th != NULL );
4666 th->th.th_first_place = place;
4667 th->th.th_new_place = place;
4669 while (s_count < S) {
4670 if ( place == last_place ) {
4671 place = first_place;
4673 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4681 if (rem && (gap_ct == gap)) {
4682 if ( place == last_place ) {
4683 place = first_place;
4685 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4694 th->th.th_last_place = place;
4697 if ( place == last_place ) {
4698 place = first_place;
4700 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4707 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4708 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4709 team->t.t_id, f, th->th.th_new_place,
4710 th->th.th_first_place, th->th.th_last_place ) );
4712 KMP_DEBUG_ASSERT( place == masters_place );
4715 int S, rem, gap, s_count;
4716 S = n_th / n_places;
4718 rem = n_th - ( S * n_places );
4719 gap = rem > 0 ? n_places/rem : n_places;
4720 int place = masters_place;
4722 for ( f = 0; f < n_th; f++ ) {
4723 kmp_info_t *th = team->t.t_threads[f];
4724 KMP_DEBUG_ASSERT( th != NULL );
4726 th->th.th_first_place = place;
4727 th->th.th_last_place = place;
4728 th->th.th_new_place = place;
4731 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4734 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4736 if ( place == last_place ) {
4737 place = first_place;
4739 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4749 else if (s_count == S) {
4750 if ( place == last_place ) {
4751 place = first_place;
4753 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4763 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4764 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4765 team->t.t_id, f, th->th.th_new_place,
4766 th->th.th_first_place, th->th.th_last_place) );
4768 KMP_DEBUG_ASSERT( place == masters_place );
4777 KA_TRACE( 20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4784 __kmp_allocate_team( kmp_root_t *root,
int new_nproc,
int max_nproc,
4786 ompt_parallel_id_t ompt_parallel_id,
4789 kmp_proc_bind_t new_proc_bind,
4791 kmp_internal_control_t *new_icvs,
4792 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
4794 KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
4797 int use_hot_team = ! root->r.r_active;
4800 KA_TRACE( 20, (
"__kmp_allocate_team: called\n"));
4801 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4802 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4805 #if KMP_NESTED_HOT_TEAMS
4806 kmp_hot_team_ptr_t *hot_teams;
4808 team = master->th.th_team;
4809 level = team->t.t_active_level;
4810 if( master->th.th_teams_microtask ) {
4811 if( master->th.th_teams_size.nteams > 1 && (
4812 team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
4813 master->th.th_teams_level < team->t.t_level ) ) {
4817 hot_teams = master->th.th_hot_teams;
4818 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4827 if( use_hot_team && new_nproc > 1 ) {
4828 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
4829 #if KMP_NESTED_HOT_TEAMS
4830 team = hot_teams[level].hot_team;
4832 team = root->r.r_hot_team;
4835 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4836 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4837 team->t.t_task_team[0], team->t.t_task_team[1] ));
4844 if (team->t.t_nproc == new_nproc) {
4845 KA_TRACE( 20, (
"__kmp_allocate_team: reusing hot team\n" ));
4848 if ( team->t.t_size_changed == -1 ) {
4849 team->t.t_size_changed = 1;
4851 team->t.t_size_changed = 0;
4855 team->t.t_sched = new_icvs->sched;
4857 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4859 KF_TRACE( 10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4860 0, team->t.t_threads[0], team ) );
4861 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4864 # if KMP_AFFINITY_SUPPORTED
4865 if ( ( team->t.t_size_changed == 0 )
4866 && ( team->t.t_proc_bind == new_proc_bind ) ) {
4867 KA_TRACE( 200, (
"__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4868 team->t.t_id, new_proc_bind, team->t.t_first_place,
4869 team->t.t_last_place ) );
4872 team->t.t_proc_bind = new_proc_bind;
4873 __kmp_partition_places( team );
4876 if ( team->t.t_proc_bind != new_proc_bind ) {
4877 team->t.t_proc_bind = new_proc_bind;
4882 else if( team->t.t_nproc > new_nproc ) {
4883 KA_TRACE( 20, (
"__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4885 team->t.t_size_changed = 1;
4886 #if KMP_NESTED_HOT_TEAMS
4887 if( __kmp_hot_teams_mode == 0 ) {
4890 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4891 hot_teams[level].hot_team_nth = new_nproc;
4892 #endif // KMP_NESTED_HOT_TEAMS
4894 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4895 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
4896 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4898 team->t.t_threads[f]->th.th_task_team = NULL;
4900 __kmp_free_thread( team->t.t_threads[ f ] );
4901 team->t.t_threads[ f ] = NULL;
4903 #if KMP_NESTED_HOT_TEAMS
4905 #endif // KMP_NESTED_HOT_TEAMS
4906 team->t.t_nproc = new_nproc;
4908 team->t.t_sched = new_icvs->sched;
4909 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4912 for(f = 0; f < new_nproc; ++f) {
4913 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4916 KF_TRACE( 10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4917 0, team->t.t_threads[0], team ) );
4919 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4922 for ( f = 0; f < team->t.t_nproc; f++ ) {
4923 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4924 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4929 team->t.t_proc_bind = new_proc_bind;
4930 # if KMP_AFFINITY_SUPPORTED
4931 __kmp_partition_places( team );
4936 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4937 kmp_affin_mask_t *old_mask;
4938 if ( KMP_AFFINITY_CAPABLE() ) {
4939 KMP_CPU_ALLOC(old_mask);
4943 KA_TRACE( 20, (
"__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4945 team->t.t_size_changed = 1;
4948 #if KMP_NESTED_HOT_TEAMS
4949 int avail_threads = hot_teams[level].hot_team_nth;
4950 if( new_nproc < avail_threads )
4951 avail_threads = new_nproc;
4952 kmp_info_t **other_threads = team->t.t_threads;
4953 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4957 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4958 for ( b = 0; b < bs_last_barrier; ++ b ) {
4959 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4960 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4962 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4966 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4969 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4970 team->t.t_nproc = new_nproc;
4973 team->t.t_nproc = hot_teams[level].hot_team_nth;
4974 hot_teams[level].hot_team_nth = new_nproc;
4975 #endif // KMP_NESTED_HOT_TEAMS
4976 if(team->t.t_max_nproc < new_nproc) {
4978 __kmp_reallocate_team_arrays(team, new_nproc);
4979 __kmp_reinitialize_team( team, new_icvs, NULL );
4982 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4989 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4993 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4994 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4995 KMP_DEBUG_ASSERT( new_worker );
4996 team->t.t_threads[ f ] = new_worker;
4998 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
4999 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
5000 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5001 team->t.t_bar[bs_plain_barrier].b_arrived ) );
5005 kmp_balign_t * balign = new_worker->th.th_bar;
5006 for( b = 0; b < bs_last_barrier; ++ b ) {
5007 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5008 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5010 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5016 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5017 if ( KMP_AFFINITY_CAPABLE() ) {
5019 __kmp_set_system_affinity( old_mask, TRUE );
5020 KMP_CPU_FREE(old_mask);
5023 #if KMP_NESTED_HOT_TEAMS
5025 #endif // KMP_NESTED_HOT_TEAMS
5027 int old_nproc = team->t.t_nproc;
5028 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
5031 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5032 for (f=0; f < team->t.t_nproc; ++f)
5033 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5038 for (f=old_nproc; f < team->t.t_nproc; ++f)
5039 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5042 int old_state = team->t.t_threads[0]->th.th_task_state;
5043 for (f=old_nproc; f < team->t.t_nproc; ++f)
5044 team->t.t_threads[f]->th.th_task_state = old_state;
5048 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5049 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5050 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5055 team->t.t_proc_bind = new_proc_bind;
5056 # if KMP_AFFINITY_SUPPORTED
5057 __kmp_partition_places( team );
5063 kmp_info_t *master = team->t.t_threads[0];
5064 if( master->th.th_teams_microtask ) {
5065 for( f = 1; f < new_nproc; ++f ) {
5067 kmp_info_t *thr = team->t.t_threads[f];
5068 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5069 thr->th.th_teams_level = master->th.th_teams_level;
5070 thr->th.th_teams_size = master->th.th_teams_size;
5074 #if KMP_NESTED_HOT_TEAMS
5077 for( f = 1; f < new_nproc; ++f ) {
5078 kmp_info_t *thr = team->t.t_threads[f];
5080 kmp_balign_t * balign = thr->th.th_bar;
5081 for( b = 0; b < bs_last_barrier; ++ b ) {
5082 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5083 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5085 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5090 #endif // KMP_NESTED_HOT_TEAMS
5093 __kmp_alloc_argv_entries( argc, team, TRUE );
5094 team->t.t_argc = argc;
5100 KF_TRACE( 10, (
" hot_team = %p\n", team ) );
5103 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5104 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5105 team->t.t_task_team[0], team->t.t_task_team[1] ));
5110 __ompt_team_assign_id(team, ompt_parallel_id);
5120 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5123 if ( team->t.t_max_nproc >= max_nproc ) {
5125 __kmp_team_pool = team->t.t_next_pool;
5128 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5130 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5131 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5132 team->t.t_task_team[0] = NULL;
5133 team->t.t_task_team[1] = NULL;
5136 __kmp_alloc_argv_entries( argc, team, TRUE );
5137 team->t.t_argc = argc;
5139 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5140 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5143 for ( b = 0; b < bs_last_barrier; ++ b) {
5144 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5146 team->t.t_bar[ b ].b_master_arrived = 0;
5147 team->t.t_bar[ b ].b_team_arrived = 0;
5153 team->t.t_proc_bind = new_proc_bind;
5156 KA_TRACE( 20, (
"__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
5159 __ompt_team_assign_id(team, ompt_parallel_id);
5170 team = __kmp_reap_team( team );
5171 __kmp_team_pool = team;
5176 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5179 team->t.t_max_nproc = max_nproc;
5183 __kmp_allocate_team_arrays( team, max_nproc );
5185 KA_TRACE( 20, (
"__kmp_allocate_team: making a new team\n" ) );
5186 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5188 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5189 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5190 team->t.t_task_team[0] = NULL;
5191 team->t.t_task_team[1] = NULL;
5193 if ( __kmp_storage_map ) {
5194 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5198 __kmp_alloc_argv_entries( argc, team, FALSE );
5199 team->t.t_argc = argc;
5201 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5202 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5205 for ( b = 0; b < bs_last_barrier; ++ b ) {
5206 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5208 team->t.t_bar[ b ].b_master_arrived = 0;
5209 team->t.t_bar[ b ].b_team_arrived = 0;
5215 team->t.t_proc_bind = new_proc_bind;
5219 __ompt_team_assign_id(team, ompt_parallel_id);
5220 team->t.ompt_serialized_team_info = NULL;
5225 KA_TRACE( 20, ( "__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5236 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
5239 KA_TRACE( 20, ( "__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5242 KMP_DEBUG_ASSERT( root );
5243 KMP_DEBUG_ASSERT( team );
5244 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5245 KMP_DEBUG_ASSERT( team->t.t_threads );
5247 int use_hot_team = team == root->r.r_hot_team;
5248 #if KMP_NESTED_HOT_TEAMS
5250 kmp_hot_team_ptr_t *hot_teams;
5252 level = team->t.t_active_level - 1;
5253 if( master->th.th_teams_microtask ) {
5254 if( master->th.th_teams_size.nteams > 1 ) {
5257 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5258 master->th.th_teams_level == team->t.t_level ) {
5262 hot_teams = master->th.th_hot_teams;
5263 if( level < __kmp_hot_teams_max_level ) {
5264 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5268 #endif // KMP_NESTED_HOT_TEAMS
5271 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5272 team->t.t_copyin_counter = 0;
5276 if( ! use_hot_team ) {
5277 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5280 for (tt_idx=0; tt_idx<2; ++tt_idx) {
5281 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5282 if ( task_team != NULL ) {
5283 for (f=0; f<team->t.t_nproc; ++f) {
5284 team->t.t_threads[f]->th.th_task_team = NULL;
5286 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
5287 #if KMP_NESTED_HOT_TEAMS
5288 __kmp_free_task_team( master, task_team );
5290 team->t.t_task_team[tt_idx] = NULL;
5296 team->t.t_parent = NULL;
5300 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5301 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5302 __kmp_free_thread( team->t.t_threads[ f ] );
5303 team->t.t_threads[ f ] = NULL;
5309 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
5310 __kmp_team_pool = (volatile kmp_team_t*) team;
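/*
 * __kmp_reap_team: final destruction of a pooled team. Frees the per-team
 * arrays and any heap-allocated argv block, and returns the next team in the
 * pool so the caller can keep walking the free list.
 */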
5319 __kmp_reap_team( kmp_team_t *team )
5321 kmp_team_t *next_pool = team->t.t_next_pool;
5323 KMP_DEBUG_ASSERT( team );
5324 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5325 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5326 KMP_DEBUG_ASSERT( team->t.t_threads );
5327 KMP_DEBUG_ASSERT( team->t.t_argv );
5333 __kmp_free_team_arrays( team );
5334 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5335 __kmp_free( (void*) team->t.t_argv );
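/*
 * __kmp_free_thread: return a worker to the thread pool. The thread's team
 * pointers are cleared and it is inserted into __kmp_thread_pool in ascending
 * gtid order; __kmp_thread_pool_insert_pt caches the last insertion point so
 * repeated frees do not rescan the whole list.
 */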
5370 __kmp_free_thread( kmp_info_t *this_th )
5375 KA_TRACE( 20, ( "__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5376 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5378 KMP_DEBUG_ASSERT( this_th );
5382 kmp_balign_t *balign = this_th->th.th_bar;
5383 for (b=0; b<bs_last_barrier; ++b) {
5384 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5385 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5386 balign[b].bb.team = NULL;
5388 this_th->th.th_task_state = 0;
5392 TCW_PTR(this_th->th.th_team, NULL);
5393 TCW_PTR(this_th->th.th_root, NULL);
5394 TCW_PTR(this_th->th.th_dispatch, NULL);
5400 gtid = this_th->th.th_info.ds.ds_gtid;
5401 if ( __kmp_thread_pool_insert_pt != NULL ) {
5402 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5403 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5404 __kmp_thread_pool_insert_pt = NULL;
5415 if ( __kmp_thread_pool_insert_pt != NULL ) {
5416 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5419 scan = (kmp_info_t **)&__kmp_thread_pool;
5421 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5422 scan = &( (*scan)->th.th_next_pool ) );
5428 TCW_PTR(this_th->th.th_next_pool, *scan);
5429 __kmp_thread_pool_insert_pt = *scan = this_th;
5430 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5431 || ( this_th->th.th_info.ds.ds_gtid
5432 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5433 TCW_4(this_th->th.th_in_pool, TRUE);
5434 __kmp_thread_pool_nth++;
5436 TCW_4(__kmp_nth, __kmp_nth - 1);
5438 #ifdef KMP_ADJUST_BLOCKTIME
5441 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5442 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5443 if ( __kmp_nth <= __kmp_avail_proc ) {
5444 __kmp_zero_bt = FALSE;
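/*
 * __kmp_launch_thread: main loop of an OpenMP worker. The thread parks in
 * __kmp_fork_barrier() until the master releases it with a team to join,
 * invokes the team's microtask via t_invoke, then meets the master again in
 * __kmp_join_barrier(). The loop exits once __kmp_global.g.g_done is set.
 */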
5456 __kmp_launch_thread( kmp_info_t *this_thr )
5458 int gtid = this_thr->th.th_info.ds.ds_gtid;
5460 kmp_team_t *(* volatile pteam);
5463 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d start\n", gtid ) );
5465 if( __kmp_env_consistency_check ) {
5466 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid );
5471 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5472 this_thr->th.ompt_thread_info.wait_id = 0;
5473 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5474 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5475 __ompt_thread_begin(ompt_thread_worker, gtid);
5481 while( ! TCR_4(__kmp_global.g.g_done) ) {
5482 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5486 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5490 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5495 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5499 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5503 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5506 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
5508 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5510 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5511 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5513 updateHWFPControl (*pteam);
5517 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5519 int tid = __kmp_tid_from_gtid(gtid);
5520 (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id =
5521 __ompt_task_id_new(tid);
5525 KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
5527 KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
5528 rc = (*pteam)->t.t_invoke( gtid );
5530 KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
5536 int tid = __kmp_tid_from_gtid(gtid);
5537 (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
5539 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5543 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5544 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5547 __kmp_join_barrier( gtid );
5550 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5554 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5555 __ompt_thread_end(ompt_thread_worker, gtid);
5559 this_thr->th.th_task_team = NULL;
5561 __kmp_common_destroy_gtid( gtid );
5563 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d done\n", gtid ) );
5572 __kmp_internal_end_dest( void *specific_gtid )
5574 #if KMP_COMPILER_ICC
5575 #pragma warning( push )
5576 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5579 int gtid = (kmp_intptr_t)specific_gtid - 1;
5580 #if KMP_COMPILER_ICC
5581 #pragma warning( pop )
5584 KA_TRACE( 30, ( "__kmp_internal_end_dest: T#%d\n", gtid));
5598 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5599 __kmp_gtid_set_specific( gtid );
5600 #ifdef KMP_TDATA_GTID
5603 __kmp_internal_end_thread( gtid );
5606 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5612 __attribute__(( destructor ))
5614 __kmp_internal_end_dtor( void )
5616 __kmp_internal_end_atexit();
5620 __kmp_internal_end_fini( void )
5622 __kmp_internal_end_atexit();
5629 __kmp_internal_end_atexit( void )
5631 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5653 __kmp_internal_end_library( -1 );
5655 __kmp_close_console();
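/*
 * __kmp_reap_thread: tear down a thread that has been removed from the pool:
 * release it from the fork barrier if blocktime is finite, join the underlying
 * OS thread, then free its per-thread resources (fast memory, consistency-check
 * stack, private commons, task-state stack, affinity mask, serial team).
 */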
5661 kmp_info_t * thread,
5669 KMP_DEBUG_ASSERT( thread != NULL );
5671 gtid = thread->th.th_info.ds.ds_gtid;
5675 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5677 KA_TRACE( 20, ( "__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5679 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5680 __kmp_release_64(&flag);
5685 __kmp_reap_worker( thread );
5700 if ( thread->th.th_active_in_pool ) {
5701 thread->th.th_active_in_pool = FALSE;
5702 KMP_TEST_THEN_DEC32(
5703 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5704 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5708 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5709 --__kmp_thread_pool_nth;
5714 __kmp_free_fast_memory( thread );
5717 __kmp_suspend_uninitialize_thread( thread );
5719 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5720 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5725 #ifdef KMP_ADJUST_BLOCKTIME
5728 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5729 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5730 if ( __kmp_nth <= __kmp_avail_proc ) {
5731 __kmp_zero_bt = FALSE;
5737 if( __kmp_env_consistency_check ) {
5738 if ( thread->th.th_cons ) {
5739 __kmp_free_cons_stack( thread->th.th_cons );
5740 thread->th.th_cons = NULL;
5744 if ( thread->th.th_pri_common != NULL ) {
5745 __kmp_free( thread->th.th_pri_common );
5746 thread->th.th_pri_common = NULL;
5749 if (thread->th.th_task_state_memo_stack != NULL) {
5750 __kmp_free(thread->th.th_task_state_memo_stack);
5751 thread->th.th_task_state_memo_stack = NULL;
5755 if ( thread->th.th_local.bget_data != NULL ) {
5756 __kmp_finalize_bget( thread );
5760 #if KMP_AFFINITY_SUPPORTED
5761 if ( thread->th.th_affin_mask != NULL ) {
5762 KMP_CPU_FREE( thread->th.th_affin_mask );
5763 thread->th.th_affin_mask = NULL;
5767 __kmp_reap_team( thread->th.th_serial_team );
5768 thread->th.th_serial_team = NULL;
5769 __kmp_free( thread );
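/*
 * __kmp_internal_end: common shutdown path. If no root is still active it
 * reaps the monitor thread, every pooled worker and team, and the task teams,
 * then clears the init flags so a later initialization can start clean.
 */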
5776 __kmp_internal_end( void )
5781 __kmp_unregister_library();
5789 __kmp_reclaim_dead_roots();
5792 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5794 if( __kmp_root[i]->r.r_active )
5797 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5799 if ( i < __kmp_threads_capacity ) {
5814 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5815 if ( TCR_4( __kmp_init_monitor ) ) {
5816 __kmp_reap_monitor( & __kmp_monitor );
5817 TCW_4( __kmp_init_monitor, 0 );
5819 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5820 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5825 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5826 if( __kmp_root[i] ) {
5828 KMP_ASSERT( ! __kmp_root[i]->r.r_active );
5837 while ( __kmp_thread_pool != NULL ) {
5839 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5840 __kmp_thread_pool = thread->th.th_next_pool;
5842 thread->th.th_next_pool = NULL;
5843 thread->th.th_in_pool = FALSE;
5844 __kmp_reap_thread( thread, 0 );
5846 __kmp_thread_pool_insert_pt = NULL;
5849 while ( __kmp_team_pool != NULL ) {
5851 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5852 __kmp_team_pool = team->t.t_next_pool;
5854 team->t.t_next_pool = NULL;
5855 __kmp_reap_team( team );
5858 __kmp_reap_task_teams( );
5860 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5867 TCW_SYNC_4(__kmp_init_common, FALSE);
5869 KA_TRACE( 10, ( "__kmp_internal_end: all workers reaped\n" ) );
5878 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5879 if ( TCR_4( __kmp_init_monitor ) ) {
5880 __kmp_reap_monitor( & __kmp_monitor );
5881 TCW_4( __kmp_init_monitor, 0 );
5883 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5884 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5887 TCW_4(__kmp_init_gtid, FALSE);
5898 __kmp_internal_end_library( int gtid_req )
5906 if( __kmp_global.g.g_abort ) {
5907 KA_TRACE( 11, ( "__kmp_internal_end_library: abort, exiting\n" ));
5911 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5912 KA_TRACE( 10, ( "__kmp_internal_end_library: already finished\n" ));
5921 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5922 KA_TRACE( 10, ( "__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5923 if( gtid == KMP_GTID_SHUTDOWN ) {
5924 KA_TRACE( 10, ( "__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5926 } else if( gtid == KMP_GTID_MONITOR ) {
5927 KA_TRACE( 10, ( "__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5929 } else if( gtid == KMP_GTID_DNE ) {
5930 KA_TRACE( 10, ( "__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5932 } else if( KMP_UBER_GTID( gtid )) {
5934 if( __kmp_root[gtid]->r.r_active ) {
5935 __kmp_global.g.g_abort = -1;
5936 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5937 KA_TRACE( 10, ( "__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5940 KA_TRACE( 10, ( "__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5941 __kmp_unregister_root_current_thread( gtid );
5948 #ifdef DUMP_DEBUG_ON_EXIT
5949 if ( __kmp_debug_buf )
5950 __kmp_dump_debug_buffer( );
5956 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5959 if( __kmp_global.g.g_abort ) {
5960 KA_TRACE( 10, (
"__kmp_internal_end_library: abort, exiting\n" ));
5962 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5965 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5966 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5976 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5979 __kmp_internal_end();
5981 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5982 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5984 KA_TRACE( 10, ( "__kmp_internal_end_library: exit\n" ) );
5986 #ifdef DUMP_DEBUG_ON_EXIT
5987 if ( __kmp_debug_buf )
5988 __kmp_dump_debug_buffer();
5992 __kmp_close_console();
5995 __kmp_fini_allocator();
6000 __kmp_internal_end_thread( int gtid_req )
6010 if( __kmp_global.g.g_abort ) {
6011 KA_TRACE( 11, (
"__kmp_internal_end_thread: abort, exiting\n" ));
6015 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6016 KA_TRACE( 10, (
"__kmp_internal_end_thread: already finished\n" ));
6024 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6025 KA_TRACE( 10, (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6026 if( gtid == KMP_GTID_SHUTDOWN ) {
6027 KA_TRACE( 10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6029 }
else if( gtid == KMP_GTID_MONITOR ) {
6030 KA_TRACE( 10, (
"__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6032 }
else if( gtid == KMP_GTID_DNE ) {
6033 KA_TRACE( 10, (
"__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6036 }
else if( KMP_UBER_GTID( gtid )) {
6038 if( __kmp_root[gtid]->r.r_active ) {
6039 __kmp_global.g.g_abort = -1;
6040 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6041 KA_TRACE( 10, (
"__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6044 KA_TRACE( 10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6045 __kmp_unregister_root_current_thread( gtid );
6049 KA_TRACE( 10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6052 __kmp_threads[gtid]->th.th_task_team = NULL;
6055 KA_TRACE( 10, (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6059 #if defined KMP_DYNAMIC_LIB
6067 KA_TRACE( 10, ( "__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
6071 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6074 if( __kmp_global.g.g_abort ) {
6075 KA_TRACE( 10, (
"__kmp_internal_end_thread: abort, exiting\n" ));
6077 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6080 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6081 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6093 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6095 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6096 if ( KMP_UBER_GTID( i ) ) {
6097 KA_TRACE( 10, (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6098 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6099 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6106 __kmp_internal_end();
6108 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6109 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6111 KA_TRACE( 10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
6113 #ifdef DUMP_DEBUG_ON_EXIT
6114 if ( __kmp_debug_buf )
6115 __kmp_dump_debug_buffer();
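/*
 * Library registration: to detect two different OpenMP runtimes loaded into one
 * process, the first runtime publishes a __KMP_REGISTERED_LIB_<pid> environment
 * variable holding the address and value of __kmp_registration_flag. A second
 * runtime finds the variable, checks whether the recorded address is still
 * mapped and still holds the recorded value, and if so aborts with a
 * DuplicateLibrary message unless KMP_DUPLICATE_LIB_OK is set.
 */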
6122 static long __kmp_registration_flag = 0;
6124 static char * __kmp_registration_str = NULL;
6130 __kmp_reg_status_name() {
6136 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6141 __kmp_register_library_startup(
6145 char * name = __kmp_reg_status_name();
6152 __kmp_initialize_system_tick();
6154 __kmp_read_system_time( & time.dtime );
6155 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6156 __kmp_registration_str =
6159 & __kmp_registration_flag,
6160 __kmp_registration_flag,
6164 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6168 char * value = NULL;
6171 __kmp_env_set( name, __kmp_registration_str, 0 );
6173 value = __kmp_env_get( name );
6174 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6183 char * tail = value;
6184 char * flag_addr_str = NULL;
6185 char * flag_val_str = NULL;
6186 char const * file_name = NULL;
6187 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6188 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6190 if ( tail != NULL ) {
6191 long * flag_addr = 0;
6193 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6194 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
6195 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6199 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6207 switch ( neighbor ) {
6212 file_name = "unknown library";
6216 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6217 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6221 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6222 KMP_HNT( DuplicateLibrary ),
6226 KMP_INTERNAL_FREE( duplicate_ok );
6227 __kmp_duplicate_library_ok = 1;
6232 __kmp_env_unset( name );
6235 KMP_DEBUG_ASSERT( 0 );
6240 KMP_INTERNAL_FREE( (void *) value );
6243 KMP_INTERNAL_FREE( (void *) name );
6249 __kmp_unregister_library( void ) {
6251 char * name = __kmp_reg_status_name();
6252 char * value = __kmp_env_get( name );
6254 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6255 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6256 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6258 __kmp_env_unset( name );
6261 KMP_INTERNAL_FREE( __kmp_registration_str );
6262 KMP_INTERNAL_FREE( value );
6263 KMP_INTERNAL_FREE( name );
6265 __kmp_registration_flag = 0;
6266 __kmp_registration_str = NULL;
6274 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6276 static void __kmp_check_mic_type()
6278 kmp_cpuid_t cpuid_state = {0};
6279 kmp_cpuid_t * cs_p = &cpuid_state;
6280 __kmp_x86_cpuid(1, 0, cs_p);
6282 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6283 __kmp_mic_type = mic2;
6284 }
else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6285 __kmp_mic_type = mic3;
6287 __kmp_mic_type = non_mic;
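/*
 * __kmp_do_serial_initialize: one-time process-level setup -- sanity checks on
 * the fixed-width types, lock and allocator initialization, duplicate-library
 * registration, default blocktime/team-size/barrier settings, environment
 * parsing, allocation of the __kmp_threads/__kmp_root arrays, and registration
 * of the initial (uber) root thread.
 */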
6294 __kmp_do_serial_initialize( void )
6299 KA_TRACE( 10, ( "__kmp_do_serial_initialize: enter\n" ) );
6301 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6302 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6303 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6304 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6305 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6311 __kmp_validate_locks();
6314 __kmp_init_allocator();
6320 __kmp_register_library_startup( );
6323 if( TCR_4(__kmp_global.g.g_done) ) {
6324 KA_TRACE( 10, (
"__kmp_do_serial_initialize: reinitialization of library\n" ) );
6327 __kmp_global.g.g_abort = 0;
6328 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6331 #if KMP_USE_ADAPTIVE_LOCKS
6332 #if KMP_DEBUG_ADAPTIVE_LOCKS
6333 __kmp_init_speculative_stats();
6336 #if KMP_STATS_ENABLED
6337 __kmp_init_tas_lock( & __kmp_stats_lock );
6339 __kmp_init_lock( & __kmp_global_lock );
6340 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6341 __kmp_init_lock( & __kmp_debug_lock );
6342 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6343 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6344 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6345 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6346 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6347 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6348 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6349 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6350 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6351 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6352 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6353 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6354 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6355 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6356 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6357 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6358 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6362 __kmp_runtime_initialize();
6364 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6365 __kmp_check_mic_type();
6372 __kmp_abort_delay = 0;
6376 __kmp_dflt_team_nth_ub = __kmp_xproc;
6377 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6378 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6380 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6381 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6383 __kmp_max_nth = __kmp_sys_max_nth;
6386 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6387 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6388 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6390 __kmp_library = library_throughput;
6392 __kmp_static = kmp_sch_static_balanced;
6398 #if KMP_FAST_REDUCTION_BARRIER
6399 #define kmp_reduction_barrier_gather_bb ((int)1)
6400 #define kmp_reduction_barrier_release_bb ((int)1)
6401 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6402 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6403 #endif // KMP_FAST_REDUCTION_BARRIER
6404 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6405 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6406 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6407 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6408 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6409 #if KMP_FAST_REDUCTION_BARRIER
6410 if( i == bs_reduction_barrier ) {
6411 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6412 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6413 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6414 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6416 #endif // KMP_FAST_REDUCTION_BARRIER
6418 #if KMP_FAST_REDUCTION_BARRIER
6419 #undef kmp_reduction_barrier_release_pat
6420 #undef kmp_reduction_barrier_gather_pat
6421 #undef kmp_reduction_barrier_release_bb
6422 #undef kmp_reduction_barrier_gather_bb
6423 #endif // KMP_FAST_REDUCTION_BARRIER
6424 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6425 if (__kmp_mic_type == mic2) {
6427 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;
6428 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;
6429 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6430 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6432 #if KMP_FAST_REDUCTION_BARRIER
6433 if (__kmp_mic_type == mic2) {
6434 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6435 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6442 __kmp_env_checks = TRUE;
6444 __kmp_env_checks = FALSE;
6448 __kmp_foreign_tp = TRUE;
6450 __kmp_global.g.g_dynamic = FALSE;
6451 __kmp_global.g.g_dynamic_mode = dynamic_default;
6453 __kmp_env_initialize( NULL );
6457 char const * val = __kmp_env_get(
"KMP_DUMP_CATALOG" );
6458 if ( __kmp_str_match_true( val ) ) {
6459 kmp_str_buf_t buffer;
6460 __kmp_str_buf_init( & buffer );
6461 __kmp_i18n_dump_catalog( & buffer );
6462 __kmp_printf(
"%s", buffer.str );
6463 __kmp_str_buf_free( & buffer );
6465 __kmp_env_free( & val );
6468 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
6470 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6474 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6475 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6476 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6477 __kmp_thread_pool = NULL;
6478 __kmp_thread_pool_insert_pt = NULL;
6479 __kmp_team_pool = NULL;
6484 size = ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*) ) * __kmp_threads_capacity + CACHE_LINE;
6485 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6486 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6489 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
6490 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
6495 gtid = __kmp_register_root( TRUE );
6496 KA_TRACE( 10, (
"__kmp_do_serial_initialize T#%d\n", gtid ));
6497 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6498 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6502 __kmp_common_initialize();
6506 __kmp_register_atfork();
6509 #if ! defined KMP_DYNAMIC_LIB
6514 int rc = atexit( __kmp_internal_end_atexit );
6516 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6521 #if KMP_HANDLE_SIGNALS
6528 __kmp_install_signals( FALSE );
6531 __kmp_install_signals( TRUE );
6536 __kmp_init_counter ++;
6538 __kmp_init_serial = TRUE;
6540 if (__kmp_settings) {
6545 if (__kmp_display_env || __kmp_display_env_verbose) {
6546 __kmp_env_print_2();
6548 #endif // OMP_40_ENABLED
6556 KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
6560 __kmp_serial_initialize( void )
6562 if ( __kmp_init_serial ) {
6565 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6566 if ( __kmp_init_serial ) {
6567 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6570 __kmp_do_serial_initialize();
6571 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6575 __kmp_do_middle_initialize(
void )
6578 int prev_dflt_team_nth;
6580 if( !__kmp_init_serial ) {
6581 __kmp_do_serial_initialize();
6584 KA_TRACE( 10, (
"__kmp_middle_initialize: enter\n" ) );
6590 prev_dflt_team_nth = __kmp_dflt_team_nth;
6592 #if KMP_AFFINITY_SUPPORTED
6597 __kmp_affinity_initialize();
6603 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6604 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6605 __kmp_affinity_set_init_mask( i, TRUE );
6610 KMP_ASSERT( __kmp_xproc > 0 );
6611 if ( __kmp_avail_proc == 0 ) {
6612 __kmp_avail_proc = __kmp_xproc;
6617 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
6618 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6622 if ( __kmp_dflt_team_nth == 0 ) {
6623 #ifdef KMP_DFLT_NTH_CORES
6627 __kmp_dflt_team_nth = __kmp_ncores;
6628 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6629 __kmp_dflt_team_nth ) );
6634 __kmp_dflt_team_nth = __kmp_avail_proc;
6635 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6636 __kmp_dflt_team_nth ) );
6640 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6641 __kmp_dflt_team_nth = KMP_MIN_NTH;
6643 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6644 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6651 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6653 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6660 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6661 kmp_info_t *thread = __kmp_threads[ i ];
6662 if ( thread == NULL )
continue;
6663 if ( thread->th.th_current_task->td_icvs.nproc != 0 )
continue;
6665 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
6668 KA_TRACE( 20, (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6669 __kmp_dflt_team_nth) );
6671 #ifdef KMP_ADJUST_BLOCKTIME
6674 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6675 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6676 if ( __kmp_nth > __kmp_avail_proc ) {
6677 __kmp_zero_bt = TRUE;
6683 TCW_SYNC_4(__kmp_init_middle, TRUE);
6685 KA_TRACE( 10, (
"__kmp_do_middle_initialize: exit\n" ) );
6689 __kmp_middle_initialize(
void )
6691 if ( __kmp_init_middle ) {
6694 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6695 if ( __kmp_init_middle ) {
6696 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6699 __kmp_do_middle_initialize();
6700 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6704 __kmp_parallel_initialize( void )
6706 int gtid = __kmp_entry_gtid();
6709 if( TCR_4(__kmp_init_parallel) ) return;
6710 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6711 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6714 if( TCR_4(__kmp_global.g.g_done) ) {
6715 KA_TRACE( 10, (
"__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6716 __kmp_infinite_loop();
6722 if( !__kmp_init_middle ) {
6723 __kmp_do_middle_initialize();
6727 KA_TRACE( 10, (
"__kmp_parallel_initialize: enter\n" ) );
6728 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6730 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6735 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6736 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6737 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6741 # if KMP_HANDLE_SIGNALS 6743 __kmp_install_signals( TRUE );
6747 __kmp_suspend_initialize();
6749 # if defined(USE_LOAD_BALANCE) 6750 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6751 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6754 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6755 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6759 if ( __kmp_version ) {
6760 __kmp_print_version_2();
6764 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6767 KA_TRACE( 10, (
"__kmp_parallel_initialize: exit\n" ) );
6769 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
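/*
 * __kmp_run_before_invoked_task / __kmp_run_after_invoked_task bracket the
 * execution of a parallel region's microtask on each thread: the "before" hook
 * resets the per-thread dispatch buffer and pushes a consistency-check frame,
 * the "after" hook pops it again.
 */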
6776 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team )
6779 kmp_disp_t *dispatch;
6784 this_thr->th.th_local.this_construct = 0;
6785 #if KMP_CACHE_MANAGE
6786 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
6788 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6789 KMP_DEBUG_ASSERT( dispatch );
6790 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6793 dispatch->th_disp_index = 0;
6795 if( __kmp_env_consistency_check )
6796 __kmp_push_parallel( gtid, team->t.t_ident );
6802 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team )
6805 if( __kmp_env_consistency_check )
6806 __kmp_pop_parallel( gtid, team->t.t_ident );
6810 __kmp_invoke_task_func(
int gtid )
6813 int tid = __kmp_tid_from_gtid( gtid );
6814 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6815 kmp_team_t *team = this_thr->th.th_team;
6817 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6819 if ( __itt_stack_caller_create_ptr ) {
6820 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
6823 #if INCLUDE_SSC_MARKS
6824 SSC_MARK_INVOKING();
6829 void **exit_runtime_p;
6830 ompt_task_id_t my_task_id;
6831 ompt_parallel_id_t my_parallel_id;
6834 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6835 ompt_task_info.frame.exit_runtime_frame);
6837 exit_runtime_p = &dummy;
6841 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6842 my_parallel_id = team->t.ompt_team_info.parallel_id;
6844 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6845 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6846 my_parallel_id, my_task_id);
6853 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6854 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
6861 #if OMPT_SUPPORT && OMPT_TRACE
6863 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
6864 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
6865 my_parallel_id, my_task_id);
6868 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
6873 if ( __itt_stack_caller_create_ptr ) {
6874 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
6877 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6884 __kmp_teams_master(
int gtid )
6887 kmp_info_t *thr = __kmp_threads[ gtid ];
6888 kmp_team_t *team = thr->th.th_team;
6889 ident_t *loc = team->t.t_ident;
6890 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6891 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6892 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
6893 KA_TRACE( 20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
6894 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
6897 #if INCLUDE_SSC_MARKS
6900 __kmp_fork_call( loc, gtid, fork_context_intel,
6903 (void *)thr->th.th_teams_microtask,
6905 (microtask_t)thr->th.th_teams_microtask,
6906 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6908 #if INCLUDE_SSC_MARKS
6914 __kmp_join_call( loc, gtid
6916 , fork_context_intel
6922 __kmp_invoke_teams_master(
int gtid )
6924 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6925 kmp_team_t *team = this_thr->th.th_team;
6927 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
6928 KMP_DEBUG_ASSERT( (
void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (
void*)__kmp_teams_master );
6930 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6931 __kmp_teams_master( gtid );
6932 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
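/*
 * The __kmp_push_* routines below record clause values on the requesting
 * thread so the next fork picks them up. For example, compiler-generated code
 * for "#pragma omp parallel num_threads(4)" typically ends up storing 4 in
 * thr->th.th_set_nproc before entering the fork path (illustrative sketch of
 * the flow, not a literal call sequence).
 */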
6943 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6945 kmp_info_t *thr = __kmp_threads[gtid];
6947 if( num_threads > 0 )
6948 thr->th.th_set_nproc = num_threads;
6956 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6958 kmp_info_t *thr = __kmp_threads[gtid];
6959 KMP_DEBUG_ASSERT(num_teams >= 0);
6960 KMP_DEBUG_ASSERT(num_threads >= 0);
6962 if( num_teams == 0 )
6964 if( num_teams > __kmp_max_nth ) {
6965 if ( !__kmp_reserve_warn ) {
6966 __kmp_reserve_warn = 1;
6969 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6970 KMP_HNT( Unset_ALL_THREADS ),
6974 num_teams = __kmp_max_nth;
6977 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6980 if( num_threads == 0 ) {
6981 if( !TCR_4(__kmp_init_middle) )
6982 __kmp_middle_initialize();
6983 num_threads = __kmp_avail_proc / num_teams;
6984 if( num_teams * num_threads > __kmp_max_nth ) {
6986 num_threads = __kmp_max_nth / num_teams;
6989 if( num_teams * num_threads > __kmp_max_nth ) {
6990 int new_threads = __kmp_max_nth / num_teams;
6991 if ( !__kmp_reserve_warn ) {
6992 __kmp_reserve_warn = 1;
6995 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6996 KMP_HNT( Unset_ALL_THREADS ),
7000 num_threads = new_threads;
7003 thr->th.th_teams_size.nth = num_threads;
7011 __kmp_push_proc_bind(
ident_t *
id,
int gtid, kmp_proc_bind_t proc_bind )
7013 kmp_info_t *thr = __kmp_threads[gtid];
7014 thr->th.th_set_proc_bind = proc_bind;
7022 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7024 kmp_info_t *this_thr = __kmp_threads[gtid];
7030 KMP_DEBUG_ASSERT( team );
7031 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7032 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7035 team->t.t_construct = 0;
7036 team->t.t_ordered.dt.t_value = 0;
7039 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
7040 if ( team->t.t_max_nproc > 1 ) {
7042 for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
7043 team->t.t_disp_buffer[ i ].buffer_index = i;
7045 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
7049 KMP_ASSERT( this_thr->th.th_team == team );
7052 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7053 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7054 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7059 __kmp_fork_barrier( gtid, 0 );
7064 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7066 kmp_info_t *this_thr = __kmp_threads[gtid];
7068 KMP_DEBUG_ASSERT( team );
7069 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7070 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7076 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7077 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7078 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7079 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7080 __kmp_print_structure();
7082 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7083 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7086 __kmp_join_barrier( gtid );
7089 KMP_ASSERT( this_thr->th.th_team == team );
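/*
 * Dynamic thread adjustment (KMP_DYNAMIC_MODE=load balance): the helpers below
 * estimate how many threads are really runnable. __kmp_active_hot_team_nproc
 * counts active threads in the root's hot team, and __kmp_load_balance_nproc
 * shrinks the requested team size when system load leaves fewer processors
 * available, never going below KMP_MIN_NTH.
 */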
7096 #ifdef USE_LOAD_BALANCE
7103 __kmp_active_hot_team_nproc( kmp_root_t *root )
7107 kmp_team_t *hot_team;
7109 if ( root->r.r_active ) {
7112 hot_team = root->r.r_hot_team;
7113 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7114 return hot_team->t.t_nproc - 1;
7121 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7122 if ( hot_team->t.t_threads[i]->th.th_active ) {
7134 __kmp_load_balance_nproc( kmp_root_t *root,
int set_nproc )
7138 int hot_team_active;
7139 int team_curr_active;
7142 KB_TRACE( 20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7143 root, set_nproc ) );
7144 KMP_DEBUG_ASSERT( root );
7145 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
7146 KMP_DEBUG_ASSERT( set_nproc > 1 );
7148 if ( set_nproc == 1) {
7149 KB_TRACE( 20, (
"__kmp_load_balance_nproc: serial execution.\n" ) );
7160 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7161 hot_team_active = __kmp_active_hot_team_nproc( root );
7162 team_curr_active = pool_active + hot_team_active + 1;
7167 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7168 KB_TRACE( 30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7169 system_active, pool_active, hot_team_active ) );
7171 if ( system_active < 0 ) {
7178 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7179 KMP_WARNING( CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit" );
7184 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7185 : root->r.r_hot_team->t.t_nproc);
7186 if ( retval > set_nproc ) {
7189 if ( retval < KMP_MIN_NTH ) {
7190 retval = KMP_MIN_NTH;
7193 KB_TRACE( 20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7203 if ( system_active < team_curr_active ) {
7204 system_active = team_curr_active;
7206 retval = __kmp_avail_proc - system_active + team_curr_active;
7207 if ( retval > set_nproc ) {
7210 if ( retval < KMP_MIN_NTH ) {
7211 retval = KMP_MIN_NTH;
7214 KB_TRACE( 20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7225 __kmp_cleanup(
void )
7229 KA_TRACE( 10, (
"__kmp_cleanup: enter\n" ) );
7231 if (TCR_4(__kmp_init_parallel)) {
7232 #if KMP_HANDLE_SIGNALS
7233 __kmp_remove_signals();
7235 TCW_4(__kmp_init_parallel, FALSE);
7238 if (TCR_4(__kmp_init_middle)) {
7239 #if KMP_AFFINITY_SUPPORTED
7240 __kmp_affinity_uninitialize();
7242 __kmp_cleanup_hierarchy();
7243 TCW_4(__kmp_init_middle, FALSE);
7246 KA_TRACE( 10, (
"__kmp_cleanup: go serial cleanup\n" ) );
7248 if (__kmp_init_serial) {
7249 __kmp_runtime_destroy();
7250 __kmp_init_serial = FALSE;
7253 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7254 if ( __kmp_root[ f ] != NULL ) {
7255 __kmp_free( __kmp_root[ f ] );
7256 __kmp_root[ f ] = NULL;
7259 __kmp_free( __kmp_threads );
7262 __kmp_threads = NULL;
7264 __kmp_threads_capacity = 0;
7266 #if KMP_USE_DYNAMIC_LOCK
7267 __kmp_cleanup_indirect_user_locks();
7269 __kmp_cleanup_user_locks();
7272 #if KMP_AFFINITY_SUPPORTED
7273 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7274 __kmp_cpuinfo_file = NULL;
7277 #if KMP_USE_ADAPTIVE_LOCKS
7278 #if KMP_DEBUG_ADAPTIVE_LOCKS
7279 __kmp_print_speculative_stats();
7282 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7283 __kmp_nested_nth.nth = NULL;
7284 __kmp_nested_nth.size = 0;
7285 __kmp_nested_nth.used = 0;
7287 __kmp_i18n_catclose();
7289 #if KMP_STATS_ENABLED
7290 __kmp_accumulate_stats_at_exit();
7291 __kmp_stats_list.deallocate();
7294 KA_TRACE( 10, (
"__kmp_cleanup: exit\n" ) );
7301 __kmp_ignore_mppbeg(
void )
7305 if ((env = getenv(
"KMP_IGNORE_MPPBEG" )) != NULL) {
7306 if (__kmp_str_match_false( env ))
7314 __kmp_ignore_mppend(
void )
7318 if ((env = getenv(
"KMP_IGNORE_MPPEND" )) != NULL) {
7319 if (__kmp_str_match_false( env ))
7327 __kmp_internal_begin(
void )
7334 gtid = __kmp_entry_gtid();
7335 root = __kmp_threads[ gtid ]->th.th_root;
7336 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7338 if( root->r.r_begin )
return;
7339 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7340 if( root->r.r_begin ) {
7341 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7345 root->r.r_begin = TRUE;
7347 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7355 __kmp_user_set_library (
enum library_type arg)
7363 gtid = __kmp_entry_gtid();
7364 thread = __kmp_threads[ gtid ];
7366 root = thread->th.th_root;
7368 KA_TRACE( 20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7369 if (root->r.r_in_parallel) {
7370 KMP_WARNING( SetLibraryIncorrectCall );
7375 case library_serial :
7376 thread->th.th_set_nproc = 0;
7377 set__nproc( thread, 1 );
7379 case library_turnaround :
7380 thread->th.th_set_nproc = 0;
7381 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7383 case library_throughput :
7384 thread->th.th_set_nproc = 0;
7385 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7388 KMP_FATAL( UnknownLibraryType, arg );
7391 __kmp_aux_set_library ( arg );
7395 __kmp_aux_set_stacksize(
size_t arg )
7397 if (! __kmp_init_serial)
7398 __kmp_serial_initialize();
7401 if (arg & (0x1000 - 1)) {
7402 arg &= ~(0x1000 - 1);
7407 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7410 if (! TCR_4(__kmp_init_parallel)) {
7413 if (value < __kmp_sys_min_stksize )
7414 value = __kmp_sys_min_stksize ;
7415 else if (value > KMP_MAX_STKSIZE)
7416 value = KMP_MAX_STKSIZE;
7418 __kmp_stksize = value;
7420 __kmp_env_stksize = TRUE;
7423 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7429 __kmp_aux_set_library (
enum library_type arg)
7431 __kmp_library = arg;
7433 switch ( __kmp_library ) {
7434 case library_serial :
7436 KMP_INFORM( LibraryIsSerial );
7437 (void) __kmp_change_library( TRUE );
7440 case library_turnaround :
7441 (void) __kmp_change_library( TRUE );
7443 case library_throughput :
7444 (void) __kmp_change_library( FALSE );
7447 KMP_FATAL( UnknownLibraryType, arg );
7455 __kmp_aux_set_blocktime (
int arg, kmp_info_t *thread,
int tid)
7457 int blocktime = arg;
7461 __kmp_save_internal_controls( thread );
7464 if (blocktime < KMP_MIN_BLOCKTIME)
7465 blocktime = KMP_MIN_BLOCKTIME;
7466 else if (blocktime > KMP_MAX_BLOCKTIME)
7467 blocktime = KMP_MAX_BLOCKTIME;
7469 set__blocktime_team( thread->th.th_team, tid, blocktime );
7470 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
7473 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7475 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7476 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
7481 set__bt_set_team( thread->th.th_team, tid, bt_set );
7482 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
7483 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7484 __kmp_gtid_from_tid(tid, thread->th.th_team),
7485 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
7489 __kmp_aux_set_defaults(
7493 if ( ! __kmp_init_serial ) {
7494 __kmp_serial_initialize();
7496 __kmp_env_initialize( str );
7500 || __kmp_display_env || __kmp_display_env_verbose
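/*
 * __kmp_determine_reduction_method: pick how the __kmpc_reduce* entry points
 * will combine private copies -- an atomic block, a tree reduction, or the
 * critical-section fallback -- based on team size, the number and size of the
 * reduction variables, the architecture/OS, and whether the compiler emitted
 * the atomic and tree code paths (KMP_IDENT_ATOMIC_REDUCE flag and
 * reduce_data/reduce_func). The choice is packed into
 * th_local.packed_reduction_method with the barrier variant in the high bits,
 * which is why __kmp_get_reduce_method() shifts right by 8.
 */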
7513 PACKED_REDUCTION_METHOD_T
7514 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7515 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7516 kmp_critical_name *lck )
7524 PACKED_REDUCTION_METHOD_T retval;
7528 KMP_DEBUG_ASSERT( loc );
7529 KMP_DEBUG_ASSERT( lck );
7531 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7532 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7534 retval = critical_reduce_block;
7536 team_size = __kmp_get_team_num_threads( global_tid );
7538 if( team_size == 1 ) {
7540 retval = empty_reduce_block;
7544 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7545 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7547 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
7549 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7551 int teamsize_cutoff = 4;
7553 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7554 if( __kmp_mic_type != non_mic ) {
7555 teamsize_cutoff = 8;
7558 if( tree_available ) {
7559 if( team_size <= teamsize_cutoff ) {
7560 if ( atomic_available ) {
7561 retval = atomic_reduce_block;
7564 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7566 }
else if ( atomic_available ) {
7567 retval = atomic_reduce_block;
7570 #error "Unknown or unsupported OS"
7571 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7573 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7575 #if KMP_OS_LINUX || KMP_OS_WINDOWS
7579 if( atomic_available ) {
7580 if( num_vars <= 2 ) {
7581 retval = atomic_reduce_block;
7587 if( atomic_available && ( num_vars <= 3 ) ) {
7588 retval = atomic_reduce_block;
7589 }
else if( tree_available ) {
7590 if( ( reduce_size > ( 9 *
sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 *
sizeof( kmp_real64 ) ) ) ) {
7591 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7596 #error "Unknown or unsupported OS"
7600 #error "Unknown or unsupported architecture"
7609 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
7611 PACKED_REDUCTION_METHOD_T forced_retval;
7613 int atomic_available, tree_available;
7615 switch( ( forced_retval = __kmp_force_reduction_method ) )
7617 case critical_reduce_block:
7621 case atomic_reduce_block:
7622 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7623 KMP_ASSERT( atomic_available );
7626 case tree_reduce_block:
7627 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7628 KMP_ASSERT( tree_available );
7629 #if KMP_FAST_REDUCTION_BARRIER
7630 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7638 retval = forced_retval;
7641 KA_TRACE(10, (
"reduction method selected=%08x\n", retval ) );
7643 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7644 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7651 __kmp_get_reduce_method( void ) {
7652 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );