#include "kmp.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* forward declarations */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
static inline void
__kmp_null_resume_wrapper( int gtid, volatile void *flag )
{
    switch ( ((kmp_flag_64 *)flag)->get_type() ) {
        case flag32:      __kmp_resume_32( gtid, NULL );      break;
        case flag64:      __kmp_resume_64( gtid, NULL );      break;
        case flag_oncore: __kmp_resume_oncore( gtid, NULL );  break;
    }
}
#ifdef BUILD_TIED_TASK_STACK

// __kmp_trace_task_stack: print the tied tasks from the task stack in order from top to bottom.
static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}
// __kmp_init_task_stack: initialize the task stack when a thread_data structure is created.
static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *) );

    // initialize the stack to be empty
    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}
// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
// Takes the owning thread, since the body frees blocks with __kmp_thread_free().
static void
__kmp_free_task_stack( kmp_info_t *thread, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first one
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}
// __kmp_push_task_stack: push a tied task onto the task stack of the current thread.
static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
                __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top   = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block -> sb_prev   = stack_block;
            new_block -> sb_next   = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}
// __kmp_pop_task_stack: pop the tied task from the task stack; checks that it matches
// the ending task passed in.
static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}
#endif /* BUILD_TIED_TASK_STACK */
// __kmp_push_task: Add a task to the thread's deque.
static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

    // Re-check the deque occupancy now that we hold the lock
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}
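
// Deque discipline (as implemented here and in __kmp_remove_my_task / __kmp_steal_task):
// the owning thread pushes and pops at td_deque_tail under td_deque_lock, while thieves
// take from td_deque_head; TASK_DEQUE_SIZE is a power of two, so indices wrap with
// "& TASK_DEQUE_MASK".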
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends.
void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}
// __kmp_push_current_task_to_thread: set up the current task in a thread for a new team.
void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // current task of the thread is a parent of the new just created implicit tasks of new team
    KF_TRACE( 10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                   "parent_task=%p\n",
                   tid, this_thr, this_thr->th.th_current_task,
                   team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                   "parent_task=%p\n",
                   tid, this_thr, this_thr->th.th_current_task,
                   team->t.t_implicit_task_taskdata[tid].td_parent ) );
}
// __kmp_task_start: bookkeeping for a task starting execution.
static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
}
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution.
void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_begin: report that a given task has started execution.
// Deprecated; never generated by the compiler.
void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
}
#endif // TASK_UNUSED

// __kmp_free_task: free the current task space and the space for shareds.
static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
    __kmp_fast_free( thread, taskdata );
#else /* ! USE_FAST_MEMORY */
    __kmp_thread_free( thread, taskdata );
#endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}
// __kmp_free_task_and_ancestors: free the current task and ancestors without children.
static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    kmp_int32 children = 0;
    kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    if ( !team_or_tasking_serialized ) {
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at an implicit task or if tasking is serialized
        if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        if ( !team_or_tasking_serialized ) {
            // Predecrement simulated by "- 1" calculation
            children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
            KMP_DEBUG_ASSERT( children >= 0 );
        }
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}
// __kmp_task_finish: bookkeeping to do when a task finishes execution.
// resumed_task may be NULL if the task is serialized.
static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of counts if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be cleared only after __kmp_release_deps has run; a task
    // executed immediately from the release_deps code would otherwise set it back to 1.
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    // If the task's destructor thunk flag is set, invoke the compiler-generated
    // destructor thunk now that dependent tasks have been released.
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task: GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else {
            // verify that the resumed task passed in points to the parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that the resumed task was passed in
    }

    // Free this task and then ancestor tasks if they have no children.  Restore
    // th_current_task first so an asynchronous inquiry never sees the freed task.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );
}
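
// Note: a finishing task decrements its parent's td_incomplete_child_tasks (and the
// enclosing taskgroup count) only when the team is parallel and tasking is not
// serialized; those counters are exactly what __kmpc_omp_taskwait and
// __kmpc_end_taskgroup spin on below.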
// __kmpc_omp_task_complete_if0: report that a task has completed execution.
void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide the task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_complete: report that a task has completed execution.
// Deprecated; never generated by the compiler.
void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
}
#endif // TASK_UNUSED

#if OMPT_SUPPORT
// __kmp_task_init_ompt: initialize the OMPT fields maintained by a task.
static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    task->ompt_task_info.task_id = __ompt_task_id_new(tid);
    task->ompt_task_info.function = function;
    task->ompt_task_info.frame.exit_runtime_frame = NULL;
    task->ompt_task_info.frame.reenter_runtime_frame = NULL;
}
#endif
// __kmp_init_implicit_task: initialize the appropriate fields in the implicit task
// for a given thread.  Does not set up the implicit task ICVs; that is assumed to
// have been done elsewhere.
void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id = KMP_GEN_TASK_ID();
    task->td_team    = team;
    task->td_ident   = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_41_ENABLED
    task->td_flags.proxy    = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started   = 1;
    task->td_flags.executing = 1;
    task->td_flags.complete  = 0;
    task->td_flags.freed     = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // not used: implicit tasks are never deallocated
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;            // an implicit task does not have a taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}
// Round a size up to a multiple of "val", where "val" is a power of two.
// Used to insert padding between structures co-allocated with a single malloc() call.
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }
    }
    return size;
}
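
// Example (illustrative): __kmp_round_up_to_val(25, sizeof(void *)) on a 64-bit target
// masks 25 down to 24 and then adds 8, returning 32; a size that is already a multiple
// of val is returned unchanged.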
// __kmp_task_alloc: allocate the taskdata and task data structures for a task.
kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
            // nothing extra to do for a merged if(0) task inside a final task
        }
        flags->final = 1;
    }

#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* Proxy tasks need tasking support even in a serialized region, so make sure a
           task team exists and tasking is enabled. */
        if ( (thread->th.th_task_team) == NULL ) {
            // This should only happen if the team is serialized: set up a task team
            // and propagate it to the thread.
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1);  // 1 => setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif
    // Calculate shared structure offset, including padding after the kmp_task_t struct
    // so that pointers in the shareds struct are aligned.
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation by combining shareds with taskdata
#if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
#else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
#endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure the shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;

    taskdata->td_task_id = KMP_GEN_TASK_ID();
    taskdata->td_team = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent = parent_task;
    taskdata->td_level = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_ident = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_41_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness    = flags->tiedness;
    taskdata->td_flags.final       = flags->final;
    taskdata->td_flags.merged_if0  = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_41_ENABLED
    taskdata->td_flags.proxy       = flags->proxy;
#endif
    taskdata->td_flags.tasktype    = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // Serialize the task if the team is serialized, so implicit parallel region tasks
    // are not left until program termination to execute; it also helps locality.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
        || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started   = 0;
    taskdata->td_flags.executing = 0;
    taskdata->td_flags.complete  = 0;
    taskdata->td_flags.freed     = 0;

    taskdata->td_flags.native = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1;  // start at one: counts the current task and its children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif
    // Only need to keep track of child task counts if the team is parallel and tasking
    // is not serialized, or if it is a proxy task.
#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to track allocated child tasks for explicit tasks, since implicit
        // tasks are never deallocated.
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}
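
// Memory layout produced by __kmp_task_alloc (single allocation):
//   [ kmp_taskdata_t | kmp_task_t (sizeof_kmp_task_t bytes) | pad to pointer size | shareds ]
// shareds_offset is measured from the start of the kmp_taskdata_t block, which is why
// task->shareds is computed as &((char *)taskdata)[shareds_offset] above.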
kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_41_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}
// __kmp_invoke_task: invoke the specified task.
static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);
#if OMP_41_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // A proxy task that was already completed: it only needs its bottom-half finish.
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );
        __kmp_bottom_half_finish_proxy(gtid,task);
        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
        return;
    }

    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_start( gtid, task, current_task );

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_enabled) {
        // Store the thread's state and restore it after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // Discard the task if the parallel region or enclosing taskgroup has been cancelled
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            discard = 1 /* true */;
        }
    }

    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    if (!discard) {
#endif // OMP_40_ENABLED
#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're about to run this task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                current_task->ompt_task_info.task_id,
                taskdata->ompt_task_info.task_id);
        }
#endif
#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }
#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're returning to the callee task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                taskdata->ompt_task_info.task_id,
                current_task->ompt_task_info.task_id);
        }
#endif
#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT
    if (ompt_enabled) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_finish( gtid, task, current_task );

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
}
kmp_int32
__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

    KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    /* Should we execute the new task or queue it?  For now, always try to queue it;
       if the queue fills up, then execute it immediately. */
    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )  // if cannot defer
    {                                                            // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        new_taskdata->td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

    KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
                  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
                  new_taskdata ) );

    return TASK_CURRENT_NOT_QUEUED;
}
// __kmp_omp_task: schedule a non-thread-switchable task for execution.
// If serialize_immediate is TRUE and the task is executed immediately, it is serialized.
kmp_int32
__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPT_SUPPORT
    if (ompt_enabled) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
            __builtin_frame_address(0);
    }
#endif

    /* Always try to queue the task; if the queue fills up, execute it immediately. */
#if OMP_41_ENABLED
    if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
#else
    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
#endif
    {                                                           // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        if ( serialize_immediate )
            new_taskdata -> td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

#if OMPT_SUPPORT
    if (ompt_enabled) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
    }
#endif

    return TASK_CURRENT_NOT_QUEUED;
}
// __kmpc_omp_task: wrapper around __kmp_omp_task to schedule a task from the parent thread.
kmp_int32
__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_int32 res;
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

    KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    res = __kmp_omp_task(gtid,new_task,true);

    KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );
    return res;
}
// __kmpc_omp_taskwait: wait until all tasks generated by the current task are complete.
kmp_int32
__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
{
    kmp_taskdata_t * taskdata;
    kmp_info_t * thread;
    int thread_finished = FALSE;

    KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        thread = __kmp_threads[ gtid ];
        taskdata = thread -> th.th_current_task;

#if OMPT_SUPPORT && OMPT_TRACE
        ompt_task_id_t my_task_id;
        ompt_parallel_id_t my_parallel_id;

        if (ompt_enabled) {
            kmp_team_t *team = thread->th.th_team;
            my_task_id = taskdata->ompt_task_info.task_id;
            my_parallel_id = team->t.ompt_team_info.parallel_id;

            if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
                    my_parallel_id, my_task_id);
            }
        }
#endif

        // Debugger: the taskwait is active; remember the location and encountering thread.
        taskdata->td_taskwait_counter += 1;
        taskdata->td_taskwait_ident    = loc_ref;
        taskdata->td_taskwait_thread   = gtid + 1;

#if USE_ITT_BUILD
        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

#if OMP_41_ENABLED
        if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
#else
        if ( ! taskdata->td_flags.team_serial )
#endif
        {
            kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
            while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
            }
        }
#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

        // Debugger: the taskwait is completed; the location is remembered.
        taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
            ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
                my_parallel_id, my_task_id);
        }
#endif
    }

    KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );

    return TASK_CURRENT_NOT_QUEUED;
}
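
// Taskwait sketch: the calling thread does not block; it repeatedly runs other tasks
// (via kmp_flag_32::execute_tasks, which dispatches to __kmp_execute_tasks_32) until its
// own td_incomplete_child_tasks counter drains to zero.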
// __kmpc_omp_taskyield: switch to a different task.
kmp_int32
__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
{
    kmp_taskdata_t * taskdata;
    kmp_info_t * thread;
    int thread_finished = FALSE;

    KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                  gtid, loc_ref, end_part) );

    if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {

        thread = __kmp_threads[ gtid ];
        taskdata = thread -> th.th_current_task;
        // Debugger: model the taskyield like a taskwait; remember location and thread.
        taskdata->td_taskwait_counter += 1;
        taskdata->td_taskwait_ident    = loc_ref;
        taskdata->td_taskwait_thread   = gtid + 1;

#if USE_ITT_BUILD
        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

        if ( ! taskdata->td_flags.team_serial ) {
            kmp_task_team_t * task_team = thread->th.th_task_team;
            if (task_team != NULL) {
                if (KMP_TASKING_ENABLED(task_team)) {
                    __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
                                            USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
                }
            }
        }
#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

        // Debugger: the taskyield is completed; the location is remembered.
        taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
    }

    KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );

    return TASK_CURRENT_NOT_QUEUED;
}
#if OMP_40_ENABLED
// __kmpc_taskgroup: start a new taskgroup.
void
__kmpc_taskgroup( ident_t* loc, int gtid )
{
    kmp_info_t      * thread = __kmp_threads[ gtid ];
    kmp_taskdata_t  * taskdata = thread->th.th_current_task;
    kmp_taskgroup_t * tg_new =
        (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
    KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
    tg_new->count = 0;
    tg_new->cancel_request = cancel_noreq;
    tg_new->parent = taskdata->td_taskgroup;
    taskdata->td_taskgroup = tg_new;
}
// __kmpc_end_taskgroup: wait until all tasks generated by the current task and its
// descendants within the taskgroup are complete.
void
__kmpc_end_taskgroup( ident_t* loc, int gtid )
{
    kmp_info_t      * thread = __kmp_threads[ gtid ];
    kmp_taskdata_t  * taskdata = thread->th.th_current_task;
    kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
    int thread_finished = FALSE;

    KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
    KMP_DEBUG_ASSERT( taskgroup != NULL );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
#if USE_ITT_BUILD
        // For ITT the taskgroup wait is traced like a taskwait.
        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */

#if OMP_41_ENABLED
        if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
#else
        if ( ! taskdata->td_flags.team_serial )
#endif
        {
            kmp_flag_32 flag(&(taskgroup->count), 0U);
            while ( TCR_4(taskgroup->count) != 0 ) {
                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
            }
        }

#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */
    }
    KMP_DEBUG_ASSERT( taskgroup->count == 0 );

    // Restore the parent taskgroup for the current task
    taskdata->td_taskgroup = taskgroup->parent;
    __kmp_thread_free( thread, taskgroup );

    KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
}
#endif // OMP_40_ENABLED
// __kmp_remove_my_task: remove a task from my own deque.
static kmp_task_t *
__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
                      kmp_int32 is_constrained )
{
    kmp_task_t * task;
    kmp_taskdata_t * taskdata;
    kmp_thread_data_t *thread_data;
    kmp_uint32 tail;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL );  // Caller should check this condition

    thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];

    KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                  thread_data->td.td_deque_tail) );

    if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
        KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
                      gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                      thread_data->td.td_deque_tail) );
        return NULL;
    }

    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

    if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
                      gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                      thread_data->td.td_deque_tail) );
        return NULL;
    }

    tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;  // Wrap index.
    taskdata = thread_data -> td.td_deque[ tail ];

    if (is_constrained) {
        // Task scheduling constraint: only a child of the current task may be scheduled.
        kmp_taskdata_t * current = thread->th.th_current_task;
        kmp_int32 level = current->td_level;
        kmp_taskdata_t * parent = taskdata->td_parent;
        while ( parent != current && parent->td_level > level ) {
            parent = parent->td_parent;  // check generation up to the level of the current task
            KMP_DEBUG_ASSERT(parent != NULL);
        }
        if ( parent != current ) {
            // The tail task is not a child, so no other task in the deque can be either.
            __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
            KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
                          gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                          thread_data->td.td_deque_tail) );
            return NULL;
        }
    }

    thread_data -> td.td_deque_tail = tail;
    TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);

    __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );

    KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
                  thread_data->td.td_deque_tail) );

    task = KMP_TASKDATA_TO_TASK( taskdata );
    return task;
}
// __kmp_steal_task: remove a task from another thread's deque.
// Assumes the caller has already checked that task_team thread_data exists.
static kmp_task_t *
__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
                  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
                  kmp_int32 is_constrained )
{
    kmp_task_t * task;
    kmp_taskdata_t * taskdata;
    kmp_thread_data_t *victim_td, *threads_data;
    kmp_int32 victim_tid;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    threads_data = task_team -> tt.tt_threads_data;
    KMP_DEBUG_ASSERT( threads_data != NULL );  // Caller should check this condition

    victim_tid = victim->th.th_info.ds.ds_tid;
    victim_td  = & threads_data[ victim_tid ];

    KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
                  "head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );

    if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
         (TCR_PTR(victim->th.th_task_team) != task_team))
    {
        KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
                      "ntasks=%d head=%u tail=%u\n",
                      gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
                      victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
        return NULL;
    }

    __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );

    // Check again after acquiring the lock
    if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
         (TCR_PTR(victim->th.th_task_team) != task_team))
    {
        __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
        KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
                      "ntasks=%d head=%u tail=%u\n",
                      gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
                      victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
        return NULL;
    }

    KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );

    if ( !is_constrained ) {
        taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
        // Bump head pointer and wrap.
        victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
    } else {
        // Under the scheduling constraint, steal from the tail of the deque instead.
        kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;  // Wrap index.
        taskdata = victim_td -> td.td_deque[ tail ];
        // The candidate must be a descendant of the stealing thread's current task.
        kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
        kmp_int32 level = current->td_level;
        kmp_taskdata_t * parent = taskdata->td_parent;
        while ( parent != current && parent->td_level > level ) {
            parent = parent->td_parent;  // check generation up to the level of the current task
            KMP_DEBUG_ASSERT(parent != NULL);
        }
        if ( parent != current ) {
            // The tail task is not a descendant, so nothing in this deque can be stolen.
            __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
            KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
                          "ntasks=%d head=%u tail=%u\n",
                          gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
                          task_team, victim_td->td.td_deque_ntasks,
                          victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
            return NULL;
        }
        victim_td -> td.td_deque_tail = tail;
    }
    if (*thread_finished) {
        // Un-mark this victim as a finished victim before releasing the lock, or other
        // threads might be prematurely released from the barrier.
        kmp_uint32 count;

        count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );

        KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
                      gtid, count + 1, task_team) );

        *thread_finished = FALSE;
    }
    TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);

    __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );

    KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
                  "ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                  victim_td->td.td_deque_tail) );

    task = KMP_TASKDATA_TO_TASK( taskdata );
    return task;
}
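
// Stealing protocol: an unconstrained steal takes the oldest task from the victim's
// head; under the task scheduling constraint (is_constrained) the thief instead takes
// from the tail and first verifies the candidate is a descendant of its own current
// task by walking td_parent up to the current task's td_level.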
// __kmp_execute_tasks_template: execute tasks from the calling thread's own deque, then
// try to steal from other threads, until the spin condition given by "flag" is satisfied
// or no more work can be found.
template <class C>
static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
                                               int *thread_finished
                                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    kmp_task_team_t *     task_team;
    kmp_thread_data_t *   threads_data;
    kmp_task_t *          task;
    kmp_taskdata_t *      current_task = thread -> th.th_current_task;
    volatile kmp_uint32 * unfinished_threads;
    kmp_int32             nthreads, last_stolen, k, tid;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );

    task_team = thread -> th.th_task_team;
    if (task_team == NULL) return FALSE;

    KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
                  gtid, final_spin, *thread_finished) );

    threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
    KMP_DEBUG_ASSERT( threads_data != NULL );

    nthreads = task_team -> tt.tt_nproc;
    unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
#if OMP_41_ENABLED
    KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
#else
    KMP_DEBUG_ASSERT( nthreads > 1 );
#endif
    KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );

    // Choose tasks from our own work queue.
  start:
    while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
            if ( itt_sync_obj == NULL ) {
                // we are at fork barrier where we could not get the object reliably
                itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
            }
            __kmp_itt_task_starting( itt_sync_obj );
        }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        __kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL )
            __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */

        // If this thread is only partway through the barrier and the condition is met,
        // return now so the barrier gather/release pattern can proceed.
        if (flag == NULL || (!final_spin && flag->done_check())) {
            KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
            return TRUE;
        }
        if (thread->th.th_task_team == NULL) break;
        KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
    }

    // This thread's work queue is empty.  If we are in the final spin loop of the
    // barrier, check whether the termination condition is satisfied.
#if OMP_41_ENABLED
    // The work queue may be empty but there might be proxy tasks still executing
    if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
#else
    if (final_spin)
#endif
    {
        // First, decrement the #unfinished threads, if that has not already been done.
        if (! *thread_finished) {
            kmp_uint32 count;

            count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
            KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
                          gtid, count, task_team) );
            *thread_finished = TRUE;
        }

        // It is now unsafe to reference thread->th.th_team !!!
        if (flag != NULL && flag->done_check()) {
            KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
            return TRUE;
        }
    }

    if (thread->th.th_task_team == NULL) return FALSE;
#if OMP_41_ENABLED
    // check if there are other threads to steal from, otherwise go back
    if ( nthreads == 1 )
        goto start;
#endif

    // Try to steal from the last place I stole from successfully.
    tid = thread -> th.th_info.ds.ds_tid;
    last_stolen = threads_data[ tid ].td.td_deque_last_stolen;

    if (last_stolen != -1) {
        kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;

        while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
                                         thread_finished, is_constrained )) != NULL)
        {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
                if ( itt_sync_obj == NULL ) {
                    itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
                }
                __kmp_itt_task_starting( itt_sync_obj );
            }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            __kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
            if ( itt_sync_obj != NULL )
                __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */

            // Check to see if this thread can proceed.
            if (flag == NULL || (!final_spin && flag->done_check())) {
                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
                              gtid) );
                return TRUE;
            }
            if (thread->th.th_task_team == NULL) break;
            KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task

            // If the stolen task spawned tasks onto our own run queue, restart the whole process.
            if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
                              gtid) );
                goto start;
            }
        }

        // Don't give priority to stealing from this thread anymore.
        threads_data[ tid ].td.td_deque_last_stolen = -1;

        // The victim's work queue is empty.  If in the final spin loop, check the
        // termination condition as above.
#if OMP_41_ENABLED
        if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
#else
        if (final_spin)
#endif
        {
            if (! *thread_finished) {
                kmp_uint32 count;

                count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
                KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
                              "task_team=%p\n", gtid, count, task_team) );
                *thread_finished = TRUE;
            }

            if (flag != NULL && flag->done_check()) {
                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
                              gtid) );
                return TRUE;
            }
        }
        if (thread->th.th_task_team == NULL) return FALSE;
    }

    // Find a different thread to steal work from.  Pick a random thread other than self.
    {
        kmp_info_t *other_thread;

        k = __kmp_get_random( thread ) % (nthreads - 1);
        if ( k >= thread -> th.th_info.ds.ds_tid ) {
            ++k;    // adjust the random distribution to exclude self
        }

        other_thread = threads_data[k].td.td_thr;

        // The target thread may still be sleeping at the fork barrier; wake it so it can
        // steal tasks as well.
        if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
             (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
             (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
        {
            __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
        }

        // Now try to steal work from the selected thread.
        while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
                                         thread_finished, is_constrained )) != NULL)
        {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
                if ( itt_sync_obj == NULL ) {
                    itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
                }
                __kmp_itt_task_starting( itt_sync_obj );
            }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            __kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
            if ( itt_sync_obj != NULL )
                __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */

            // Remember this victim so we try it first next time.
            threads_data[ tid ].td.td_deque_last_stolen = k;

            // Check to see if this thread can proceed.
            if (flag == NULL || (!final_spin && flag->done_check())) {
                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
                              gtid) );
                return TRUE;
            }
            if (thread->th.th_task_team == NULL) break;
            KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task

            // If the stolen task spawned tasks onto our own run queue, restart the whole process.
            if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
                              gtid) );
                goto start;
            }
        }

        // The victim's work queue is empty.  If in the final spin loop, check the
        // termination condition one last time.
#if OMP_41_ENABLED
        if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
#else
        if (final_spin)
#endif
        {
            if (! *thread_finished) {
                kmp_uint32 count;

                count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
                KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
                              "task_team=%p\n", gtid, count, task_team) );
                *thread_finished = TRUE;
            }

            if (flag != NULL && flag->done_check()) {
                KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
                return TRUE;
            }
        }
        if (thread->th.th_task_team == NULL) return FALSE;
    }

    KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
    return FALSE;
}
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
                           int *thread_finished
                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
                           int *thread_finished
                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
                               int *thread_finished
                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
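
// The three entry points above differ only in the flag type they wait on
// (kmp_flag_32 / kmp_flag_64 / kmp_flag_oncore); all of the scheduling logic lives in
// __kmp_execute_tasks_template, which is instantiated once per flag type.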
// __kmp_enable_tasking: allocate task team thread data and resume any threads sleeping
// at the next barrier so they can assist in executing enqueued tasks.
// The first thread in allocates the threads_data array atomically.
static void
__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
{
    kmp_thread_data_t *threads_data;
    int nthreads, i, is_init_thread;

    KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
                    __kmp_gtid_from_thread( this_thr ) ) );

    KMP_DEBUG_ASSERT(task_team != NULL);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);

    nthreads = task_team->tt.tt_nproc;
    KMP_DEBUG_ASSERT(nthreads > 0);
    KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);

    // Allocate or increase the size of threads_data if necessary
    is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );

    if (!is_init_thread) {
        // Some other thread already set up the array.
        KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
                        __kmp_gtid_from_thread( this_thr ) ) );
        return;
    }
    threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
    KMP_DEBUG_ASSERT( threads_data != NULL );

    if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
         ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
    {
        // Release any threads sleeping at the barrier, so they can steal tasks and
        // execute them.
        for (i = 0; i < nthreads; i++) {
            volatile void *sleep_loc;
            kmp_info_t *thread = threads_data[i].td.td_thr;

            if (i == this_thr->th.th_info.ds.ds_tid) {
                continue;
            }
            // A thread may be putting itself to sleep without having set th_sleep_loc
            // yet; __kmp_execute_tasks_template() periodically re-checks and wakes such
            // threads using the same random mechanism used for stealing.
            if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
            {
                KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                                __kmp_gtid_from_thread( this_thr ),
                                __kmp_gtid_from_thread( thread ) ) );
                __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
            }
            else {
                KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                                __kmp_gtid_from_thread( this_thr ),
                                __kmp_gtid_from_thread( thread ) ) );
            }
        }
    }

    KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
                    __kmp_gtid_from_thread( this_thr ) ) );
}
/* ------------------------------------------------------------------------ */
/* Utility routines for "task teams" */

// Free list of task_team data structures maintained by the runtime.
static kmp_task_team_t *__kmp_free_task_teams = NULL;
// Lock protecting the task team free list.
static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
// __kmp_alloc_task_deque: allocate a task deque for a particular thread and initialize
// the related data structures.  Happens once per thread per task team, since task teams
// are recycled.  No lock is needed: each thread allocates only its own deque.
static void
__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
{
    __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );

    // Initialize last stolen task field to "none"
    thread_data -> td.td_deque_last_stolen = -1;

    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );

    KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
                    __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
    // Allocate space for the task deque and zero it.  Cannot use __kmp_thread_calloc()
    // because the threads may be gone by the time the task team is reaped.
    thread_data -> td.td_deque = (kmp_taskdata_t **)
        __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
}
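
// Note: in this version the deque has a fixed capacity of TASK_DEQUE_SIZE entries;
// __kmp_push_task reports TASK_NOT_PUSHED when it is full, and the task is then
// executed immediately by the encountering thread instead of being deferred.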
// __kmp_free_task_deque: deallocate a task deque for a particular thread.  Happens at
// library deallocation, so there is no need to reset all thread data fields.
static void
__kmp_free_task_deque( kmp_thread_data_t *thread_data )
{
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

    if ( thread_data -> td.td_deque != NULL ) {
        TCW_4(thread_data -> td.td_deque_ntasks, 0);
        __kmp_free( thread_data -> td.td_deque );
        thread_data -> td.td_deque = NULL;
    }
    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

#ifdef BUILD_TIED_TASK_STACK
    // GEH: Figure out what to do here for td_susp_tied_tasks
    if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
        __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
    }
#endif // BUILD_TIED_TASK_STACK
}

// __kmp_realloc_task_threads_data: allocate or enlarge the threads_data array for a task
// team.  Only the first thread to get the lock allocates or enlarges the array and
// re-initializes its elements; that thread returns TRUE, the rest return FALSE.
static int
__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
{
    kmp_thread_data_t ** threads_data_p;
    kmp_int32            nthreads, maxthreads;
    int                  is_init_thread = FALSE;

    if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
        // Already reallocated and initialized.
        return FALSE;
    }

    threads_data_p = & task_team -> tt.tt_threads_data;
    nthreads   = task_team -> tt.tt_nproc;
    maxthreads = task_team -> tt.tt_max_threads;

    // All threads must lock when they encounter the first task of the implicit task
    // region to make sure threads_data fields are (re)initialized before use.
    __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );

    if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
        // first thread to enable tasking
        kmp_team_t *team = thread -> th.th_team;
        int i;

        is_init_thread = TRUE;
        if ( maxthreads < nthreads ) {

            if ( *threads_data_p != NULL ) {
                kmp_thread_data_t *old_data = *threads_data_p;
                kmp_thread_data_t *new_data = NULL;

                KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
                                "threads data for task_team %p, new_size = %d, old_size = %d\n",
                                __kmp_gtid_from_thread( thread ), task_team,
                                nthreads, maxthreads ) );
                // Reallocate threads_data to have more elements than the current array.
                // Cannot use __kmp_thread_realloc() because the threads may be gone by
                // the time the task team is reaped; new entries are zeroed by __kmp_allocate().
                new_data = (kmp_thread_data_t *)
                            __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
                // copy old data to new data (byte count uses the element type)
                KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
                              (void *) old_data,
                              maxthreads * sizeof(kmp_thread_data_t) );

#ifdef BUILD_TIED_TASK_STACK
                // GEH: Figure out if this is the right thing to do
                for (i = maxthreads; i < nthreads; i++) {
                    kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
                    __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
                }
#endif // BUILD_TIED_TASK_STACK
                // Install the new data and free the old data
                (*threads_data_p) = new_data;
                __kmp_free( old_data );
            }
            else {
                KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
                                "threads data for task_team %p, size = %d\n",
                                __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
                // Make the initial allocation for the threads_data array; entries are zeroed.
                *threads_data_p = (kmp_thread_data_t *)
                                  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
#ifdef BUILD_TIED_TASK_STACK
                // GEH: Figure out if this is the right thing to do
                for (i = 0; i < nthreads; i++) {
                    kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
                    __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
                }
#endif // BUILD_TIED_TASK_STACK
            }
            task_team -> tt.tt_max_threads = nthreads;
        }
        else {
            // If the array has (more than) enough elements, go ahead and use it
            KMP_DEBUG_ASSERT( *threads_data_p != NULL );
        }

        // initialize threads_data pointers back to thread_info structures
        for (i = 0; i < nthreads; i++) {
            kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
            thread_data -> td.td_thr = team -> t.t_threads[i];

            if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
                // The last stolen field survives across teams / barriers, and the number
                // of threads may have changed, so reset it to "none".
                thread_data -> td.td_deque_last_stolen = -1;
            }
        }

        TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
    }

    __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
    return is_init_thread;
}
// __kmp_free_task_threads_data: deallocate a task team's threads_data array, including
// any attached tasking deques.  Only occurs at library shutdown.
static void
__kmp_free_task_threads_data( kmp_task_team_t *task_team )
{
    __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
    if ( task_team -> tt.tt_threads_data != NULL ) {
        int i;
        for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
            __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
        }
        __kmp_free( task_team -> tt.tt_threads_data );
        task_team -> tt.tt_threads_data = NULL;
    }
    __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
}
// __kmp_allocate_task_team: allocate a task team associated with a specific team, taking
// one from the global free list if possible, and initialize its data structures.
static kmp_task_team_t *
__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
{
    kmp_task_team_t *task_team = NULL;
    int nthreads;

    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
                    (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );

    if (TCR_PTR(__kmp_free_task_teams) != NULL) {
        // Take a task team from the task team pool
        __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
        if (__kmp_free_task_teams != NULL) {
            task_team = __kmp_free_task_teams;
            TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
            task_team -> tt.tt_next = NULL;
        }
        __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
    }

    if (task_team == NULL) {
        KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
                        "task team for team %p\n",
                        __kmp_gtid_from_thread( thread ), team ) );
        // Allocate a new task team; cannot use __kmp_thread_malloc() because the threads
        // may be gone by the time the task team is reaped.  __kmp_allocate zeroes memory.
        task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
        __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
    }

    TCW_4(task_team -> tt.tt_found_tasks, FALSE);
#if OMP_41_ENABLED
    TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
#endif
    task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;

    TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
    TCW_4( task_team -> tt.tt_active, TRUE );

    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
                    (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
    return task_team;
}
// __kmp_free_task_team: return a task team to the global free list.
void
__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
{
    KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
                    thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );

    // Put task team back on free list
    __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );

    KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
    task_team -> tt.tt_next = __kmp_free_task_teams;
    TCW_PTR(__kmp_free_task_teams, task_team);

    __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
}
// __kmp_reap_task_teams: frees all task teams on the free list at library shutdown.

void
__kmp_reap_task_teams( void )
{
    kmp_task_team_t *task_team;

    if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
        __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
        while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
            __kmp_free_task_teams = task_team -> tt.tt_next;
            task_team -> tt.tt_next = NULL;

            // Free threads_data if necessary
            if ( task_team -> tt.tt_threads_data != NULL ) {
                __kmp_free_task_threads_data( task_team );
            }
            __kmp_free( task_team );
        }
        __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
    }
}
// __kmp_wait_to_unref_task_teams:
// Wait until each thread in the thread pool has released its reference to a task team.

void
__kmp_wait_to_unref_task_teams(void)
{
    kmp_info_t *thread;
    kmp_uint32 spins;
    int done;

    KMP_INIT_YIELD( spins );

    for (;;) {
        done = TRUE;
        for (thread = (kmp_info_t *)__kmp_thread_pool; thread != NULL;
             thread = thread->th.th_next_pool)
        {
#if KMP_OS_WINDOWS
            DWORD exit_val;
#endif
            if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
                KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                                __kmp_gtid_from_thread( thread ) ) );
                continue;
            }
#if KMP_OS_WINDOWS
            // A thread that has already exited no longer references its task team.
            if (!__kmp_is_thread_alive(thread, &exit_val)) {
                thread->th.th_task_team = NULL;
                continue;
            }
#endif
            done = FALSE;  // This thread still references its task team.

            KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
                            __kmp_gtid_from_thread( thread ) ) );

            if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
                volatile void *sleep_loc;
                // If the thread is sleeping, wake it so it can unreference the task team sooner.
                if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
                    KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
                                    __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
                    __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
                }
            }
        }
        if (done) break;

        // If oversubscribed, or after spinning for a while, yield to other threads.
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
}
// __kmp_task_team_setup:  Create a task_team for the current team, but use
// an already created, unused one if it already exists.

void
__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // If this task_team hasn't been created yet, allocate it.  It will be used in
    // the region after the next one.  If it already exists, it is the current task
    // team and should not be touched yet, as it may still be in use.
    if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
        team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
                      __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
                      ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
    }

    // After threads exit the barrier release, they call __kmp_task_team_sync() and
    // start using the other task team, so make sure it is allocated and properly
    // initialized.  No task teams are formed for serialized teams.
    if (team->t.t_nproc > 1) {
        int other_team = 1 - this_thr->th.th_task_state;
        if (team->t.t_task_team[other_team] == NULL) { // setup the other team as well
            team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
            KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
                          __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
                          ((team != NULL) ? team->t.t_id : -1), other_team ));
        }
        else { // Leave the old task team struct in place for the upcoming region; reset it if needed
            kmp_task_team_t *task_team = team->t.t_task_team[other_team];
            if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
                TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
                TCW_4(task_team->tt.tt_found_tasks, FALSE);
#if OMP_41_ENABLED
                TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#endif
                TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
                TCW_4(task_team->tt.tt_active, TRUE );
            }
            KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
                          __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
                          ((team != NULL) ? team->t.t_id : -1), other_team ));
        }
    }
}
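
/* Note (editorial summary of the scheme above): the team keeps two task_team
   pointers, indexed by the parity bit th_task_state.  The master sets up both
   in __kmp_task_team_setup(): the one for the current region and the one that
   will be used after the next barrier.  Worker threads switch between them in
   __kmp_task_team_sync() below by toggling th_task_state, which lets threads
   that are still draining tasks keep using the old task team while the next
   region's task team is being prepared. */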
// __kmp_task_team_sync: propagate the task team pointer to the current thread.

void
__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // Toggle the th_task_state field, to switch which task_team this thread refers to
    this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
    // It is now safe to propagate the task team pointer from the team struct to this thread.
    TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
    KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
                  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
                  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
// __kmp_task_team_wait: the master waits for outstanding tasks after the barrier
// gather phase, then deactivates the old task team.

void
__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
                      USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
    kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );

    if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
        // Worker threads may have dropped through to the release phase, but could
        // still be executing tasks; wait here for all tasks to complete.
        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
                      __kmp_gtid_from_thread(this_thr), task_team));
        kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj));

        // Deactivate the old task team, so the workers stop referencing it while spinning.
        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
                      "setting active to false, setting local and team's pointer to NULL\n",
                      __kmp_gtid_from_thread(this_thr), task_team));
#if OMP_41_ENABLED
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
        TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
#else
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
#endif
        TCW_SYNC_4( task_team->tt.tt_active, FALSE );

        TCW_PTR(this_thr->th.th_task_team, NULL);
    }
}
// __kmp_tasking_barrier: execute all queued tasks prior to a regular barrier.
// May only be called when __kmp_tasking_mode == tskm_extra_barrier.

void
__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
{
    volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
    int flag = FALSE;
    KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );

    KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
    kmp_flag_32 spin_flag(spin, 0U);
    while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
                                     USE_ITT_BUILD_ARG(NULL), 0 ) ) {
        KMP_FSYNC_SPIN_PREPARE( spin );

        if( TCR_4(__kmp_global.g.g_done) ) {
            if( __kmp_global.g.g_abort )
                __kmp_abort_thread( );
            break;
        }
        KMP_YIELD( TRUE );
    }
    KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
}
// __kmp_give_task: puts a task into the deque of the given thread.
// Returns true on success, false if the deque is missing or full.

static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
{
    kmp_task_team_t *   task_team   = thread->th.th_task_team;
    kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
    kmp_taskdata_t *    taskdata    = KMP_TASK_TO_TASKDATA(task);
    bool result = false;

    KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );

    KMP_DEBUG_ASSERT( task_team != NULL );

    if (thread_data -> td.td_deque == NULL ) {
        // The target thread has not allocated a deque yet, so the task cannot be given to it.
        KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
        return result;
    }

    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        return result;
    }

    __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );

    // Re-check under the lock: the deque may have filled up in the meantime.
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        goto release_and_exit;
    }

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
    // Wrap the tail index around the circular deque.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);

    result = true;
    KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );

release_and_exit:
    __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );

    return result;
}
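
/* Note (editorial summary of the proxy-task completion scheme below):
   finishing a proxy task is split into two pieces:
    - the "top half", which can safely run on a thread outside the team, and
    - the "bottom half", which must run on a thread inside the team.
   To run the bottom half, the proxy task is queued back into one of the team's
   deques via __kmp_give_task().  Because the bottom half could otherwise free
   the task before the second part of the top half has executed, the task's
   td_incomplete_child_tasks counter is used to synchronize the two halves. */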

// First part of the "top half": may run on a thread outside the team.
static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    taskdata -> td_flags.complete = 1;   // mark the task as completed

    if ( taskdata->td_taskgroup )
        KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );

    // Create an imaginary child for this task so the bottom half cannot release
    // the task before the second top half has finished.
    TCR_4(taskdata->td_incomplete_child_tasks++);
}

// Second part of the "top half": runs after the bottom half has been queued.
static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    kmp_int32 children = 0;

    children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );

    // Remove the imaginary child created by the first top half.
    TCR_4(taskdata->td_incomplete_child_tasks--);
}

// The "bottom half": must run on a thread inside the team that owns the task.
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 );  // the top half must have run already

    // Wait until the second top half has finished; spinning should be enough
    // because the top half is short.
    while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;

    __kmp_release_deps(gtid,taskdata);
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}

// __kmpc_proxy_task_completed: complete a proxy task from a thread within the team.
void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
    KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);
    __kmp_second_top_half_finish_proxy(taskdata);
    __kmp_bottom_half_finish_proxy(gtid,ptask);

    KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
}

// __kmpc_proxy_task_completed_ooo: complete a proxy task from a thread that may
// not belong to the team (for example, an asynchronous completion callback).
void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);

    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);

    // Enqueue the task so a thread inside the team runs the bottom half;
    // round-robin over the team until some thread's deque accepts it.
    kmp_team_t * team = taskdata->td_team;
    kmp_int32 nthreads = team->t.t_nproc;
    kmp_info_t *thread;
    kmp_int32 k = 0;

    do {
        k = (k+1) % nthreads;
        thread = team->t.t_threads[k];
    } while ( !__kmp_give_task( thread, k, ptask ) );

    __kmp_second_top_half_finish_proxy(taskdata);

    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
}

// Statistics macros documented in kmp_stats.h:
//   KMP_TIME_BLOCK(name)   -- uses the specified timer (name) to time the code block.
//   KMP_COUNT_BLOCK(name)  -- increments the specified counter (name).
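// Illustrative usage sketch (assumes a build with stats enabled; the timer/counter
// name OMP_example below is hypothetical and would normally come from the lists
// defined in kmp_stats.h):
//
//     void __kmp_example_stats_usage( void )
//     {
//         KMP_COUNT_BLOCK(OMP_example);   // bump the OMP_example counter once per call
//         KMP_TIME_BLOCK(OMP_example);    // time the remainder of this block
//         /* ... code being measured ... */
//     }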