#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#include "ompt-specific.h"
#include "tsan_annotations.h"

/* forward declarations */
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
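// The "task stack" routines below maintain a per-thread stack of suspended
// tied tasks; they are compiled only when BUILD_TIED_TASK_STACK is defined
// (debug support), so they are normally not built into the runtime.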
#ifdef BUILD_TIED_TASK_STACK

static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up ts_top if we need to pop from previous block
    if (entries & TASK_STACK_INDEX_MASK == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping for this entry
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
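// __kmp_free_task_stack: release the task stack when thread_data is
// destroyed; only the overflow blocks were heap-allocated, the first block
// lives inside thread_data.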
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // initialize the stack to be empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
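// __kmp_push_task_stack: push a tied task onto the calling thread's task
// stack, growing the stack by another block when the current block fills up.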
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // Don't push anything on stack if team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));

  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next !=
        NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    return; // Don't pop anything from stack if team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from previous block
  if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
  return;
}
#endif /* BUILD_TIED_TASK_STACK */
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check if the candidate obeys the Task Scheduling Constraints (TSC):
    // only a descendant of all deferred tied tasks can be scheduled; checking
    // the last one is enough, as it is in turn the descendant of all others.
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the task is not suspended on barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies, acquire locks
  kmp_depnode_t *node = tasknew->td_depnode;
  if (node && (node->dn.mtx_num_locks > 0)) {
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not get the lock, release previously acquired locks
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // negative num_locks means all locks acquired successfully
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
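// __kmp_realloc_task_deque: double the size of a thread's task deque and copy
// the existing entries over; the caller must hold the deque lock.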
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
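// __kmp_push_task: try to add a task to the calling thread's deque; returns
// TASK_NOT_PUSHED when the task must be executed immediately instead (e.g. a
// serialized team, or a full deque whose tail task could legally run).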
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (taskdata->td_flags.task_serial) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (!KMP_TASKING_ENABLED(task_team)) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate
  if (thread_data->td.td_deque == NULL) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      // expand deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }
  // Lock the deque for the task push operation
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count

  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
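// The two helpers below maintain th_current_task: the pop routine restores the
// parent task as current, and the push routine installs the implicit task of a
// new team for the given thread.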
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
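// __kmp_task_start: bookkeeping done when a task begins execution: the
// encountering task is marked as no longer executing and the new task becomes
// the thread's current task.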
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark currently executing task as suspended
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as the current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
}
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.ndeps = 0;
  task->ompt_task_info.deps = NULL;
}
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  // let OMPT know that we're about to run this task
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}

// let OMPT know that we're returning to the callee task
static inline void
__ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task,
                   ompt_task_status_t status = ompt_task_complete) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
      taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
    status = ompt_task_cancel;
  }

  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        &((resumed_task ? resumed_task
                        : (taskdata->ompt_task_info.scheduling_parent
                               ? taskdata->ompt_task_info.scheduling_parent
                               : taskdata->td_parent))
              ->ompt_task_info.task_data));
  }
}
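// __kmpc_omp_task_begin_if0: entry point used for undeferred tasks (if(0)
// clause); the task is started immediately on the encountering thread.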
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

#if OMPT_SUPPORT
  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
      current_task->ompt_task_info.frame.enter_frame.ptr =
          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
      current_task->ompt_task_info.frame.enter_frame_flags =
          taskdata->ompt_task_info.frame.exit_frame_flags =
              ompt_frame_application | ompt_frame_framepointer;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
          return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }
#endif // OMPT_SUPPORT

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT

void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
#endif
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_begin: report that a given task has started execution
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED

// __kmp_free_task: free the current task space and the space for shareds
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);

  taskdata->td_flags.freed = 1;
  ANNOTATE_HAPPENS_BEFORE(taskdata);
// deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
// __kmp_free_task_and_ancestors: free the current task and ancestors without
// children
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
#if OMP_45_ENABLED
  // Proxy tasks must always be allowed to free their parents
  // because they can be run in background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
#else
  kmp_int32 team_serial =
      taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser;
#endif
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Now, go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    if (team_serial)
      return;
    // Stop checking ancestors at implicit task instead of walking up ancestor
    // tree to avoid premature deallocation of ancestors.
    if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
      if (taskdata->td_dephash) { // do we need to cleanup dephash?
        int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
        kmp_tasking_flags_t flags_old = taskdata->td_flags;
        if (children == 0 && flags_old.complete == 1) {
          kmp_tasking_flags_t flags_new = flags_old;
          flags_new.complete = 0;
          if (KMP_COMPARE_AND_STORE_ACQ32(
                  RCAST(kmp_int32 *, &taskdata->td_flags),
                  *RCAST(kmp_int32 *, &flags_old),
                  *RCAST(kmp_int32 *, &flags_new))) {
            KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
                           "dephash of implicit task %p\n",
                           gtid, taskdata));
            // cleanup dephash of the implicit task
            __kmp_dephash_free_entries(thread, taskdata->td_dephash);
          }
        }
      }
      return;
    }
    // Predecrement simulated by "- 1" calculation
    children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
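// __kmp_task_finish: bookkeeping done when a task finishes execution; resumes
// the given task (or the parent for serialized tasks) and frees the finished
// task and any ancestors that no longer have children.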
template <bool ompt>
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];
#if OMP_45_ENABLED
  kmp_task_team_t *task_team =
      thread->th.th_task_team; // might be NULL for serial teams...
#endif // OMP_45_ENABLED
  kmp_int32 children = 0;

  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

// Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_pop_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to check the counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // untied task is not done, to be continued possibly by another thread
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent; // In a serialized task, the resumed
        // task is the parent
      }
      thread->th.th_current_task = resumed_task; // restore current_task
      resumed_task->td_flags.executing = 1; // resume previous task
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }

#if OMPT_SUPPORT
  if (ompt)
    __ompt_task_finish(task, resumed_task);
#endif

  // Check mutexinoutset dependencies, release locks
  kmp_depnode_t *node = taskdata->td_depnode;
  if (node && (node->dn.mtx_num_locks < 0)) {
    // negative num_locks means all locks were acquired
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
    for (int i = node->dn.mtx_num_locks - 1; i >= 0; --i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      __kmp_release_lock(node->dn.mtx_locks[i], gtid);
    }
  }

  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  taskdata->td_flags.complete = 1; // mark the task as completed
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  // Only need to keep track of count if team parallel and tasking not serialized
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    // Predecrement simulated by "- 1" calculation
    children =
        KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
#if OMP_40_ENABLED
    if (taskdata->td_taskgroup)
      KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
    __kmp_release_deps(gtid, taskdata);
#if OMP_45_ENABLED
  } else if (task_team && task_team->tt.tt_found_proxy_tasks) {
    // if we found proxy tasks there could exist a dependency chain
    // with the proxy task as origin
    __kmp_release_deps(gtid, taskdata);
#endif // OMP_45_ENABLED
#endif // OMP_40_ENABLED
  }

  // td_flags.executing must not be marked as 0 until the task is actually executed
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
  taskdata->td_flags.executing = 0; // suspend the finishing task

  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));

#if OMP_40_ENABLED
  // If the task's destructor thunk flag has been set, invoke the destructor
  // thunk that has been generated by the compiler.
  if (taskdata->td_flags.destructors_thunk) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }
#endif // OMP_40_ENABLED

  // bookkeeping for resuming task: note tasking_ser => task_serial
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent; // In a serialized task, the resumed
      // task is the parent
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task !=
                     NULL); // verify that resumed task is passed as an argument
  }

  // Free this task and then ancestor tasks if they have no children.
  // Restore th_current_task first so that an asynchronous inquiry does not see
  // the freed task as the current task.
  thread->th.th_current_task = resumed_task;
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  resumed_task->td_flags.executing = 1; // resume previous task

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));
}
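// __kmpc_omp_task_complete_if0: report that an undeferred task has completed
// execution; the template parameter selects whether OMPT frames are updated.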
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                                                  kmp_int32 gtid,
                                                  kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  // this routine will provide task to resume
  __kmp_task_finish<ompt>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

#if OMPT_SUPPORT
  if (ompt) {
    ompt_frame_t *ompt_frame;
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame = ompt_data_none;
    ompt_frame->enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
  }
#endif
}
#if OMPT_SUPPORT
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
#endif // OMPT_SUPPORT

void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
    return;
  }
#endif
  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}

#ifdef TASK_UNUSED
// __kmpc_omp_task_complete: report that a task has completed execution
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<false>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
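// __kmp_init_implicit_task: initialize the implicit task slot of a team for
// the given thread id; implicit tasks are always tied, serialized and marked
// as already started/executing.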
#endif // TASK_UNUSED

void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
  task->td_flags.proxy = TASK_FULL;

  // All implicit tasks are executed immediately, not deferred
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;

  task->td_depnode = NULL;
  task->td_last_tied = task;

  if (set_curr_task) { // only do this init first time thread is created
    KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
    // Not used: don't need to deallocate implicit task
    KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
    task->td_taskgroup = NULL; // An implicit task does not have taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash) {
    int children;
    task->td_flags.complete = 1;
    children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
    kmp_tasking_flags_t flags_old = task->td_flags;
    if (children == 0 && flags_old.complete == 1) {
      kmp_tasking_flags_t flags_new = flags_old;
      flags_new.complete = 0;
      if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
                                      *RCAST(kmp_int32 *, &flags_old),
                                      *RCAST(kmp_int32 *, &flags_new))) {
        KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
                       "dephash of implicit task %p\n",
                       thread->th.th_info.ds.ds_gtid, task));
        __kmp_dephash_free_entries(thread, task->td_dephash);
      }
    }
  }
}
1128 void __kmp_free_implicit_task(kmp_info_t *thread) {
1129 kmp_taskdata_t *task = thread->th.th_current_task;
1130 if (task && task->td_dephash) {
1131 __kmp_dephash_free(thread, task->td_dephash);
1132 task->td_dephash = NULL;
// Round up a size to a power of two specified by val: used to insert padding
// between structures co-allocated using a single malloc() call
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
}
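// __kmp_task_alloc: allocate the kmp_taskdata_t, kmp_task_t and shareds blocks
// in a single allocation, then initialize all task flags and parent/child
// bookkeeping for the new explicit task.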
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize();

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));
1179 if (parent_task->td_flags.final) {
1180 if (flags->merged_if0) {
1184 if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
1188 KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
1192 if (flags->proxy == TASK_PROXY) {
1193 flags->tiedness = TASK_UNTIED;
1194 flags->merged_if0 = 1;
1198 if ((thread->th.th_task_team) == NULL) {
1201 KMP_DEBUG_ASSERT(team->t.t_serialized);
1203 (
"T#%d creating task team in __kmp_task_alloc for proxy task\n",
1205 __kmp_task_team_setup(
1208 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
1210 kmp_task_team_t *task_team = thread->th.th_task_team;
1213 if (!KMP_TASKING_ENABLED(task_team)) {
1216 (
"T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
1217 __kmp_enable_tasking(task_team, thread);
1218 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1219 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
1221 if (thread_data->td.td_deque == NULL) {
1222 __kmp_alloc_task_deque(thread, thread_data);
1226 if (task_team->tt.tt_found_proxy_tasks == FALSE)
1227 TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
  // Calculate shared structure offset including padding after kmp_task_t
  // struct to align pointers in the shareds struct
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

// Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */
  ANNOTATE_HAPPENS_AFTER(taskdata);

  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid double allocation here by combining shareds with taskdata
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
1274 taskdata->td_task_id = KMP_GEN_TASK_ID();
1275 taskdata->td_team = team;
1276 taskdata->td_alloc_thread = thread;
1277 taskdata->td_parent = parent_task;
1278 taskdata->td_level = parent_task->td_level + 1;
1279 KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
1280 taskdata->td_ident = loc_ref;
1281 taskdata->td_taskwait_ident = NULL;
1282 taskdata->td_taskwait_counter = 0;
1283 taskdata->td_taskwait_thread = 0;
1284 KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
1287 if (flags->proxy == TASK_FULL)
1289 copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
1291 taskdata->td_flags.tiedness = flags->tiedness;
1292 taskdata->td_flags.final = flags->final;
1293 taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
  taskdata->td_flags.proxy = flags->proxy;
1299 taskdata->td_task_team = thread->th.th_task_team;
1300 taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1302 taskdata->td_flags.tasktype = TASK_EXPLICIT;
1305 taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1308 taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1314 taskdata->td_flags.task_serial =
1315 (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
1316 taskdata->td_flags.tasking_ser);
1318 taskdata->td_flags.started = 0;
1319 taskdata->td_flags.executing = 0;
1320 taskdata->td_flags.complete = 0;
1321 taskdata->td_flags.freed = 0;
1323 taskdata->td_flags.native = flags->native;
1325 KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
1327 KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
1329 taskdata->td_taskgroup =
1330 parent_task->td_taskgroup;
1331 taskdata->td_dephash = NULL;
1332 taskdata->td_depnode = NULL;
1334 if (flags->tiedness == TASK_UNTIED)
1335 taskdata->td_last_tied = NULL;
1337 taskdata->td_last_tied = taskdata;
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);

// Only need to keep track of child task counts if team parallel and tasking
// not serialized, or if it is a proxy task
#if OMP_45_ENABLED
  if (flags->proxy == TASK_PROXY ||
      !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#else
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#endif
  {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Only need to keep track of allocated child tasks for explicit tasks
    // since implicit tasks are not deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
    }
  }

  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));
  ANNOTATE_HAPPENS_BEFORE(task);

  return task;
}
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;

  input_flags->native = FALSE;
// __kmp_task_alloc() sets up all other runtime flags

#if OMP_45_ENABLED
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));
#else
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                sizeof_kmp_task_t, sizeof_shareds, task_entry));
#endif

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
kmp_int32
__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, kmp_int32 naffins,
                                  kmp_task_affinity_info_t *affin_list) {
  return 0;
}
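// __kmp_invoke_task: run the task routine on the calling thread, handling
// proxy bottom halves, cancellation, stats timers and OMPT callbacks around
// the actual invocation.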
static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread;

  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
  KMP_DEBUG_ASSERT(task);
  if (taskdata->td_flags.proxy == TASK_PROXY &&
      taskdata->td_flags.complete == 1) {
    // This is a proxy task that was already completed but it needs to run
    // its bottom-half finish
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));

    return;
  }
1463 ompt_thread_info_t oldInfo;
1464 if (UNLIKELY(ompt_enabled.enabled)) {
1466 thread = __kmp_threads[gtid];
1467 oldInfo = thread->th.ompt_thread_info;
1468 thread->th.ompt_thread_info.wait_id = 0;
1469 thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
1470 ? ompt_state_work_serial
1471 : ompt_state_work_parallel;
1472 taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1478 if (taskdata->td_flags.proxy != TASK_PROXY) {
1480 ANNOTATE_HAPPENS_AFTER(task);
1481 __kmp_task_start(gtid, task, current_task);
1490 if (__kmp_omp_cancellation) {
1491 thread = __kmp_threads[gtid];
1492 kmp_team_t *this_team = thread->th.th_team;
1493 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1494 if ((taskgroup && taskgroup->cancel_request) ||
1495 (this_team->t.t_cancel_request == cancel_parallel)) {
1496 #if OMPT_SUPPORT && OMPT_OPTIONAL 1497 ompt_data_t *task_data;
1498 if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
1499 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
1500 ompt_callbacks.ompt_callback(ompt_callback_cancel)(
1502 ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
1503 : ompt_cancel_parallel) |
1504 ompt_cancel_discarded_task,
1517 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
1518 taskdata->td_last_tied = current_task->td_last_tied;
1519 KMP_DEBUG_ASSERT(taskdata->td_last_tied);
1521 #if KMP_STATS_ENABLED 1523 switch (KMP_GET_THREAD_STATE()) {
1524 case FORK_JOIN_BARRIER:
1525 KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
1528 KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
1531 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
1534 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
1537 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
1540 KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED
  if (UNLIKELY(ompt_enabled.enabled))
1549 __ompt_task_start(task, current_task, gtid);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  kmp_uint64 cur_time;
1554 kmp_int32 kmp_itt_count_task =
1555 __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
1556 current_task->td_flags.tasktype == TASK_IMPLICIT;
1557 if (kmp_itt_count_task) {
1558 thread = __kmp_threads[gtid];
1560 if (thread->th.th_bar_arrive_time)
1561 cur_time = __itt_get_timestamp();
1563 kmp_itt_count_task = 0;
#ifdef KMP_GOMP_COMPAT
    if (taskdata->td_flags.native) {
      ((void (*)(void *))(*(task->routine)))(task->shareds);
    } else
#endif /* KMP_GOMP_COMPAT */
    {
      (*(task->routine))(gtid, task);
    }
1575 KMP_POP_PARTITIONED_TIMER();
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (kmp_itt_count_task) {
1580 thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
    }
#endif
#endif // OMP_40_ENABLED

  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
#if OMPT_SUPPORT
    if (UNLIKELY(ompt_enabled.enabled)) {
      thread->th.ompt_thread_info = oldInfo;
      if (taskdata->td_flags.tiedness == TASK_TIED) {
        taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
      }
      __kmp_task_finish<true>(gtid, task, current_task);
    } else
#endif
      __kmp_task_finish<false>(gtid, task, current_task);
  }

  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
  return;
}
1625 kmp_int32 __kmpc_omp_task_parts(
ident_t *loc_ref, kmp_int32 gtid,
1626 kmp_task_t *new_task) {
1627 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1629 KA_TRACE(10, (
"__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
1630 loc_ref, new_taskdata));
1633 kmp_taskdata_t *parent;
1634 if (UNLIKELY(ompt_enabled.enabled)) {
1635 parent = new_taskdata->td_parent;
1636 if (ompt_enabled.ompt_callback_task_create) {
1637 ompt_data_t task_data = ompt_data_none;
1638 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1639 parent ? &(parent->ompt_task_info.task_data) : &task_data,
1640 parent ? &(parent->ompt_task_info.frame) : NULL,
1641 &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
1642 OMPT_GET_RETURN_ADDRESS(0));
1650 if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1652 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1653 new_taskdata->td_flags.task_serial = 1;
1654 __kmp_invoke_task(gtid, new_task, current_task);
1659 (
"__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: " 1660 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
1661 gtid, loc_ref, new_taskdata));
1663 ANNOTATE_HAPPENS_BEFORE(new_task);
1665 if (UNLIKELY(ompt_enabled.enabled)) {
1666 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
1669 return TASK_CURRENT_NOT_QUEUED;
// __kmp_omp_task: schedule a non-thread-switchable task for execution
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

/* Should we execute the new task or queue it? For now, let's just always try
   to queue it.  If the queue fills up, then we'll execute it.  */
#if OMP_45_ENABLED
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#else
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#endif
  { // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  ANNOTATE_HAPPENS_BEFORE(new_task);
  return TASK_CURRENT_NOT_QUEUED;
}
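// __kmpc_omp_task: compiler entry point for "#pragma omp task"; queues the
// task via __kmp_omp_task and reports task creation/scheduling to OMPT.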
kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
                          kmp_task_t *new_task) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));
1730 kmp_taskdata_t *parent = NULL;
1731 if (UNLIKELY(ompt_enabled.enabled)) {
1732 if (!new_taskdata->td_flags.started) {
1733 OMPT_STORE_RETURN_ADDRESS(gtid);
1734 parent = new_taskdata->td_parent;
1735 if (!parent->ompt_task_info.frame.enter_frame.ptr) {
1736 parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1738 if (ompt_enabled.ompt_callback_task_create) {
1739 ompt_data_t task_data = ompt_data_none;
1740 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1741 parent ? &(parent->ompt_task_info.task_data) : &task_data,
1742 parent ? &(parent->ompt_task_info.frame) : NULL,
1743 &(new_taskdata->ompt_task_info.task_data),
1744 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1745 OMPT_LOAD_RETURN_ADDRESS(gtid));
      __ompt_task_finish(new_task,
                         new_taskdata->ompt_task_info.scheduling_parent,
                         ompt_task_switch);
      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    }
  }

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return res;
}
1784 kmp_int32 __kmp_omp_taskloop_task(
ident_t *loc_ref, kmp_int32 gtid,
1785 kmp_task_t *new_task,
void *codeptr_ra) {
1787 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1789 #if KMP_DEBUG || OMPT_SUPPORT 1790 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1792 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1796 kmp_taskdata_t *parent = NULL;
1797 if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
1798 parent = new_taskdata->td_parent;
1799 if (!parent->ompt_task_info.frame.enter_frame.ptr)
1800 parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1801 if (ompt_enabled.ompt_callback_task_create) {
1802 ompt_data_t task_data = ompt_data_none;
1803 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1804 parent ? &(parent->ompt_task_info.task_data) : &task_data,
1805 parent ? &(parent->ompt_task_info.frame) : NULL,
1806 &(new_taskdata->ompt_task_info.task_data),
1807 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1813 res = __kmp_omp_task(gtid, new_task,
true);
1815 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning " 1816 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1817 gtid, loc_ref, new_taskdata));
1819 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
1820 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
1837 if (__kmp_tasking_mode != tskm_immediate_exec) {
1838 thread = __kmp_threads[gtid];
1839 taskdata = thread->th.th_current_task;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;

    if (ompt) {
      my_task_data = &(taskdata->ompt_task_info.task_data);
      my_parallel_data = OMPT_CUR_TEAM_DATA(thread);

      taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
1851 if (ompt_enabled.ompt_callback_sync_region) {
1852 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1853 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
1854 my_task_data, return_address);
1857 if (ompt_enabled.ompt_callback_sync_region_wait) {
1858 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1859 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
1860 my_task_data, return_address);
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    taskdata->td_taskwait_counter += 1;
1871 taskdata->td_taskwait_ident = loc_ref;
1872 taskdata->td_taskwait_thread = gtid + 1;
1875 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1876 if (itt_sync_obj != NULL)
1877 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
    bool must_wait =
        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
    if (must_wait) {
      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
                             &(taskdata->td_incomplete_child_tasks)),
                       0U);
      while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
1898 if (itt_sync_obj != NULL)
1899 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
1904 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt) {
      if (ompt_enabled.ompt_callback_sync_region_wait) {
1909 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1910 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
1911 my_task_data, return_address);
1913 if (ompt_enabled.ompt_callback_sync_region) {
1914 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1915 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
1916 my_task_data, return_address);
      }
      taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    ANNOTATE_HAPPENS_AFTER(taskdata);
  }

  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_NOINLINE
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

// __kmpc_omp_taskwait: wait until all tasks generated by the current task are
// complete
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
// __kmpc_omp_taskyield: switch to a different task
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));
1967 if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
1968 thread = __kmp_threads[gtid];
1969 taskdata = thread->th.th_current_task;
1976 taskdata->td_taskwait_counter += 1;
1977 taskdata->td_taskwait_ident = loc_ref;
1978 taskdata->td_taskwait_thread = gtid + 1;
1981 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1982 if (itt_sync_obj != NULL)
1983 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
1985 if (!taskdata->td_flags.team_serial) {
1986 kmp_task_team_t *task_team = thread->th.th_task_team;
1987 if (task_team != NULL) {
1988 if (KMP_TASKING_ENABLED(task_team)) {
1990 if (UNLIKELY(ompt_enabled.enabled))
1991 thread->th.ompt_thread_info.ompt_task_yielded = 1;
1993 __kmp_execute_tasks_32(
1994 thread, gtid, NULL, FALSE,
1995 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
1996 __kmp_task_stealing_constraint);
1998 if (UNLIKELY(ompt_enabled.enabled))
1999 thread->th.ompt_thread_info.ompt_task_yielded = 0;
2005 if (itt_sync_obj != NULL)
2006 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  }

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
// Task Reduction implementation

typedef struct kmp_task_red_flags {
  unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects)
  unsigned reserved31 : 31;
} kmp_task_red_flags_t;

// internal structure for reduction data item related info
typedef struct kmp_task_red_data {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_priv; // thread specific data
  void *reduce_pend; // end of private data for comparison op
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_data_t;

// structure sent us by compiler - one per reduction item
typedef struct kmp_task_red_input {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_input_t;
2060 void *__kmpc_task_reduction_init(
int gtid,
int num,
void *data) {
2061 kmp_info_t *thread = __kmp_threads[gtid];
2062 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2063 kmp_int32 nth = thread->th.th_team_nproc;
2064 kmp_task_red_input_t *input = (kmp_task_red_input_t *)data;
2065 kmp_task_red_data_t *arr;
2068 KMP_ASSERT(tg != NULL);
2069 KMP_ASSERT(data != NULL);
2070 KMP_ASSERT(num > 0);
  if (nth == 1) {
    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
                  gtid, tg));
    return (void *)tg;
  }
  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                gtid, tg, num));
  arr = (kmp_task_red_data_t *)__kmp_thread_malloc(
      thread, num * sizeof(kmp_task_red_data_t));
  for (int i = 0; i < num; ++i) {
    void (*f_init)(void *) = (void (*)(void *))(input[i].reduce_init);
    size_t size = input[i].reduce_size - 1;
2084 size += CACHE_LINE - size % CACHE_LINE;
2085 KMP_ASSERT(input[i].reduce_comb != NULL);
2086 arr[i].reduce_shar = input[i].reduce_shar;
2087 arr[i].reduce_size = size;
2088 arr[i].reduce_init = input[i].reduce_init;
2089 arr[i].reduce_fini = input[i].reduce_fini;
2090 arr[i].reduce_comb = input[i].reduce_comb;
2091 arr[i].flags = input[i].flags;
    if (!input[i].flags.lazy_priv) {
      // allocate cache-line aligned block and fill it with zeros
      arr[i].reduce_priv = __kmp_allocate(nth * size);
      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
      if (f_init != NULL) {
        // initialize thread-specific items
        for (int j = 0; j < nth; ++j) {
          f_init((char *)(arr[i].reduce_priv) + j * size);
        }
      }
    } else {
      // only allocate space for pointers now,
      // objects will be lazily allocated/initialized once requested
      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
    }
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
  return (void *)tg;
}
void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
2123 kmp_info_t *thread = __kmp_threads[gtid];
2124 kmp_int32 nth = thread->th.th_team_nproc;
2128 kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
2130 tg = thread->th.th_current_task->td_taskgroup;
2131 KMP_ASSERT(tg != NULL);
2132 kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data);
2133 kmp_int32 num = tg->reduce_num_data;
2134 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
2136 KMP_ASSERT(data != NULL);
  while (tg != NULL) {
    for (int i = 0; i < num; ++i) {
      if (!arr[i].flags.lazy_priv) {
        if (data == arr[i].reduce_shar ||
            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
      } else {
        // check shared location first
        void **p_priv = (void **)(arr[i].reduce_priv);
        if (data == arr[i].reduce_shar)
          goto found;
        // check if we got some thread-specific location as the parameter
        for (int j = 0; j < nth; ++j)
          if (data == p_priv[j])
            goto found;
        continue; // not found, continue search
      found:
        if (p_priv[tid] == NULL) {
          // allocate the thread-specific object lazily
          void (*f_init)(void *) = (void (*)(void *))(arr[i].reduce_init);
          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
          if (f_init != NULL) {
            f_init(p_priv[tid]);
          }
        }
        return p_priv[tid];
      }
    }
    tg = tg->parent;
    arr = (kmp_task_red_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
  }
  KMP_ASSERT2(0, "Unknown task reduction item");
  return NULL; // ERROR, this line never executed
}
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  kmp_int32 nth = th->th.th_team_nproc;
  KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
  kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data;
  kmp_int32 num = tg->reduce_num_data;
  for (int i = 0; i < num; ++i) {
    void *sh_data = arr[i].reduce_shar;
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
      void *pr_data = arr[i].reduce_priv;
      size_t size = arr[i].reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv_data = (char *)pr_data + j * size;
        f_comb(sh_data, priv_data); // combine results
        if (f_fini)
          f_fini(priv_data); // finalize (destruct) private copy
      }
    } else {
      void **pr_data = (void **)(arr[i].reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (pr_data[j] != NULL) {
          f_comb(sh_data, pr_data[j]); // combine results
          if (f_fini)
            f_fini(pr_data[j]); // finalize (destruct) private copy
          __kmp_free(pr_data[j]);
        }
      }
    }
    __kmp_free(arr[i].reduce_priv);
  }
  __kmp_thread_free(th, arr);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
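// __kmpc_taskgroup / __kmpc_end_taskgroup: push a new taskgroup descriptor on
// entry and, on exit, wait for the taskgroup count to drain, finalize any task
// reductions and restore the parent taskgroup.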
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *tg_new =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
2221 KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
2222 KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
2223 tg_new->parent = taskdata->td_taskgroup;
2225 tg_new->reduce_data = NULL;
2226 tg_new->reduce_num_data = 0;
2228 taskdata->td_taskgroup = tg_new;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
2235 kmp_team_t *team = thread->th.th_team;
2236 ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
2238 ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
2240 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2241 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2242 &(my_task_data), codeptr);
2249 void __kmpc_end_taskgroup(
ident_t *loc,
int gtid) {
2250 kmp_info_t *thread = __kmp_threads[gtid];
2251 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2252 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
2253 int thread_finished = FALSE;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr;
2260 if (UNLIKELY(ompt_enabled.enabled)) {
2261 team = thread->th.th_team;
2262 my_task_data = taskdata->ompt_task_info.task_data;
2264 my_parallel_data = team->t.ompt_team_info.parallel_data;
2265 codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2267 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2271 KA_TRACE(10, (
"__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
2272 KMP_DEBUG_ASSERT(taskgroup != NULL);
2273 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
2275 if (__kmp_tasking_mode != tskm_immediate_exec) {
2277 taskdata->td_taskwait_counter += 1;
2278 taskdata->td_taskwait_ident = loc;
2279 taskdata->td_taskwait_thread = gtid + 1;
2283 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
2284 if (itt_sync_obj != NULL)
2285 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif
#if OMP_45_ENABLED
    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_found_proxy_tasks))
#else
    if (!taskdata->td_flags.team_serial)
#endif
    {
      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)),
                       0U);
      while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
2312 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
2327 KMP_DEBUG_ASSERT(taskgroup->count == 0);
2330 if (taskgroup->reduce_data != NULL)
2331 __kmp_task_reduction_fini(thread, taskgroup);
2334 taskdata->td_taskgroup = taskgroup->parent;
2335 __kmp_thread_free(thread, taskgroup);
  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));
  ANNOTATE_HAPPENS_AFTER(taskdata);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
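// __kmp_remove_my_task: pop a task from the tail of the calling thread's own
// deque, provided the Task Scheduling Constraint allows it.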
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
                                        kmp_task_team_t *task_team,
                                        kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_thread_data_t *thread_data;
  kmp_uint32 tail;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
                   NULL); // caller should check this condition

  thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];

  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                gtid, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  tail = (thread_data->td.td_deque_tail - 1) &
         TASK_DEQUE_MASK(thread_data->td); // wrap index
  taskdata = thread_data->td.td_deque[tail];

  if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
                             thread->th.th_current_task)) {
    // The TSC does not allow the removal of the last task in the deque
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  thread_data->td.td_deque_tail = tail;
  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
                "ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
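// __kmp_steal_task: steal a task from another thread's deque (from the head).
// The caller is expected to have checked that the victim's threads_data
// exists before calling this routine.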
static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
                                    kmp_task_team_t *task_team,
                                    std::atomic<kmp_int32> *unfinished_threads,
                                    int *thread_finished,
                                    kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *current;
  kmp_thread_data_t *victim_td, *threads_data;
  kmp_int32 target;
  kmp_int32 victim_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  threads_data = task_team->tt.tt_threads_data;
  KMP_DEBUG_ASSERT(threads_data != NULL); // caller should check this condition

  victim_tid = victim_thr->th.th_info.ds.ds_tid;
  victim_td = &threads_data[victim_tid];

  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
                "task_team=%p ntasks=%d head=%u tail=%u\n",
                gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                victim_td->td.td_deque_tail));

  if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
    KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                  victim_td->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);

  int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
  // Check again after acquiring the lock
  if (ntasks == 0) {
    __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
    KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
    return NULL;
  }

  KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
  current = __kmp_threads[gtid]->th.th_current_task;
  taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
    // Bump head pointer and wrap
    victim_td->td.td_deque_head =
        (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
  } else {
    if (!task_team->tt.tt_untied_task_encountered) {
      // The TSC does not allow stealing the victim's head task
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int i;
    // walk through the victim's deque trying to steal any allowed task
    target = victim_td->td.td_deque_head;
    taskdata = NULL;
    for (i = 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      taskdata = victim_td->td.td_deque[target];
      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
        break; // found a victim task
      } else {
        taskdata = NULL;
      }
    }
    if (taskdata == NULL) {
      // No appropriate candidate found to steal
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int prev = target;
    for (i = i + 1; i < ntasks; ++i) {
      // shift the remaining tasks in the deque left by 1
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        victim_td->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
    victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  if (*thread_finished) {
    // Un-mark this victim as a finished victim before releasing the lock, or
    // other threads could be released from the barrier prematurely.
    kmp_int32 count;
    count = KMP_ATOMIC_INC(unfinished_threads);
    KA_TRACE(
        20,
        ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
         gtid, count + 1, task_team));
    *thread_finished = FALSE;
  }
  TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);

  __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);

  KA_TRACE(10,
           ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
            "task_team=%p ntasks=%d head=%u tail=%u\n",
            gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
            ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
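// __kmp_execute_tasks_template: generic engine behind the flag wait routines.
// Repeatedly executes tasks from the thread's own deque, then tries to steal
// from other threads, until the spin condition tracked by *flag is satisfied
// or no more work can be found.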
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_thread_data_t *threads_data;
  kmp_task_t *task;
  kmp_info_t *other_thread;
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  std::atomic<kmp_int32> *unfinished_threads;
  kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
                      tid = thread->th.th_info.ds.ds_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);

  if (task_team == NULL || current_task == NULL)
    return FALSE;

  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));

  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  KMP_DEBUG_ASSERT(threads_data != NULL);

  nthreads = task_team->tt.tt_nproc;
  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
#if OMP_45_ENABLED
  KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
#else
  KMP_DEBUG_ASSERT(nthreads > 1);
#endif
  KMP_DEBUG_ASSERT(*unfinished_threads >= 0);

  while (1) { // loop to find a task and execute it
    task = NULL;
    if (use_own_tasks) { // check the thread's own queue first
      task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
    }
    if ((task == NULL) && (nthreads > 1)) { // steal a task
      int asleep = 1;
      use_own_tasks = 0;
      // try to steal from the last victim we stole from successfully
      if (victim_tid == -2) { // haven't stolen anything yet
        victim_tid = threads_data[tid].td.td_deque_last_stolen;
        if (victim_tid != -1) // remembered victim: get its thread descriptor
          other_thread = threads_data[victim_tid].td.td_thr;
      }
      if (victim_tid != -1) { // found last victim
        asleep = 0;
      } else if (!new_victim) { // pick a random victim, excluding ourselves
        do {
          victim_tid = __kmp_get_random(thread) % (nthreads - 1);
          if (victim_tid >= tid) {
            ++victim_tid; // adjust the distribution to exclude self
          }
          other_thread = threads_data[victim_tid].td.td_thr;
          // If the victim sleeps at the barrier, wake it up so it can help
          // with tasks, and look for another victim in the meantime.
          asleep = 0;
          if ((__kmp_tasking_mode == tskm_task_teams) &&
              (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
              (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
               NULL)) {
            asleep = 1;
            __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
                                      other_thread->th.th_sleep_loc);
          }
        } while (asleep);
      }

      if (!asleep) { // we have a victim to try to steal from
        task = __kmp_steal_task(other_thread, gtid, task_team,
                                unfinished_threads, thread_finished,
                                is_constrained);
      }
      if (task != NULL) { // remember the victim for the next steal
        if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
          threads_data[tid].td.td_deque_last_stolen = victim_tid;
          new_victim = 1;
        }
      } else { // nothing stolen; forget the remembered victim
        KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
        victim_tid = -2;
      }
    }

    if (task == NULL) // no task found; leave the tasking loop
      break;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      if (itt_sync_obj == NULL) { // at fork barrier; get the object here
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      }
      __kmp_itt_task_starting(itt_sync_obj);
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
    if (itt_sync_obj != NULL)
      __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */

    // If only partway through the barrier and the condition is met, return
    // now so the barrier gather/release pattern can proceed.
    if (flag == NULL || (!final_spin && flag->done_check())) {
      KA_TRACE(
          15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
               gtid));
      return TRUE;
    }
    if (thread->th.th_task_team == NULL) {
      break;
    }
    KMP_YIELD(__kmp_library == library_throughput); // yield before next task
    // If executing a stolen task put new tasks on our own queue, go back to it
    if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
      KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
                    "other tasks, restart\n",
                    gtid));
      use_own_tasks = 1;
      new_victim = 0;
    }
  }

  // The task sources are exhausted. If in the final spin loop of the barrier,
  // check whether the termination condition is satisfied; the work queues may
  // be empty while proxy tasks are still executing.
  if (final_spin &&
      KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0) {
    // Decrement the #unfinished threads if not already done; this decrement
    // may make the termination condition true.
    if (!*thread_finished) {
      kmp_int32 count;

      count = KMP_ATOMIC_DEC(unfinished_threads) - 1;
      KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                    "unfinished_threads to %d task_team=%p\n",
                    gtid, count, task_team));
      *thread_finished = TRUE;
    }

    // It is now unsafe to reference thread->th.th_team!
    if (flag != NULL && flag->done_check()) {
      KA_TRACE(
          15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
               gtid));
      return TRUE;
    }
  }

  // If this thread's task team pointer is NULL, the master has recognized
  // that there are no more tasks; bail out.
  if (thread->th.th_task_team == NULL) {
    KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
    return FALSE;
  }

  KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
  return FALSE;
}
int __kmp_execute_tasks_32(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
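// __kmp_enable_tasking: allocate the task team's per-thread data (the first
// thread in does the allocation) and wake any threads sleeping at the barrier
// so they can help execute the newly enqueued tasks.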
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr) {
  kmp_thread_data_t *threads_data;
  int nthreads, i, is_init_thread;

  KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));

  KMP_DEBUG_ASSERT(task_team != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);

  nthreads = task_team->tt.tt_nproc;
  KMP_DEBUG_ASSERT(nthreads > 0);
  KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);

  // Allocate or increase the size of threads_data if necessary
  is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);

  if (!is_init_thread) {
    // Some other thread already set up the array.
    KA_TRACE(
        20,
        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
         __kmp_gtid_from_thread(this_thr)));
    return;
  }
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  KMP_DEBUG_ASSERT(threads_data != NULL);

  if ((__kmp_tasking_mode == tskm_task_teams) &&
      (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
    // Release any threads sleeping at the barrier, so they can steal tasks
    // and execute them.
    for (i = 0; i < nthreads; i++) {
      volatile void *sleep_loc;
      kmp_info_t *thread = threads_data[i].td.td_thr;

      if (i == this_thr->th.th_info.ds.ds_tid) {
        continue;
      }
      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
          NULL) {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
      } else {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
      }
    }
  }

  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
}
// Free list for task_team data structures.
static kmp_task_team_t *__kmp_free_task_teams =
    NULL;
// Lock for task team data structures.
kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
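// __kmp_alloc_task_deque: allocate and initialize a task deque for a single
// thread. No lock is needed here: each thread allocates its own deque.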
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data) {
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);

  // Initialize last stolen task field to "none"
  thread_data->td.td_deque_last_stolen = -1;

  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);

  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  // Allocate space for the task deque; __kmp_allocate zeroes the memory.
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
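// __kmp_free_task_deque: release a thread's task deque; called at library
// shutdown, so the remaining thread data fields are not reset.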
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
  if (thread_data->td.td_deque != NULL) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    TCW_4(thread_data->td.td_deque_ntasks, 0);
    __kmp_free(thread_data->td.td_deque);
    thread_data->td.td_deque = NULL;
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  }

#ifdef BUILD_TIED_TASK_STACK
  // Free the tied-task stack if it was ever used.
  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  }
#endif // BUILD_TIED_TASK_STACK
}
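// __kmp_realloc_task_threads_data: allocate or enlarge the threads_data array
// for a task team. Only the first thread to take the lock does the
// (re)allocation and returns TRUE; all other threads return FALSE.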
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team) {
  kmp_thread_data_t **threads_data_p;
  kmp_int32 nthreads, maxthreads;
  int is_init_thread = FALSE;

  if (TCR_4(task_team->tt.tt_found_tasks)) {
    // Already reallocated and initialized.
    return FALSE;
  }

  threads_data_p = &task_team->tt.tt_threads_data;
  nthreads = task_team->tt.tt_nproc;
  maxthreads = task_team->tt.tt_max_threads;

  // All threads must lock when they encounter the first task of the implicit
  // task region so that threads_data fields are (re)initialized before use.
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);

  if (!TCR_4(task_team->tt.tt_found_tasks)) {
    // first thread to enable tasking
    kmp_team_t *team = thread->th.th_team;
    int i;

    is_init_thread = TRUE;
    if (maxthreads < nthreads) {

      if (*threads_data_p != NULL) {
        kmp_thread_data_t *old_data = *threads_data_p;
        kmp_thread_data_t *new_data = NULL;

        KE_TRACE(
            10,
            ("__kmp_realloc_task_threads_data: T#%d reallocating "
             "threads data for task_team %p, new_size = %d, old_size = %d\n",
             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
        // Reallocate threads_data with more elements; new entries are
        // zero-initialized by __kmp_allocate().
        new_data = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        // copy old data to new data
        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));

#ifdef BUILD_TIED_TASK_STACK
        // Initialize the tied-task stacks of the new entries.
        for (i = maxthreads; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
        // Install the new data and free the old data.
        (*threads_data_p) = new_data;
        __kmp_free(old_data);
      } else {
        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                      "threads data for task_team %p, size = %d\n",
                      __kmp_gtid_from_thread(thread), task_team, nthreads));
        // Make the initial allocation of the threads_data array.
        ANNOTATE_IGNORE_WRITES_BEGIN();
        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        ANNOTATE_IGNORE_WRITES_END();
#ifdef BUILD_TIED_TASK_STACK
        // Initialize the tied-task stacks of all entries.
        for (i = 0; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
      }
      task_team->tt.tt_max_threads = nthreads;
    } else {
      // The array already has enough elements; go ahead and use it.
      KMP_DEBUG_ASSERT(*threads_data_p != NULL);
    }

    // Initialize threads_data pointers back to thread_info structures.
    for (i = 0; i < nthreads; i++) {
      kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
      thread_data->td.td_thr = team->t.t_threads[i];

      if (thread_data->td.td_deque_last_stolen >= nthreads) {
        // The last stolen field survives across teams/barriers, and the
        // number of threads may have changed: reset it to "none".
        thread_data->td.td_deque_last_stolen = -1;
      }
    }

    KMP_MB();
    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  return is_init_thread;
}
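// __kmp_free_task_threads_data: free the threads_data array of a task team,
// including the per-thread task deques; only happens at library shutdown.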
static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
  if (task_team->tt.tt_threads_data != NULL) {
    int i;
    for (i = 0; i < task_team->tt.tt_max_threads; i++) {
      __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
    }
    __kmp_free(task_team->tt.tt_threads_data);
    task_team->tt.tt_threads_data = NULL;
  }
  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
}
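// __kmp_allocate_task_team: get a task team for the given team, reusing one
// from the global free list if possible, and (re)initialize its fields.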
static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
                                                 kmp_team_t *team) {
  kmp_task_team_t *task_team = NULL;
  int nthreads;

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Take a task team from the task team pool
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    if (__kmp_free_task_teams != NULL) {
      task_team = __kmp_free_task_teams;
      TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
      task_team->tt.tt_next = NULL;
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }

  if (task_team == NULL) {
    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                  "task team for team %p\n",
                  __kmp_gtid_from_thread(thread), team));
    // Allocate a new task team; __kmp_allocate zeroes the returned memory.
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
  }

  TCW_4(task_team->tt.tt_found_tasks, FALSE);
  TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
  task_team->tt.tt_nproc = nthreads = team->t.t_nproc;

  KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
  TCW_4(task_team->tt.tt_active, TRUE);

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
                "unfinished_threads init'd to %d\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
                KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
  return task_team;
}
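// __kmp_free_task_team: return a task team to the global free list.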
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
                thread ? __kmp_gtid_from_thread(thread) : -1, task_team));

  // Put the task team back on the free list
  __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);

  KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
  task_team->tt.tt_next = __kmp_free_task_teams;
  TCW_PTR(__kmp_free_task_teams, task_team);

  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}
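// __kmp_reap_task_teams: free all task teams on the global free list; should
// only be called at library shutdown.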
void __kmp_reap_task_teams(void) {
  kmp_task_team_t *task_team;

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Free all task_teams on the free list
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    while ((task_team = __kmp_free_task_teams) != NULL) {
      __kmp_free_task_teams = task_team->tt.tt_next;
      task_team->tt.tt_next = NULL;

      // Free threads_data if necessary
      if (task_team->tt.tt_threads_data != NULL) {
        __kmp_free_task_threads_data(task_team);
      }
      __kmp_free(task_team);
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }
}
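// __kmp_wait_to_unref_task_teams: wait until every thread in the thread pool
// has stopped referencing its task team, waking sleeping threads as needed.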
void __kmp_wait_to_unref_task_teams(void) {
  kmp_info_t *thread;
  kmp_uint32 spins;
  int done;

  KMP_INIT_YIELD(spins);

  for (;;) {
    done = TRUE;
    for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
         thread = thread->th.th_next_pool) {
#if KMP_OS_WINDOWS
      DWORD exit_val;
#endif
      if (TCR_PTR(thread->th.th_task_team) == NULL) {
        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                      __kmp_gtid_from_thread(thread)));
        continue;
      }
#if KMP_OS_WINDOWS
      if (!__kmp_is_thread_alive(thread, &exit_val)) {
        thread->th.th_task_team = NULL;
        continue;
      }
#endif
      done = FALSE; // this thread still references its task team

      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
                    "unreference task_team\n",
                    __kmp_gtid_from_thread(thread)));

      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        volatile void *sleep_loc;
        // If the thread is sleeping, awaken it.
        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
            NULL) {
          KA_TRACE(
              10,
              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
        }
      }
    }
    if (done)
      break;

    // If oversubscribed, or waiting in throughput mode, yield before retrying.
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
}
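// __kmp_task_team_setup: create (or recycle) the task teams used by this team,
// keeping two task teams alternated by the th_task_state parity.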
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // If this task_team hasn't been created yet, allocate it; if it exists, it
  // is the current task team and should not be touched yet as it may still be
  // in use.
  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
      (always || team->t.t_nproc > 1)) {
    team->t.t_task_team[this_thr->th.th_task_state] =
        __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p "
                  "for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state],
                  ((team != NULL) ? team->t.t_id : -1),
                  this_thr->th.th_task_state));
  }

  // After threads exit the release phase, they switch to the other task team,
  // so make sure it is allocated and properly initialized. No task teams are
  // formed for serialized teams.
  if (team->t.t_nproc > 1) {
    int other_team = 1 - this_thr->th.th_task_state;
    if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
      team->t.t_task_team[other_team] =
          __kmp_allocate_task_team(this_thr, team);
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new "
                    "task_team %p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    } else { // leave the old task team in place; re-initialize if needed
      kmp_task_team_t *task_team = team->t.t_task_team[other_team];
      if (!task_team->tt.tt_active ||
          team->t.t_nproc != task_team->tt.tt_nproc) {
        TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
        TCW_4(task_team->tt.tt_found_tasks, FALSE);
        TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
        KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
                          team->t.t_nproc);
        TCW_4(task_team->tt.tt_active, TRUE);
      }
      // If the team size has changed, the first thread to enable tasking will
      // realloc threads_data if necessary.
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team "
                    "%p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    }
  }
}
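// __kmp_task_team_sync: called after the barrier release phase; flips the
// thread's task-state parity and points the thread at the team's other
// task_team.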
void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // Toggle the th_task_state field to switch which task_team this thread uses
  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
  // Propagate the task team pointer from the team struct to this thread
  TCW_PTR(this_thr->th.th_task_team,
          team->t.t_task_team[this_thr->th.th_task_state]);
  KA_TRACE(20,
           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
            "%p from Team #%d (parity=%d)\n",
            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
            ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
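// __kmp_task_team_wait: wait (if requested) for all outstanding tasks of the
// current task team to finish, then deactivate it so spinning workers stop
// referencing it.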
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);

  if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
    if (wait) {
      KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
                    "(for unfinished_threads to reach 0) on task_team = %p\n",
                    __kmp_gtid_from_thread(this_thr), task_team));
      // Worker threads may still be executing tasks; wait here for them to
      // complete. Only the master thread checks the termination condition.
      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
                             &task_team->tt.tt_unfinished_threads),
                       0U);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    // Deactivate the old task team, so workers stop referencing it while
    // spinning.
    KA_TRACE(
        20,
        ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
         "setting active to false, setting local and team's pointer to NULL\n",
         __kmp_gtid_from_thread(this_thr), task_team));
#if OMP_45_ENABLED
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
                     task_team->tt.tt_found_proxy_tasks == TRUE);
    TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#else
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
#endif
    KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
    TCW_SYNC_4(task_team->tt.tt_active, FALSE);

    TCW_PTR(this_thr->th.th_task_team, NULL);
  }
}
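// __kmp_tasking_barrier: execute all pending tasks before a regular or join
// barrier; only used when __kmp_tasking_mode == tskm_extra_barrier.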
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
  std::atomic<kmp_uint32> *spin = RCAST(
      std::atomic<kmp_uint32> *,
      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  int flag = FALSE;
  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);

#if USE_ITT_BUILD
  KMP_FSYNC_SPIN_INIT(spin, NULL);
#endif /* USE_ITT_BUILD */
  kmp_flag_32 spin_flag(spin, 0U);
  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
#if USE_ITT_BUILD
    KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));
#endif /* USE_ITT_BUILD */

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    KMP_YIELD(TRUE);
  }
#if USE_ITT_BUILD
  KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
#endif /* USE_ITT_BUILD */
}
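// __kmp_give_task: try to enqueue a task into the deque of the given thread.
// Fails (returns false) if that thread has no deque or its deque is full and
// has already grown past the current pass ratio.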
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = taskdata->td_task_team;

  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  // If task_team is NULL something went really bad...
  KMP_DEBUG_ASSERT(task_team != NULL);

  bool result = false;
  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];

  if (thread_data->td.td_deque == NULL) {
    // There is no queue in this thread; find another one.
    KA_TRACE(30,
             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
              tid, taskdata));
    return result;
  }

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(
        30,
        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
         taskdata, tid));

    // If this deque is bigger than the pass ratio, give another thread a chance
    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
      return result;

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    __kmp_realloc_task_deque(thread, thread_data);

  } else {

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
                    "thread %d.\n",
                    taskdata, tid));

      // If this deque is bigger than the pass ratio, give another thread a
      // chance
      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
        goto release_and_exit;

      __kmp_realloc_task_deque(thread, thread_data);
    }
  }

  // The lock is held here and there is space in the deque.

  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1);

  result = true;
  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
                taskdata, tid));

release_and_exit:
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return result;
}
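// Completion of proxy tasks is split into two halves: the "top half" can be
// run by any thread (it marks the task complete and adds an artificial
// incomplete child so the task cannot be freed too early), while the "bottom
// half" must run on a thread of the task's team (it releases dependences and
// frees the task). The second top half removes the artificial child and
// decrements the parent's incomplete-children count.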
static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  taskdata->td_flags.complete = 1; // mark the task as completed

  if (taskdata->td_taskgroup)
    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);

  // Create an artificial child so the bottom half cannot release the task
  // before the second top half has executed.
  KMP_ATOMIC_INC(&taskdata->td_incomplete_child_tasks);
}

static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  kmp_int32 children = 0;

  // Predecrement simulated by the "- 1" calculation
  children =
      KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Remove the artificial child
  KMP_ATOMIC_DEC(&taskdata->td_incomplete_child_tasks);
}
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  kmp_info_t *thread = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
                   1); // the top half must run before the bottom half

  // Wait until the top half has fully finished
  while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) > 0)
    ;

  __kmp_release_deps(gtid, taskdata);
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  KA_TRACE(
      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
           gtid, taskdata));

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);
  __kmp_second_top_half_finish_proxy(taskdata);
  __kmp_bottom_half_finish_proxy(gtid, ptask);

  KA_TRACE(
      10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
           gtid, taskdata));
}
void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
       taskdata));

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);

  // Enqueue the task so the bottom half runs on a thread of the task's team
  kmp_team_t *team = taskdata->td_team;
  kmp_int32 nthreads = team->t.t_nproc;
  kmp_info_t *thread;

  kmp_int32 start_k = 0;
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;

  do {
    // For now, linearly look for a thread that accepts the task
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;

    // we did a full pass over all the threads
    if (k == start_k)
      pass = pass << 1;

  } while (!__kmp_give_task(thread, k, ptask, pass));

  __kmp_second_top_half_finish_proxy(taskdata);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
       taskdata));
}
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *taskdata_src;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;
  size_t task_size;

  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));
  taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
                   TASK_FULL); // it should not be a proxy task
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
  task_size = taskdata_src->td_size_alloc;

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
                task_size));
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
#else
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
#endif /* USE_FAST_MEMORY */
  KMP_MEMCPY(taskdata, taskdata_src, task_size);

  task = KMP_TASKDATA_TO_TASK(taskdata);

  // Initialize the fields not affected by memcpy
  taskdata->td_task_id = KMP_GEN_TASK_ID();
  if (task->shareds != NULL) { // fix up the shareds pointer
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits the taskgroup from the parent

  // Only keep track of child task counts if team parallel and tasking is not
  // serialized
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
  }

  // Only track allocated child tasks for explicit parents
  if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
    KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);

  KA_TRACE(20,
           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
            thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
#endif
  return task;
}
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);

KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
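// kmp_taskloop_bounds_t encapsulates reading and writing the loop bounds
// stored inside a taskloop task, hiding the difference between the Intel
// layout (bounds at a fixed offset in the task) and the GOMP-compatible
// layout (bounds kept in the shareds, sized by td_size_loop_bounds).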
class kmp_taskloop_bounds_t {
  kmp_task_t *task;
  const kmp_taskdata_t *taskdata;
  size_t lower_offset;
  size_t upper_offset;

public:
  kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
        lower_offset((char *)lb - (char *)task),
        upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }
  kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
        lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
  size_t get_lower_offset() const { return lower_offset; }
  size_t get_upper_offset() const { return upper_offset; }
  kmp_uint64 get_lb() const {
    kmp_int64 retval;
#if defined(KMP_GOMP_COMPAT)
    // Intel task just returns the lower bound normally
    if (!taskdata->td_flags.native) {
      retval = *(kmp_int64 *)((char *)task + lower_offset);
    } else {
      // GOMP task has to take sizeof(long) into account
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
        retval = (kmp_int64)*lb;
      } else {
        kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
        retval = (kmp_int64)*lb;
      }
    }
#else
    retval = *(kmp_int64 *)((char *)task + lower_offset);
#endif // defined(KMP_GOMP_COMPAT)
    return retval;
  }
  kmp_uint64 get_ub() const {
    kmp_int64 retval;
#if defined(KMP_GOMP_COMPAT)
    // Intel task just returns the upper bound normally
    if (!taskdata->td_flags.native) {
      retval = *(kmp_int64 *)((char *)task + upper_offset);
    } else {
      // GOMP task has to take sizeof(long) into account
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
        retval = (kmp_int64)*ub;
      } else {
        kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
        retval = (kmp_int64)*ub;
      }
    }
#else
    retval = *(kmp_int64 *)((char *)task + upper_offset);
#endif // defined(KMP_GOMP_COMPAT)
    return retval;
  }
  void set_lb(kmp_uint64 lb) {
#if defined(KMP_GOMP_COMPAT)
    // Intel task just sets the lower bound normally
    if (!taskdata->td_flags.native) {
      *(kmp_uint64 *)((char *)task + lower_offset) = lb;
    } else {
      // GOMP task has to take sizeof(long) into account
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
        *lower = (kmp_uint32)lb;
      } else {
        kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
        *lower = (kmp_uint64)lb;
      }
    }
#else
    *(kmp_uint64 *)((char *)task + lower_offset) = lb;
#endif // defined(KMP_GOMP_COMPAT)
  }
  void set_ub(kmp_uint64 ub) {
#if defined(KMP_GOMP_COMPAT)
    // Intel task just sets the upper bound normally
    if (!taskdata->td_flags.native) {
      *(kmp_uint64 *)((char *)task + upper_offset) = ub;
    } else {
      // GOMP task has to take sizeof(long) into account
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
        *upper = (kmp_uint32)ub;
      } else {
        kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
        *upper = (kmp_uint64)ub;
      }
    }
#else
    *(kmp_uint64 *)((char *)task + upper_offset) = ub;
#endif // defined(KMP_GOMP_COMPAT)
  }
};
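// __kmp_taskloop_linear: split the iteration space into num_tasks chunks of
// grainsize iterations (the first "extras" chunks get one extra iteration),
// duplicate the pattern task for each chunk, and schedule the copies one by
// one.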
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
                           kmp_uint64 tc,
#if OMPT_SUPPORT
                           void *codeptr_ra,
#endif
                           void *task_dup) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 i;
  // compiler provides global bounds here
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  kmp_task_t *next_task;
  kmp_int32 lastpriv = 0;

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
                "extras %lld, i=%lld,%lld(%d)%lld, dup %p\n",
                gtid, num_tasks, grainsize, extras, lower, upper, ub_glob, st,
                task_dup));

  // Launch num_tasks tasks, assigning grainsize iterations to each
  for (i = 0; i < num_tasks; ++i) {
    kmp_uint64 chunk_minus_1;
    if (extras == 0) {
      chunk_minus_1 = grainsize - 1;
    } else {
      chunk_minus_1 = grainsize;
      --extras; // first extras tasks get one extra iteration
    }
    upper = lower + st * chunk_minus_1;
    if (i == num_tasks - 1) {
      // schedule the last task, set the lastprivate flag if needed
      if (st == 1) { // most common case
        KMP_DEBUG_ASSERT(upper == *ub);
        if (upper == ub_glob)
          lastpriv = 1;
      } else if (st > 0) { // positive loop stride
        KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
        if ((kmp_uint64)st > ub_glob - upper)
          lastpriv = 1;
      } else { // negative loop stride
        KMP_DEBUG_ASSERT(upper + st < *ub);
        if (upper - ub_glob < (kmp_uint64)(-st))
          lastpriv = 1;
      }
    }
    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
    kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
    kmp_taskloop_bounds_t next_task_bounds =
        kmp_taskloop_bounds_t(next_task, task_bounds);

    // adjust task-specific bounds
    next_task_bounds.set_lb(lower);
    if (next_taskdata->td_flags.native) {
      // adjust upper bound for GOMP-native tasks (exclusive bound)
      next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
    } else {
      next_task_bounds.set_ub(upper);
    }
    if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates etc.
      ptask_dup(next_task, task, lastpriv);
    KA_TRACE(40,
             ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
              "upper %lld stride %lld, (offsets %p %p)\n",
              gtid, i, next_task, lower, upper, st,
              next_task_bounds.get_lower_offset(),
              next_task_bounds.get_upper_offset()));
#if OMPT_SUPPORT
    __kmp_omp_taskloop_task(NULL, gtid, next_task,
                            codeptr_ra); // schedule new task
#else
    __kmp_omp_task(gtid, next_task, true); // schedule new task
#endif
    lower = upper + st; // adjust the lower bound for the next iteration
  }
  // free the pattern task and exit
  __kmp_task_start(gtid, task, current_task); // make internal bookkeeping happy
  // do not execute the pattern task, just do internal bookkeeping
  __kmp_task_finish<false>(gtid, task, current_task);
}
// Structure with taskloop parameters, kept in the shareds of the auxiliary
// task used by the recursive taskloop scheme.
typedef struct __taskloop_params {
  kmp_task_t *task;
  kmp_uint64 *lb;
  kmp_uint64 *ub;
  void *task_dup;
  kmp_int64 st;
  kmp_uint64 ub_glob;
  kmp_uint64 num_tasks;
  kmp_uint64 grainsize;
  kmp_uint64 extras;
  kmp_uint64 tc;
  kmp_uint64 num_t_min;
#if OMPT_SUPPORT
  void *codeptr_ra;
#endif
} __taskloop_params_t;
void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
                          kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
#if OMPT_SUPPORT
                          void *,
#endif
                          void *);
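// __kmp_taskloop_task: entry point of the auxiliary task created by
// __kmp_taskloop_recur; unpacks the parameters from the task shareds and
// continues splitting recursively or switches to the linear scheme.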
int __kmp_taskloop_task(int gtid, void *ptask) {
  __taskloop_params_t *p =
      (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
  kmp_task_t *task = p->task;
  kmp_uint64 *lb = p->lb;
  kmp_uint64 *ub = p->ub;
  void *task_dup = p->task_dup;
  kmp_int64 st = p->st;
  kmp_uint64 ub_glob = p->ub_glob;
  kmp_uint64 num_tasks = p->num_tasks;
  kmp_uint64 grainsize = p->grainsize;
  kmp_uint64 extras = p->extras;
  kmp_uint64 tc = p->tc;
  kmp_uint64 num_t_min = p->num_t_min;
#if OMPT_SUPPORT
  void *codeptr_ra = p->codeptr_ra;
#endif
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KA_TRACE(20, ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
                task_dup));
  KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
  if (num_tasks > num_t_min)
    __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, tc, num_t_min,
#if OMPT_SUPPORT
                         codeptr_ra,
#endif
                         task_dup);
  else
    __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc,
#if OMPT_SUPPORT
                          codeptr_ra,
#endif
                          task_dup);

  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
  return 0;
}
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_uint64 tc, kmp_uint64 num_t_min,
#if OMPT_SUPPORT
                          void *codeptr_ra,
#endif
                          void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
  KA_TRACE(20, ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
                task_dup));
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 lower = *lb;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_t *next_task;
  size_t lower_offset =
      (char *)lb - (char *)task; // remember offset of lb in the task structure
  size_t upper_offset =
      (char *)ub - (char *)task; // remember offset of ub in the task structure

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // split the loop into two halves
  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
  kmp_uint64 gr_size0 = grainsize;
  kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
  if (n_tsk0 <= extras) {
    gr_size0++; // integrate extras into grainsize
    ext0 = 0; // no extra iterations in the 1st half
    ext1 = extras - n_tsk0; // remaining extras
    tc0 = gr_size0 * n_tsk0; // iterations for the 1st half
    tc1 = tc - tc0; // remaining iterations
  } else { // n_tsk0 > extras
    ext1 = 0; // no extra iterations in the 2nd half
    ext0 = extras;
    tc1 = grainsize * n_tsk1;
    tc0 = tc - tc1;
  }
  ub0 = lower + st * (tc0 - 1);
  lb1 = ub0 + st;

  // create the pattern task for the 2nd half of the loop
  next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
  // adjust lower bound (upper bound is not changed) for the 2nd half
  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  if (ptask_dup != NULL) // construct firstprivates etc.
    ptask_dup(next_task, task, 0);
  *ub = ub0; // adjust upper bound for the 1st half

  // create an auxiliary task for the 2nd half of the loop
  kmp_task_t *new_task =
      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
  p->task = next_task;
  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  p->task_dup = task_dup;
  p->st = st;
  p->ub_glob = ub_glob;
  p->num_tasks = n_tsk1;
  p->grainsize = grainsize;
  p->extras = ext1;
  p->tc = tc1;
  p->num_t_min = num_t_min;
#if OMPT_SUPPORT
  p->codeptr_ra = codeptr_ra;
#endif

#if OMPT_SUPPORT
  // schedule the new task with the correct return address for OMPT events
  __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
#else
  __kmp_omp_task(gtid, new_task, true); // schedule new task
#endif

  // execute the 1st half of the current subrange
  if (n_tsk0 > num_t_min)
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
                         ext0, tc0, num_t_min,
#if OMPT_SUPPORT
                         codeptr_ra,
#endif
                         task_dup);
  else
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                          gr_size0, ext0, tc0,
#if OMPT_SUPPORT
                          codeptr_ra,
#endif
                          task_dup);

  KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
}
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(loc, gtid);
  }

  // calculate loop parameters
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  // compiler provides global bounds here
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, task_dup));

  // compute the trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for a zero-trip loop
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (num_tasks_min == 0)
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, choose the default
    // try to schedule (team_size * 10) tasks
    grainsize = thread->th.th_team_nproc * 10;
    // fall through to the num_tasks case
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1; // too big grainsize requested, adjust values
      grainsize = tc;
      extras = 0;
    } else {
      num_tasks = tc / grainsize;
      // adjust grainsize for balanced distribution of iterations
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }
  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // check the if clause value first
  if (if_val == 0) { // if(0) specified, mark the task as serial
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
    // always start serial tasks linearly
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc,
#if OMPT_SUPPORT && OMPT_OPTIONAL
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
    // !taskdata->td_flags.native => currently force linear spawning of tasks
    // for GOMP_taskloop
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, tc, num_tasks_min,
#if OMPT_SUPPORT && OMPT_OPTIONAL
                         OMPT_GET_RETURN_ADDRESS(0),
#endif
                         task_dup);
  } else {
    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc,
#if OMPT_SUPPORT && OMPT_OPTIONAL
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}