#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#include "tsan_annotations.h"

/* forward declaration */
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);

#if OMP_45_ENABLED
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
#endif
#ifdef BUILD_TIED_TASK_STACK

// __kmp_trace_task_stack: print the tied tasks from the task stack in order.
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up ts_top if we need to pop from previous block
    // (parenthesized: == binds tighter than &)
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
// __kmp_init_task_stack: initialize the task stack the first time a
// thread_data structure is created; should not be necessary otherwise.
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid); // owner of the blocks
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack onwards
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread, stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // initialize the stack to be empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
// __kmp_push_task_stack: push a tied task onto the task stack of the
// current thread.
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // Don't push anything on stack if team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));
  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next != NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
// __kmp_pop_task_stack: pop the tied task from the task stack. Don't return
// the task; just check that it matches the ending task passed in.
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    return; // Don't pop anything from stack if team or tasks are serialized
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from previous block
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // stack was built correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif /* BUILD_TIED_TASK_STACK */
// __kmp_push_task: Add a task to the thread's deque.
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team for serialized teams.
  if (taskdata->td_flags.task_serial) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode.
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (!KMP_TASKING_ENABLED(task_team)) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate
  if (thread_data->td.td_deque == NULL) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Lock the deque for the task push operation
  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

#if OMP_45_ENABLED
  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }
#else
  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
#endif

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count

  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
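
// The per-thread deque above is a power-of-two ring buffer; head/tail are
// advanced modulo the size by masking with TASK_DEQUE_MASK. The self-contained
// sketch below is illustrative only (not runtime code; ring_*, SIZE are
// invented names) and models the same wrap arithmetic used by __kmp_push_task
// and __kmp_remove_my_task.
#if 0
enum { SIZE = 8 }; // must be a power of two
static int ring_buf[SIZE];
static unsigned ring_head, ring_tail, ring_ntasks;

static int ring_push(int v) { // analogous to pushing at td_deque_tail
  if (ring_ntasks >= SIZE)
    return 0; // full: the caller would execute the task immediately instead
  ring_buf[ring_tail] = v;
  ring_tail = (ring_tail + 1) & (SIZE - 1); // same idea as TASK_DEQUE_MASK
  ring_ntasks++;
  return 1;
}

static int ring_pop_tail(int *v) { // analogous to __kmp_remove_my_task
  if (ring_ntasks == 0)
    return 0;
  ring_tail = (ring_tail - 1) & (SIZE - 1);
  *v = ring_buf[ring_tail];
  ring_ntasks--;
  return 1;
}
#endif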
// __kmp_pop_current_task_from_thread: set up current task from called thread
// when team ends
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
// __kmp_push_current_task_to_thread: set up current task in called thread for
// a new team
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // the current task of the thread is a parent of the new just created
  // implicit tasks of the new team
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
// __kmp_task_start: bookkeeping for a task starting execution
//
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark currently executing task as suspended
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
}
#if OMPT_SUPPORT
// __ompt_task_init: Initialize OMPT fields maintained by a task
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  // The calls to __ompt_task_init already have the ompt_enabled condition.
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = NULL;
  task->ompt_task_info.frame.enter_frame = NULL;
#if OMP_40_ENABLED
  task->ompt_task_info.ndeps = 0;
  task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
}
// __ompt_task_start: Build and trigger task-begin event
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  /* let OMPT know that we're about to run this task */
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}

// __ompt_task_finish: Build and trigger final task-schedule event
static inline void
__ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task,
                   ompt_task_status_t status = ompt_task_complete) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
      taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
    status = ompt_task_cancel;
  }

  /* let OMPT know that we're returning to the callee task */
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        &((resumed_task ? resumed_task
                        : (taskdata->ompt_task_info.scheduling_parent
                               ? taskdata->ompt_task_info.scheduling_parent
                               : taskdata->td_parent))
              ->ompt_task_info.task_data));
  }
}
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

#if OMPT_SUPPORT
  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame == NULL) {
      current_task->ompt_task_info.frame.enter_frame =
          taskdata->ompt_task_info.frame.exit_frame = frame_address;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
          return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }
#endif // OMPT_SUPPORT

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}

#if OMPT_SUPPORT
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution
//
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
#endif
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_begin: report that a given task has started execution
// (deprecated entry point, only compiled when TASK_UNUSED is defined)
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED
// __kmp_free_task: free the current task space and the space for shareds
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);

  taskdata->td_flags.freed = 1;
  ANNOTATE_HAPPENS_BEFORE(taskdata);
// deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
// __kmp_free_task_and_ancestors: free the current task and ancestors without
// children
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
#if OMP_45_ENABLED
  // Proxy tasks must always be allowed to free their parents
  // because they can be run in background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
#else
  kmp_int32 team_serial =
      taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser;
#endif
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Now, go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    // Stop checking ancestors at implicit task to avoid freeing them too early
    if (team_serial || taskdata->td_flags.tasktype == TASK_IMPLICIT)
      return;

    // Predecrement simulated by "- 1" calculation
    children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
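
// td_allocated_child_tasks behaves like a reference count: a task starts at 1
// (for itself), each child allocation increments the parent, and completion or
// freeing decrements it; only at zero may the descriptor be reclaimed, walking
// up the ancestor chain as above. Illustrative sketch only (node_t, release
// are invented names, not runtime APIs):
#if 0
#include <stdatomic.h>
typedef struct node {
  struct node *parent;
  atomic_int refcount; // 1 for the node itself + 1 per live child
} node_t;

static void release(node_t *n) {
  // Mirrors __kmp_free_task_and_ancestors: free this node, then check whether
  // the parent can now be freed as well.
  while (n && atomic_fetch_sub(&n->refcount, 1) - 1 == 0) {
    node_t *parent = n->parent;
    /* free(n) would happen here */
    n = parent;
  }
}
#endif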
703 static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
704 kmp_taskdata_t *resumed_task) {
705 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
706 kmp_info_t *thread = __kmp_threads[gtid];
707 kmp_task_team_t *task_team =
708 thread->th.th_task_team;
709 kmp_int32 children = 0;
711 KA_TRACE(10, (
"__kmp_task_finish(enter): T#%d finishing task %p and resuming " 713 gtid, taskdata, resumed_task));
715 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
718 #ifdef BUILD_TIED_TASK_STACK 719 if (taskdata->td_flags.tiedness == TASK_TIED) {
720 __kmp_pop_task_stack(gtid, thread, taskdata);
724 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
727 kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
730 (
"__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
731 gtid, counter, taskdata));
735 if (resumed_task == NULL) {
736 KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
737 resumed_task = taskdata->td_parent;
740 thread->th.th_current_task = resumed_task;
741 resumed_task->td_flags.executing = 1;
742 KA_TRACE(10, (
"__kmp_task_finish(exit): T#%d partially done task %p, " 743 "resuming task %p\n",
744 gtid, taskdata, resumed_task));
750 __ompt_task_finish(task, resumed_task);
753 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
754 taskdata->td_flags.complete = 1;
755 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
756 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
760 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
763 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
764 KMP_DEBUG_ASSERT(children >= 0);
766 if (taskdata->td_taskgroup)
767 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
768 __kmp_release_deps(gtid, taskdata);
770 }
else if (task_team && task_team->tt.tt_found_proxy_tasks) {
773 __kmp_release_deps(gtid, taskdata);
774 #endif // OMP_45_ENABLED 775 #endif // OMP_40_ENABLED 781 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
782 taskdata->td_flags.executing = 0;
785 20, (
"__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
786 gtid, taskdata, children));
795 if (taskdata->td_flags.destructors_thunk) {
796 kmp_routine_entry_t destr_thunk = task->data1.destructors;
797 KMP_ASSERT(destr_thunk);
798 destr_thunk(gtid, task);
800 #endif // OMP_40_ENABLED 805 (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
806 taskdata->td_flags.task_serial);
807 if (taskdata->td_flags.task_serial) {
808 if (resumed_task == NULL) {
809 resumed_task = taskdata->td_parent;
813 KMP_DEBUG_ASSERT(resumed_task !=
821 thread->th.th_current_task = resumed_task;
822 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
826 resumed_task->td_flags.executing = 1;
829 10, (
"__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
830 gtid, taskdata, resumed_task));
836 static void __kmpc_omp_task_complete_if0_template(
ident_t *loc_ref,
839 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
840 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
842 __kmp_task_finish<ompt>(gtid, task, NULL);
844 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
845 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
849 omp_frame_t *ompt_frame;
850 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
851 ompt_frame->enter_frame = NULL;
860 void __kmpc_omp_task_complete_if0_ompt(
ident_t *loc_ref, kmp_int32 gtid,
862 __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
864 #endif // OMPT_SUPPORT 871 void __kmpc_omp_task_complete_if0(
ident_t *loc_ref, kmp_int32 gtid,
874 if (UNLIKELY(ompt_enabled.enabled)) {
875 __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
879 __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
885 void __kmpc_omp_task_complete(
ident_t *loc_ref, kmp_int32 gtid,
887 KA_TRACE(10, (
"__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
888 loc_ref, KMP_TASK_TO_TASKDATA(task)));
890 __kmp_task_finish<false>(gtid, task,
893 KA_TRACE(10, (
"__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
894 loc_ref, KMP_TASK_TO_TASKDATA(task)));
#endif // TASK_UNUSED

// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
// task for a given thread
//
// loc_ref: reference to source location of parallel region
// this_thr: thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVs. This is assumed to
// have already been done elsewhere.
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_45_ENABLED
  task->td_flags.proxy = TASK_FULL;
#endif

  // All implicit tasks are executed immediately, not deferred
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;

#if OMP_40_ENABLED
  task->td_depnode = NULL;
#endif
  task->td_last_tied = task;

  if (set_curr_task) { // only do this initialization the first time
    KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
    // Not used: don't need to deallocate implicit task
    KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
#if OMP_40_ENABLED
    task->td_taskgroup = NULL; // An implicit task does not have taskgroup
    task->td_dephash = NULL;
#endif
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);
#endif

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
// __kmp_finish_implicit_task: Release resources associated to implicit tasks
// at the end of parallel regions. Some resources are kept for reuse in the
// next parallel region.
//
// thread: thread data structure corresponding to implicit task
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash)
    __kmp_dephash_free_entries(thread, task->td_dephash);
}

// __kmp_free_implicit_task: Release resources associated to implicit tasks
// when these are destroyed
//
// thread: thread data structure corresponding to implicit task
void __kmp_free_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task && task->td_dephash) {
    __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
  }
}
// Round up a size to a power-of-two value val, used to pad co-allocated structures.
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
}
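
// __kmp_round_up_to_val rounds size up to a multiple of val, where val is a
// power of two. For example, with val == sizeof(void *) == 8:
//   size = 50 -> 50 & 7 != 0 -> size &= ~7 (48) -> size += 8 -> 56
//   size = 48 -> already a multiple of 8, returned unchanged.
// Minimal usage sketch (illustrative only):
#if 0
size_t padded = __kmp_round_up_to_val(sizeof(kmp_taskdata_t) + 50,
                                      sizeof(void *)); // -> a multiple of 8
#endif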
1019 kmp_task_t *__kmp_task_alloc(
ident_t *loc_ref, kmp_int32 gtid,
1020 kmp_tasking_flags_t *flags,
1021 size_t sizeof_kmp_task_t,
size_t sizeof_shareds,
1022 kmp_routine_entry_t task_entry) {
1024 kmp_taskdata_t *taskdata;
1025 kmp_info_t *thread = __kmp_threads[gtid];
1026 kmp_team_t *team = thread->th.th_team;
1027 kmp_taskdata_t *parent_task = thread->th.th_current_task;
1028 size_t shareds_offset;
1030 if (!TCR_4(__kmp_init_middle))
1031 __kmp_middle_initialize();
1033 KA_TRACE(10, (
"__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " 1034 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1035 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
1036 sizeof_shareds, task_entry));
1038 if (parent_task->td_flags.final) {
1039 if (flags->merged_if0) {
1043 if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
1047 KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
1051 if (flags->proxy == TASK_PROXY) {
1052 flags->tiedness = TASK_UNTIED;
1053 flags->merged_if0 = 1;
1057 if ((thread->th.th_task_team) == NULL) {
1060 KMP_DEBUG_ASSERT(team->t.t_serialized);
1062 (
"T#%d creating task team in __kmp_task_alloc for proxy task\n",
1064 __kmp_task_team_setup(
1067 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
1069 kmp_task_team_t *task_team = thread->th.th_task_team;
1072 if (!KMP_TASKING_ENABLED(task_team)) {
1075 (
"T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
1076 __kmp_enable_tasking(task_team, thread);
1077 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1078 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
1080 if (thread_data->td.td_deque == NULL) {
1081 __kmp_alloc_task_deque(thread, thread_data);
1085 if (task_team->tt.tt_found_proxy_tasks == FALSE)
1086 TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
1092 shareds_offset =
sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
1093 shareds_offset = __kmp_round_up_to_val(shareds_offset,
sizeof(
void *));
1096 KA_TRACE(30, (
"__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
1098 KA_TRACE(30, (
"__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
1103 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
1106 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
1109 ANNOTATE_HAPPENS_AFTER(taskdata);
1111 task = KMP_TASKDATA_TO_TASK(taskdata);
1114 #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD 1115 KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (
sizeof(
double) - 1)) == 0);
1116 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (
sizeof(
double) - 1)) == 0);
1118 KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (
sizeof(_Quad) - 1)) == 0);
1119 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (
sizeof(_Quad) - 1)) == 0);
1121 if (sizeof_shareds > 0) {
1123 task->shareds = &((
char *)taskdata)[shareds_offset];
1125 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (
sizeof(
void *) - 1)) ==
1128 task->shareds = NULL;
1130 task->routine = task_entry;
1133 taskdata->td_task_id = KMP_GEN_TASK_ID();
1134 taskdata->td_team = team;
1135 taskdata->td_alloc_thread = thread;
1136 taskdata->td_parent = parent_task;
1137 taskdata->td_level = parent_task->td_level + 1;
1138 KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
1139 taskdata->td_ident = loc_ref;
1140 taskdata->td_taskwait_ident = NULL;
1141 taskdata->td_taskwait_counter = 0;
1142 taskdata->td_taskwait_thread = 0;
1143 KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
1146 if (flags->proxy == TASK_FULL)
1148 copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
1150 taskdata->td_flags.tiedness = flags->tiedness;
1151 taskdata->td_flags.final = flags->final;
1152 taskdata->td_flags.merged_if0 = flags->merged_if0;
1154 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
1155 #endif // OMP_40_ENABLED 1157 taskdata->td_flags.proxy = flags->proxy;
1158 taskdata->td_task_team = thread->th.th_task_team;
1159 taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1161 taskdata->td_flags.tasktype = TASK_EXPLICIT;
1164 taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1167 taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1173 taskdata->td_flags.task_serial =
1174 (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
1175 taskdata->td_flags.tasking_ser);
1177 taskdata->td_flags.started = 0;
1178 taskdata->td_flags.executing = 0;
1179 taskdata->td_flags.complete = 0;
1180 taskdata->td_flags.freed = 0;
1182 taskdata->td_flags.native = flags->native;
1184 KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
1186 KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
1188 taskdata->td_taskgroup =
1189 parent_task->td_taskgroup;
1190 taskdata->td_dephash = NULL;
1191 taskdata->td_depnode = NULL;
1193 if (flags->tiedness == TASK_UNTIED)
1194 taskdata->td_last_tied = NULL;
1196 taskdata->td_last_tied = taskdata;
1199 if (UNLIKELY(ompt_enabled.enabled))
1200 __ompt_task_init(taskdata, gtid);
1205 if (flags->proxy == TASK_PROXY ||
1206 !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
1208 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
1211 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
1213 if (parent_task->td_taskgroup)
1214 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
1218 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
1219 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
1223 KA_TRACE(20, (
"__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1224 gtid, taskdata, taskdata->td_parent));
1225 ANNOTATE_HAPPENS_BEFORE(task);
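
// __kmp_task_alloc carves one allocation into co-located pieces: the
// kmp_taskdata_t header, the kmp_task_t visible to the compiler
// (KMP_TASKDATA_TO_TASK points just past the header), and the shareds block at
// shareds_offset, rounded so the shareds pointer stays pointer-aligned.
// Rough sketch of the offset computation (illustrative only; it reuses the
// local names of __kmp_task_alloc and is not compiled):
#if 0
size_t shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));
char *base = (char *)taskdata;                    // start of the single allocation
kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata); // just past the header
void *shareds = sizeof_shareds > 0 ? base + shareds_offset : NULL;
#endif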
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;

  input_flags->native = FALSE;
// __kmp_task_alloc() sets up all other runtime flags

#if OMP_45_ENABLED
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));
#else
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                sizeof_kmp_task_t, sizeof_shareds, task_entry));
#endif

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
1266 static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
1267 kmp_taskdata_t *current_task) {
1268 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
1274 30, (
"__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1275 gtid, taskdata, current_task));
1276 KMP_DEBUG_ASSERT(task);
1278 if (taskdata->td_flags.proxy == TASK_PROXY &&
1279 taskdata->td_flags.complete == 1) {
1284 (
"__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1287 __kmp_bottom_half_finish_proxy(gtid, task);
1289 KA_TRACE(30, (
"__kmp_invoke_task(exit): T#%d completed bottom finish for " 1290 "proxy task %p, resuming task %p\n",
1291 gtid, taskdata, current_task));
1300 ompt_thread_info_t oldInfo;
1301 if (UNLIKELY(ompt_enabled.enabled)) {
1303 thread = __kmp_threads[gtid];
1304 oldInfo = thread->th.ompt_thread_info;
1305 thread->th.ompt_thread_info.wait_id = 0;
1306 thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
1307 ? omp_state_work_serial
1308 : omp_state_work_parallel;
1309 taskdata->ompt_task_info.frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0);
1315 if (taskdata->td_flags.proxy != TASK_PROXY) {
1317 ANNOTATE_HAPPENS_AFTER(task);
1318 __kmp_task_start(gtid, task, current_task);
1327 if (__kmp_omp_cancellation) {
1328 thread = __kmp_threads[gtid];
1329 kmp_team_t *this_team = thread->th.th_team;
1330 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1331 if ((taskgroup && taskgroup->cancel_request) ||
1332 (this_team->t.t_cancel_request == cancel_parallel)) {
1333 #if OMPT_SUPPORT && OMPT_OPTIONAL 1334 ompt_data_t *task_data;
1335 if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
1336 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
1337 ompt_callbacks.ompt_callback(ompt_callback_cancel)(
1339 ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
1340 : ompt_cancel_parallel) |
1341 ompt_cancel_discarded_task,
1354 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
1355 taskdata->td_last_tied = current_task->td_last_tied;
1356 KMP_DEBUG_ASSERT(taskdata->td_last_tied);
1358 #if KMP_STATS_ENABLED 1360 switch (KMP_GET_THREAD_STATE()) {
1361 case FORK_JOIN_BARRIER:
1362 KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
1365 KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
1368 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
1371 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
1374 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
1377 KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
1380 #endif // KMP_STATS_ENABLED 1381 #endif // OMP_40_ENABLED 1385 if (UNLIKELY(ompt_enabled.enabled))
1386 __ompt_task_start(task, current_task, gtid);
1389 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1390 kmp_uint64 cur_time;
1391 kmp_int32 kmp_itt_count_task =
1392 __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
1393 current_task->td_flags.tasktype == TASK_IMPLICIT;
1394 if (kmp_itt_count_task) {
1395 thread = __kmp_threads[gtid];
1397 if (thread->th.th_bar_arrive_time)
1398 cur_time = __itt_get_timestamp();
1400 kmp_itt_count_task = 0;
1404 #ifdef KMP_GOMP_COMPAT 1405 if (taskdata->td_flags.native) {
1406 ((void (*)(
void *))(*(task->routine)))(task->shareds);
1410 (*(task->routine))(gtid, task);
1412 KMP_POP_PARTITIONED_TIMER();
1414 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1415 if (kmp_itt_count_task) {
1417 thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1423 #endif // OMP_40_ENABLED 1428 if (taskdata->td_flags.proxy != TASK_PROXY) {
1430 ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
1432 if (UNLIKELY(ompt_enabled.enabled)) {
1433 thread->th.ompt_thread_info = oldInfo;
1434 if (taskdata->td_flags.tiedness == TASK_TIED) {
1435 taskdata->ompt_task_info.frame.exit_frame = NULL;
1437 __kmp_task_finish<true>(gtid, task, current_task);
1440 __kmp_task_finish<false>(gtid, task, current_task);
1447 (
"__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1448 gtid, taskdata, current_task));
1462 kmp_int32 __kmpc_omp_task_parts(
ident_t *loc_ref, kmp_int32 gtid,
1463 kmp_task_t *new_task) {
1464 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1466 KA_TRACE(10, (
"__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
1467 loc_ref, new_taskdata));
1470 kmp_taskdata_t *parent;
1471 if (UNLIKELY(ompt_enabled.enabled)) {
1472 parent = new_taskdata->td_parent;
1473 if (ompt_enabled.ompt_callback_task_create) {
1474 ompt_data_t task_data = ompt_data_none;
1475 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1476 parent ? &(parent->ompt_task_info.task_data) : &task_data,
1477 parent ? &(parent->ompt_task_info.frame) : NULL,
1478 &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
1479 OMPT_GET_RETURN_ADDRESS(0));
1487 if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1489 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1490 new_taskdata->td_flags.task_serial = 1;
1491 __kmp_invoke_task(gtid, new_task, current_task);
1496 (
"__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: " 1497 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
1498 gtid, loc_ref, new_taskdata));
1500 ANNOTATE_HAPPENS_BEFORE(new_task);
1502 if (UNLIKELY(ompt_enabled.enabled)) {
1503 parent->ompt_task_info.frame.enter_frame = NULL;
1506 return TASK_CURRENT_NOT_QUEUED;
// __kmp_omp_task: Schedule a non-thread-switchable task for execution.
// If serialize_immediate is TRUE and the task is executed immediately, its
// execution is serialized.
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

/* Should we execute the new task or queue it? For now, let's just always try
   to queue it.  If the queue fills up, then we'll execute it.  */
#if OMP_45_ENABLED
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#else
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#endif
  { // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  ANNOTATE_HAPPENS_BEFORE(new_task);
  return TASK_CURRENT_NOT_QUEUED;
}
1555 kmp_int32 __kmpc_omp_task(
ident_t *loc_ref, kmp_int32 gtid,
1556 kmp_task_t *new_task) {
1558 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1560 #if KMP_DEBUG || OMPT_SUPPORT 1561 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1563 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1567 kmp_taskdata_t *parent = NULL;
1568 if (UNLIKELY(ompt_enabled.enabled)) {
1569 if (!new_taskdata->td_flags.started) {
1570 OMPT_STORE_RETURN_ADDRESS(gtid);
1571 parent = new_taskdata->td_parent;
1572 if (!parent->ompt_task_info.frame.enter_frame) {
1573 parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1575 if (ompt_enabled.ompt_callback_task_create) {
1576 ompt_data_t task_data = ompt_data_none;
1577 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1578 parent ? &(parent->ompt_task_info.task_data) : &task_data,
1579 parent ? &(parent->ompt_task_info.frame) : NULL,
1580 &(new_taskdata->ompt_task_info.task_data),
1581 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1582 OMPT_LOAD_RETURN_ADDRESS(gtid));
1587 __ompt_task_finish(new_task,
1588 new_taskdata->ompt_task_info.scheduling_parent,
1590 new_taskdata->ompt_task_info.frame.exit_frame = NULL;
1595 res = __kmp_omp_task(gtid, new_task,
true);
1597 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning " 1598 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1599 gtid, loc_ref, new_taskdata));
1601 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
1602 parent->ompt_task_info.frame.enter_frame = NULL;
1621 kmp_int32 __kmp_omp_taskloop_task(
ident_t *loc_ref, kmp_int32 gtid,
1622 kmp_task_t *new_task,
void *codeptr_ra) {
1624 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1626 #if KMP_DEBUG || OMPT_SUPPORT 1627 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1629 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1633 kmp_taskdata_t *parent = NULL;
1634 if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
1635 parent = new_taskdata->td_parent;
1636 if (!parent->ompt_task_info.frame.enter_frame)
1637 parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1638 if (ompt_enabled.ompt_callback_task_create) {
1639 ompt_data_t task_data = ompt_data_none;
1640 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1641 parent ? &(parent->ompt_task_info.task_data) : &task_data,
1642 parent ? &(parent->ompt_task_info.frame) : NULL,
1643 &(new_taskdata->ompt_task_info.task_data),
1644 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1650 res = __kmp_omp_task(gtid, new_task,
true);
1652 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning " 1653 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1654 gtid, loc_ref, new_taskdata));
1656 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
1657 parent->ompt_task_info.frame.enter_frame = NULL;
1663 template <
bool ompt>
1664 static kmp_int32 __kmpc_omp_taskwait_template(
ident_t *loc_ref, kmp_int32 gtid,
1665 void *frame_address,
1666 void *return_address) {
1667 kmp_taskdata_t *taskdata;
1669 int thread_finished = FALSE;
1670 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
1672 KA_TRACE(10, (
"__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
1674 if (__kmp_tasking_mode != tskm_immediate_exec) {
1675 thread = __kmp_threads[gtid];
1676 taskdata = thread->th.th_current_task;
1678 #if OMPT_SUPPORT && OMPT_OPTIONAL 1679 ompt_data_t *my_task_data;
1680 ompt_data_t *my_parallel_data;
1683 my_task_data = &(taskdata->ompt_task_info.task_data);
1684 my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
1686 taskdata->ompt_task_info.frame.enter_frame = frame_address;
1688 if (ompt_enabled.ompt_callback_sync_region) {
1689 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1690 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
1691 my_task_data, return_address);
1694 if (ompt_enabled.ompt_callback_sync_region_wait) {
1695 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1696 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
1697 my_task_data, return_address);
1700 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1707 taskdata->td_taskwait_counter += 1;
1708 taskdata->td_taskwait_ident = loc_ref;
1709 taskdata->td_taskwait_thread = gtid + 1;
1712 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1713 if (itt_sync_obj != NULL)
1714 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
1718 !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
1721 must_wait = must_wait || (thread->th.th_task_team != NULL &&
1722 thread->th.th_task_team->tt.tt_found_proxy_tasks);
1725 kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
1726 &(taskdata->td_incomplete_child_tasks)),
1728 while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
1729 flag.execute_tasks(thread, gtid, FALSE,
1730 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
1731 __kmp_task_stealing_constraint);
1735 if (itt_sync_obj != NULL)
1736 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
1741 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
1743 #if OMPT_SUPPORT && OMPT_OPTIONAL 1745 if (ompt_enabled.ompt_callback_sync_region_wait) {
1746 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1747 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
1748 my_task_data, return_address);
1750 if (ompt_enabled.ompt_callback_sync_region) {
1751 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1752 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
1753 my_task_data, return_address);
1755 taskdata->ompt_task_info.frame.enter_frame = NULL;
1757 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1759 ANNOTATE_HAPPENS_AFTER(taskdata);
1762 KA_TRACE(10, (
"__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " 1763 "returning TASK_CURRENT_NOT_QUEUED\n",
1766 return TASK_CURRENT_NOT_QUEUED;
#if OMPT_SUPPORT && OMPT_OPTIONAL
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
// complete
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(1),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
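
// "#pragma omp taskwait" lowers to a single call of the entry point above;
// minimal illustrative sketch (loc/gtid naming is schematic):
#if 0
void lowered_taskwait(ident_t *loc, kmp_int32 gtid) {
  __kmpc_omp_taskwait(loc, gtid); // returns once children of the current task finish
}
#endif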
1793 kmp_int32 __kmpc_omp_taskyield(
ident_t *loc_ref, kmp_int32 gtid,
int end_part) {
1794 kmp_taskdata_t *taskdata;
1796 int thread_finished = FALSE;
1799 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
1801 KA_TRACE(10, (
"__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1802 gtid, loc_ref, end_part));
1804 if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
1805 thread = __kmp_threads[gtid];
1806 taskdata = thread->th.th_current_task;
1813 taskdata->td_taskwait_counter += 1;
1814 taskdata->td_taskwait_ident = loc_ref;
1815 taskdata->td_taskwait_thread = gtid + 1;
1818 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1819 if (itt_sync_obj != NULL)
1820 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
1822 if (!taskdata->td_flags.team_serial) {
1823 kmp_task_team_t *task_team = thread->th.th_task_team;
1824 if (task_team != NULL) {
1825 if (KMP_TASKING_ENABLED(task_team)) {
1827 if (UNLIKELY(ompt_enabled.enabled))
1828 thread->th.ompt_thread_info.ompt_task_yielded = 1;
1830 __kmp_execute_tasks_32(
1831 thread, gtid, NULL, FALSE,
1832 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
1833 __kmp_task_stealing_constraint);
1835 if (UNLIKELY(ompt_enabled.enabled))
1836 thread->th.ompt_thread_info.ompt_task_yielded = 0;
1842 if (itt_sync_obj != NULL)
1843 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
1848 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
1851 KA_TRACE(10, (
"__kmpc_omp_taskyield(exit): T#%d task %p resuming, " 1852 "returning TASK_CURRENT_NOT_QUEUED\n",
1855 return TASK_CURRENT_NOT_QUEUED;
// Flags for a task reduction item.
typedef struct kmp_task_red_flags {
  unsigned lazy_priv : 1; // hint: allocate private data lazily
  unsigned reserved31 : 31;
} kmp_task_red_flags_t;

// Internal structure keeping a task reduction item.
typedef struct kmp_task_red_data {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_priv; // thread-specific data
  void *reduce_pend; // end of private data, for range checks
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // additional info from compiler
} kmp_task_red_data_t;

// Structure of the entry passed to __kmpc_task_reduction_init by the compiler.
typedef struct kmp_task_red_input {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // additional info from compiler
} kmp_task_red_input_t;
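
// Sketch of how a compiler (or a test) describes one reduction item to
// __kmpc_task_reduction_init. Illustrative only: red_init, red_comb and the
// int item are invented for the example; flags.lazy_priv = 0 asks for eagerly
// allocated per-thread copies.
#if 0
static void red_init(void *p) { *(int *)p = 0; }
static void red_comb(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; }

void setup_task_reduction(int gtid, int *shared_sum) {
  kmp_task_red_input_t in;
  in.reduce_shar = shared_sum; // shared reduction variable
  in.reduce_size = sizeof(int);
  in.reduce_init = (void *)red_init;
  in.reduce_fini = NULL; // no per-copy finalizer needed
  in.reduce_comb = (void *)red_comb;
  in.flags.lazy_priv = 0;
  in.flags.reserved31 = 0;
  (void)__kmpc_task_reduction_init(gtid, /*num items*/ 1, &in);
}
#endif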
1897 void *__kmpc_task_reduction_init(
int gtid,
int num,
void *data) {
1898 kmp_info_t *thread = __kmp_threads[gtid];
1899 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
1900 kmp_int32 nth = thread->th.th_team_nproc;
1901 kmp_task_red_input_t *input = (kmp_task_red_input_t *)data;
1902 kmp_task_red_data_t *arr;
1905 KMP_ASSERT(tg != NULL);
1906 KMP_ASSERT(data != NULL);
1907 KMP_ASSERT(num > 0);
1909 KA_TRACE(10, (
"__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
1913 KA_TRACE(10, (
"__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
1915 arr = (kmp_task_red_data_t *)__kmp_thread_malloc(
1916 thread, num *
sizeof(kmp_task_red_data_t));
1917 for (
int i = 0; i < num; ++i) {
1918 void (*f_init)(
void *) = (
void (*)(
void *))(input[i].reduce_init);
1919 size_t size = input[i].reduce_size - 1;
1921 size += CACHE_LINE - size % CACHE_LINE;
1922 KMP_ASSERT(input[i].reduce_comb != NULL);
1923 arr[i].reduce_shar = input[i].reduce_shar;
1924 arr[i].reduce_size = size;
1925 arr[i].reduce_init = input[i].reduce_init;
1926 arr[i].reduce_fini = input[i].reduce_fini;
1927 arr[i].reduce_comb = input[i].reduce_comb;
1928 arr[i].flags = input[i].flags;
1929 if (!input[i].flags.lazy_priv) {
1931 arr[i].reduce_priv = __kmp_allocate(nth * size);
1932 arr[i].reduce_pend = (
char *)(arr[i].reduce_priv) + nth * size;
1933 if (f_init != NULL) {
1935 for (
int j = 0; j < nth; ++j) {
1936 f_init((
char *)(arr[i].reduce_priv) + j * size);
1942 arr[i].reduce_priv = __kmp_allocate(nth *
sizeof(
void *));
1945 tg->reduce_data = (
void *)arr;
1946 tg->reduce_num_data = num;
1959 void *__kmpc_task_reduction_get_th_data(
int gtid,
void *tskgrp,
void *data) {
1960 kmp_info_t *thread = __kmp_threads[gtid];
1961 kmp_int32 nth = thread->th.th_team_nproc;
1965 kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
1967 tg = thread->th.th_current_task->td_taskgroup;
1968 KMP_ASSERT(tg != NULL);
1969 kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data);
1970 kmp_int32 num = tg->reduce_num_data;
1971 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1973 KMP_ASSERT(data != NULL);
1974 while (tg != NULL) {
1975 for (
int i = 0; i < num; ++i) {
1976 if (!arr[i].flags.lazy_priv) {
1977 if (data == arr[i].reduce_shar ||
1978 (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
1979 return (
char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
1982 void **p_priv = (
void **)(arr[i].reduce_priv);
1983 if (data == arr[i].reduce_shar)
1986 for (
int j = 0; j < nth; ++j)
1987 if (data == p_priv[j])
1991 if (p_priv[tid] == NULL) {
1993 void (*f_init)(
void *) = (
void (*)(
void *))(arr[i].reduce_init);
1994 p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
1995 if (f_init != NULL) {
1996 f_init(p_priv[tid]);
2003 arr = (kmp_task_red_data_t *)(tg->reduce_data);
2004 num = tg->reduce_num_data;
2006 KMP_ASSERT2(0,
"Unknown task reduction item");
2012 static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
2013 kmp_int32 nth = th->th.th_team_nproc;
2014 KMP_DEBUG_ASSERT(nth > 1);
2015 kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data;
2016 kmp_int32 num = tg->reduce_num_data;
2017 for (
int i = 0; i < num; ++i) {
2018 void *sh_data = arr[i].reduce_shar;
2019 void (*f_fini)(
void *) = (
void (*)(
void *))(arr[i].reduce_fini);
2020 void (*f_comb)(
void *,
void *) =
2021 (
void (*)(
void *,
void *))(arr[i].reduce_comb);
2022 if (!arr[i].flags.lazy_priv) {
2023 void *pr_data = arr[i].reduce_priv;
2024 size_t size = arr[i].reduce_size;
2025 for (
int j = 0; j < nth; ++j) {
2026 void *priv_data = (
char *)pr_data + j * size;
2027 f_comb(sh_data, priv_data);
2032 void **pr_data = (
void **)(arr[i].reduce_priv);
2033 for (
int j = 0; j < nth; ++j) {
2034 if (pr_data[j] != NULL) {
2035 f_comb(sh_data, pr_data[j]);
2038 __kmp_free(pr_data[j]);
2042 __kmp_free(arr[i].reduce_priv);
2044 __kmp_thread_free(th, arr);
2045 tg->reduce_data = NULL;
2046 tg->reduce_num_data = 0;
2052 void __kmpc_taskgroup(
ident_t *loc,
int gtid) {
2053 kmp_info_t *thread = __kmp_threads[gtid];
2054 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2055 kmp_taskgroup_t *tg_new =
2056 (kmp_taskgroup_t *)__kmp_thread_malloc(thread,
sizeof(kmp_taskgroup_t));
2057 KA_TRACE(10, (
"__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
2058 KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
2059 KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
2060 tg_new->parent = taskdata->td_taskgroup;
2062 tg_new->reduce_data = NULL;
2063 tg_new->reduce_num_data = 0;
2065 taskdata->td_taskgroup = tg_new;
2067 #if OMPT_SUPPORT && OMPT_OPTIONAL 2068 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
2069 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2071 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2072 kmp_team_t *team = thread->th.th_team;
2073 ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
2075 ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
2077 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2078 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2079 &(my_task_data), codeptr);
2086 void __kmpc_end_taskgroup(
ident_t *loc,
int gtid) {
2087 kmp_info_t *thread = __kmp_threads[gtid];
2088 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2089 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
2090 int thread_finished = FALSE;
2092 #if OMPT_SUPPORT && OMPT_OPTIONAL 2094 ompt_data_t my_task_data;
2095 ompt_data_t my_parallel_data;
2097 if (UNLIKELY(ompt_enabled.enabled)) {
2098 team = thread->th.th_team;
2099 my_task_data = taskdata->ompt_task_info.task_data;
2101 my_parallel_data = team->t.ompt_team_info.parallel_data;
2102 codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2104 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2108 KA_TRACE(10, (
"__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
2109 KMP_DEBUG_ASSERT(taskgroup != NULL);
2110 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
2112 if (__kmp_tasking_mode != tskm_immediate_exec) {
2114 taskdata->td_taskwait_counter += 1;
2115 taskdata->td_taskwait_ident = loc;
2116 taskdata->td_taskwait_thread = gtid + 1;
2120 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
2121 if (itt_sync_obj != NULL)
2122 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
2125 #if OMPT_SUPPORT && OMPT_OPTIONAL 2126 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2127 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2128 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2129 &(my_task_data), codeptr);
2134 if (!taskdata->td_flags.team_serial ||
2135 (thread->th.th_task_team != NULL &&
2136 thread->th.th_task_team->tt.tt_found_proxy_tasks))
2138 if (!taskdata->td_flags.team_serial)
2141 kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)),
2143 while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
2144 flag.execute_tasks(thread, gtid, FALSE,
2145 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
2146 __kmp_task_stealing_constraint);
2149 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2151 #if OMPT_SUPPORT && OMPT_OPTIONAL 2152 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2153 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2154 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2155 &(my_task_data), codeptr);
2160 if (itt_sync_obj != NULL)
2161 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
2164 KMP_DEBUG_ASSERT(taskgroup->count == 0);
2167 if (taskgroup->reduce_data != NULL)
2168 __kmp_task_reduction_fini(thread, taskgroup);
2171 taskdata->td_taskgroup = taskgroup->parent;
2172 __kmp_thread_free(thread, taskgroup);
2174 KA_TRACE(10, (
"__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
2176 ANNOTATE_HAPPENS_AFTER(taskdata);
2178 #if OMPT_SUPPORT && OMPT_OPTIONAL 2179 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
2180 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2181 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2182 &(my_task_data), codeptr);
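
// "#pragma omp taskgroup" lowers to a begin/end pair around the structured
// block; minimal illustrative sketch:
#if 0
void lowered_taskgroup(ident_t *loc, int gtid) {
  __kmpc_taskgroup(loc, gtid);
  /* ... generate tasks belonging to the taskgroup here ... */
  __kmpc_end_taskgroup(loc, gtid); // waits for all descendant tasks of the group
}
#endif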
2189 static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
2190 kmp_task_team_t *task_team,
2191 kmp_int32 is_constrained) {
2193 kmp_taskdata_t *taskdata;
2194 kmp_thread_data_t *thread_data;
2197 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2198 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
2201 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
2203 KA_TRACE(10, (
"__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
2204 gtid, thread_data->td.td_deque_ntasks,
2205 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2207 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2209 (
"__kmp_remove_my_task(exit #1): T#%d No tasks to remove: " 2210 "ntasks=%d head=%u tail=%u\n",
2211 gtid, thread_data->td.td_deque_ntasks,
2212 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2216 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
2218 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2219 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2221 (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " 2222 "ntasks=%d head=%u tail=%u\n",
2223 gtid, thread_data->td.td_deque_ntasks,
2224 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2228 tail = (thread_data->td.td_deque_tail - 1) &
2229 TASK_DEQUE_MASK(thread_data->td);
2230 taskdata = thread_data->td.td_deque[tail];
2232 if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
2236 kmp_taskdata_t *current = thread->th.th_current_task->td_last_tied;
2237 KMP_DEBUG_ASSERT(current != NULL);
2239 if (current->td_flags.tasktype == TASK_EXPLICIT ||
2240 current->td_taskwait_thread > 0) {
2241 kmp_int32 level = current->td_level;
2242 kmp_taskdata_t *parent = taskdata->td_parent;
2243 while (parent != current && parent->td_level > level) {
2244 parent = parent->td_parent;
2246 KMP_DEBUG_ASSERT(parent != NULL);
2248 if (parent != current) {
2250 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2251 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " 2252 "ntasks=%d head=%u tail=%u\n",
2253 gtid, thread_data->td.td_deque_ntasks,
2254 thread_data->td.td_deque_head,
2255 thread_data->td.td_deque_tail));
2261 thread_data->td.td_deque_tail = tail;
2262 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
2264 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2266 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d task %p removed: " 2267 "ntasks=%d head=%u tail=%u\n",
2268 gtid, taskdata, thread_data->td.td_deque_ntasks,
2269 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2271 task = KMP_TASKDATA_TO_TASK(taskdata);
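
// The constrained-scheduling check above enforces the OpenMP Task Scheduling
// Constraint: a candidate task may only be picked if it descends from the
// thread's last suspended tied task (td_last_tied). Illustrative helper sketch
// only (is_descendant_of is an invented name, not a runtime function):
#if 0
static int is_descendant_of(kmp_taskdata_t *task, kmp_taskdata_t *ancestor) {
  kmp_taskdata_t *parent = task->td_parent;
  while (parent != ancestor && parent->td_level > ancestor->td_level)
    parent = parent->td_parent; // walk up, at most to the ancestor's level
  return parent == ancestor;
}
#endif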
static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
                                    kmp_task_team_t *task_team,
                                    std::atomic<kmp_int32> *unfinished_threads,
                                    int *thread_finished,
                                    kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *current;
  kmp_thread_data_t *victim_td, *threads_data;
  kmp_int32 level, target;
  kmp_int32 victim_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  threads_data = task_team->tt.tt_threads_data;
  KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition

  victim_tid = victim_thr->th.th_info.ds.ds_tid;
  victim_td = &threads_data[victim_tid];

  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
                "task_team=%p ntasks=%d head=%u tail=%u\n",
                gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                victim_td->td.td_deque_tail));

  if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
    KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                  victim_td->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);

  int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
  // Check again after acquiring the lock.
  if (ntasks == 0) {
    __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
    KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
    return NULL;
  }

  KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);

  taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
  if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
    // The candidate must obey the Task Scheduling Constraint (TSC): only a
    // descendant of the last deferred tied task may be scheduled.
    current = __kmp_threads[gtid]->th.th_current_task->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // Check that the last tied task is not suspended on a barrier.
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      level = current->td_level;
      kmp_taskdata_t *parent = taskdata->td_parent;
      while (parent != current && parent->td_level > level) {
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current) {
        if (!task_team->tt.tt_untied_task_encountered) {
          // The TSC does not allow stealing the victim task.
          __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
          KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
                        "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                        gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                        ntasks, victim_td->td.td_deque_head,
                        victim_td->td.td_deque_tail));
          return NULL;
        }
        taskdata = NULL; // will check other tasks in the victim's deque
      }
    }
  }
  if (taskdata != NULL) {
    // Bump head pointer and wrap.
    victim_td->td.td_deque_head =
        (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
  } else {
    // Walk through the victim's deque trying to steal any TSC-compatible task.
    int i;
    target = victim_td->td.td_deque_head;
    for (i = 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      taskdata = victim_td->td.td_deque[target];
      if (taskdata->td_flags.tiedness == TASK_TIED) {
        // Check whether this candidate obeys the TSC.
        kmp_taskdata_t *parent = taskdata->td_parent;
        while (parent != current && parent->td_level > level) {
          parent = parent->td_parent;
          KMP_DEBUG_ASSERT(parent != NULL);
        }
        if (parent != current) {
          taskdata = NULL; // the TSC does not allow stealing this candidate
          continue;
        }
      }
      break; // found a task to steal
    }
    if (taskdata == NULL) {
      // No appropriate candidate found; give up.
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    // Close the hole left by the stolen task: shift the remaining tasks in
    // the deque left by one slot.
    int prev = target;
    for (i = i + 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        victim_td->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
    victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  if (*thread_finished) {
    // Un-mark this victim as finished before releasing the lock; otherwise
    // other threads might be prematurely released from the barrier.
    kmp_int32 count = KMP_ATOMIC_INC(unfinished_threads);
    KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: "
                  "task_team=%p\n",
                  gtid, count + 1, task_team));
    *thread_finished = FALSE;
  }
  TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);

  __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
                "task_team=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
                ntasks, victim_td->td.td_deque_head,
                victim_td->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
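
// __kmp_execute_tasks_template: execute tasks until either the flag's
// condition is satisfied (return TRUE) or no more tasks can be found
// (return FALSE). The loop first drains the thread's own deque, then tries
// to steal, preferring the last successful victim and otherwise picking a
// random one, waking sleeping victims when blocktime is finite.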
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_thread_data_t *threads_data;
  kmp_task_t *task;
  kmp_info_t *other_thread;
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  std::atomic<kmp_int32> *unfinished_threads;
  kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
                      tid = thread->th.th_info.ds.ds_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);

  if (task_team == NULL || current_task == NULL)
    return FALSE;

  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));

  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  KMP_DEBUG_ASSERT(threads_data != NULL);

  nthreads = task_team->tt.tt_nproc;
  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
#if OMP_45_ENABLED
  KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
#else
  KMP_DEBUG_ASSERT(nthreads > 1);
#endif
  KMP_DEBUG_ASSERT(*unfinished_threads >= 0);

  while (1) { // Outer loop keeps trying to find tasks in case a single thread
    // is getting tasks from target constructs.
    while (1) { // Inner loop to find a task and execute it.
      task = NULL;
      if (use_own_tasks) { // check own queue first
        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
      }
      if ((task == NULL) && (nthreads > 1)) { // Steal a task
        int asleep = 1;
        use_own_tasks = 0;
        // Try to steal from the last place I stole from successfully.
        if (victim_tid == -2) { // haven't stolen anything yet
          victim_tid = threads_data[tid].td.td_deque_last_stolen;
          if (victim_tid != -1) // if we have a last stolen-from victim
            other_thread = threads_data[victim_tid].td.td_thr;
        }
        if (victim_tid != -1) { // found last victim
          asleep = 0;
        } else if (!new_victim) { // no recent steals; pick a random thread
          do { // Find a different thread to steal work from.
            victim_tid = __kmp_get_random(thread) % (nthreads - 1);
            if (victim_tid >= tid) {
              ++victim_tid; // Adjusts random distribution to exclude self
            }
            other_thread = threads_data[victim_tid].td.td_thr;
            // If the victim is sleeping, wake it up and try a different one;
            // a sleeping thread should not have tasks on its queue.
            if ((__kmp_tasking_mode == tskm_task_teams) &&
                (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
                (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
                 NULL)) {
              asleep = 1;
              __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
                                        other_thread->th.th_sleep_loc);
            } else {
              asleep = 0;
            }
          } while (asleep);
          new_victim = 1; // There is a new victim.
        }
        if (!asleep) { // We have a victim to try to steal from.
          task = __kmp_steal_task(other_thread, gtid, task_team,
                                  unfinished_threads, thread_finished,
                                  is_constrained);
        }
        if (task != NULL) { // set last stolen to victim
          if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
            threads_data[tid].td.td_deque_last_stolen = victim_tid;
          }
        } else { // No task found; unset last_stolen.
          KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
          victim_tid = -2; // no successful victim found
        }
      }

      if (task == NULL) // break out of tasking loop
        break;

// Found a task; execute it.
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (itt_sync_obj == NULL) { // we are at the fork barrier
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        }
        __kmp_itt_task_starting(itt_sync_obj);
      }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */
      // If only partway through the barrier and the condition is met, return
      // now so the barrier gather/release pattern can proceed.
      if (flag == NULL || (!final_spin && flag->done_check())) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
      if (thread->th.th_task_team == NULL) {
        break;
      }
      KMP_YIELD(__kmp_library == library_throughput); // Yield before next task
      // If a stolen task spawned tasks onto our own queue, use them again.
      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
                      "other tasks, restart\n",
                      gtid));
        use_own_tasks = 1;
        new_victim = 0;
      }
    }

    // The task source has been exhausted. If in the final spin loop of the
    // barrier, check if the termination condition is satisfied. The work
    // queue may be empty but there might be proxy tasks still executing.
    if (final_spin &&
        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0) {
      // First, decrement the #unfinished threads, if that has not already
      // been done. This decrement might be to the spin location and result
      // in the termination condition being satisfied.
      if (!*thread_finished) {
        kmp_int32 count;
        count = KMP_ATOMIC_DEC(unfinished_threads) - 1;
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                      "unfinished_threads to %d task_team=%p\n",
                      gtid, count, task_team));
        *thread_finished = TRUE;
      }
      if (flag != NULL && flag->done_check()) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
    }

    // If this thread's task team is NULL, the master has recognized that
    // there are no more tasks; bail out.
    if (thread->th.th_task_team == NULL) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
      return FALSE;
    }

    // We could be getting tasks from target constructs; if this is the only
    // thread, keep trying to execute tasks from its own queue.
    if (nthreads == 1) {
      use_own_tasks = 1;
    } else {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
      return FALSE;
    }
  }
}
int __kmp_execute_tasks_32(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
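
// __kmp_enable_tasking: called when the first task of a team is pushed. Sets
// up the per-thread task deque array for the task team and, when blocktime is
// finite, wakes up worker threads sleeping at the barrier so they can start
// stealing tasks.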
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr) {
  kmp_thread_data_t *threads_data;
  int nthreads, i, is_init_thread;

  KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));

  KMP_DEBUG_ASSERT(task_team != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);

  nthreads = task_team->tt.tt_nproc;
  KMP_DEBUG_ASSERT(nthreads > 0);
  KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);

  // Allocate or increase the size of threads_data if necessary.
  is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);

  if (!is_init_thread) {
    // Some other thread already set up the array.
    KA_TRACE(
        20,
        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
         __kmp_gtid_from_thread(this_thr)));
    return;
  }
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  KMP_DEBUG_ASSERT(threads_data != NULL);

  if ((__kmp_tasking_mode == tskm_task_teams) &&
      (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
    // Release any threads sleeping at the barrier, so that they can steal
    // tasks and execute them. Since the suspend mutex is not held here, a
    // thread that is just going to sleep may be missed; the stealing loop in
    // __kmp_execute_tasks_template() periodically rechecks for sleepers.
    for (i = 0; i < nthreads; i++) {
      volatile void *sleep_loc;
      kmp_info_t *thread = threads_data[i].td.td_thr;

      if (i == this_thr->th.th_info.ds.ds_tid) {
        continue;
      }
      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
          NULL) {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
      } else {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
      }
    }
  }

  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
}

// Free list of task_team data structures, protected by __kmp_task_team_lock.
static kmp_task_team_t *__kmp_free_task_teams = NULL;
kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data) {
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);

  // Initialize last stolen task field to "none".
  thread_data->td.td_deque_last_stolen = -1;

  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);

  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  // Allocate space for the task deque and set the size bookkeeping.
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
// __kmp_realloc_task_deque: double the size of a thread's task deque when it
// is full. The caller must hold the deque lock.
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  // Copy the tasks in head-to-tail order into the front of the new deque.
  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
// __kmp_free_task_deque: deallocate a thread's task deque.
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
  if (thread_data->td.td_deque != NULL) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    TCW_4(thread_data->td.td_deque_ntasks, 0);
    __kmp_free(thread_data->td.td_deque);
    thread_data->td.td_deque = NULL;
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  }

#ifdef BUILD_TIED_TASK_STACK
  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  }
#endif // BUILD_TIED_TASK_STACK
}

// __kmp_realloc_task_threads_data: allocate or grow the per-thread data array
// for a task team. Only the first thread to get here performs the
// (re)allocation; it returns TRUE, all other threads return FALSE.
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team) {
  kmp_thread_data_t **threads_data_p;
  kmp_int32 nthreads, maxthreads;
  int is_init_thread = FALSE;

  if (TCR_4(task_team->tt.tt_found_tasks)) {
    // Already reallocated and initialized.
    return FALSE;
  }

  threads_data_p = &task_team->tt.tt_threads_data;
  nthreads = task_team->tt.tt_nproc;
  maxthreads = task_team->tt.tt_max_threads;

  // All threads must lock when they encounter the first task of the implicit
  // task region to make sure threads_data fields are (re)initialized before
  // they are used.
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);

  if (!TCR_4(task_team->tt.tt_found_tasks)) {
    // first thread to enable tasking
    kmp_team_t *team = thread->th.th_team;
    int i;

    is_init_thread = TRUE;
    if (maxthreads < nthreads) {

      if (*threads_data_p != NULL) {
        kmp_thread_data_t *old_data = *threads_data_p;
        kmp_thread_data_t *new_data = NULL;

        KE_TRACE(
            10,
            ("__kmp_realloc_task_threads_data: T#%d reallocating "
             "threads data for task_team %p, new_size = %d, old_size = %d\n",
             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
        // Reallocate threads_data to have more elements than the current
        // array. All old data is copied to the new array and the old data is
        // not freed until the new array is fully initialized.
        new_data = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        // copy old data to new data
        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));

#ifdef BUILD_TIED_TASK_STACK
        // initialize task stacks for the newly added threads
        for (i = maxthreads; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
        // Install the new data and free the old data.
        (*threads_data_p) = new_data;
        __kmp_free(old_data);
      } else {
        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                      "threads data for task_team %p, size = %d\n",
                      __kmp_gtid_from_thread(thread), task_team, nthreads));
        // Make the initial allocation of the threads_data array.
        ANNOTATE_IGNORE_WRITES_BEGIN();
        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        ANNOTATE_IGNORE_WRITES_END();
#ifdef BUILD_TIED_TASK_STACK
        // initialize task stacks for all threads
        for (i = 0; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
      }
      task_team->tt.tt_max_threads = nthreads;
    } else {
      // If the array already has enough elements, go ahead and use it.
      KMP_DEBUG_ASSERT(*threads_data_p != NULL);
    }

    // Initialize threads_data pointers back to thread_info structures.
    for (i = 0; i < nthreads; i++) {
      kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
      thread_data->td.td_thr = team->t.t_threads[i];

      if (thread_data->td.td_deque_last_stolen >= nthreads) {
        // The last stolen field survives across teams/barriers, and the
        // number of threads may have changed; reset it to "none".
        thread_data->td.td_deque_last_stolen = -1;
      }
    }

    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  return is_init_thread;
}
// __kmp_free_task_threads_data: deallocate a task team's per-thread data
// array, including each thread's task deque.
static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
  if (task_team->tt.tt_threads_data != NULL) {
    int i;
    for (i = 0; i < task_team->tt.tt_max_threads; i++) {
      __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
    }
    __kmp_free(task_team->tt.tt_threads_data);
    task_team->tt.tt_threads_data = NULL;
  }
  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
}
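
// __kmp_allocate_task_team: allocate a task team for the given team, first
// trying to take one from the global free list (under __kmp_task_team_lock)
// and falling back to a fresh allocation; the returned task team is
// (re)initialized for team->t.t_nproc threads.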
static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
                                                 kmp_team_t *team) {
  kmp_task_team_t *task_team = NULL;
  int nthreads;

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Take a task team from the task team pool.
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    if (__kmp_free_task_teams != NULL) {
      task_team = __kmp_free_task_teams;
      TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
      task_team->tt.tt_next = NULL;
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }

  if (task_team == NULL) {
    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                  "task team for team %p\n",
                  __kmp_gtid_from_thread(thread), team));
    // Allocate a new task team; __kmp_allocate zeroes the returned memory.
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
  }

  TCW_4(task_team->tt.tt_found_tasks, FALSE);
  TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
  task_team->tt.tt_nproc = nthreads = team->t.t_nproc;

  KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
  TCW_4(task_team->tt.tt_active, TRUE);

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
                "unfinished_threads init'd to %d\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
                KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
  return task_team;
}
// __kmp_free_task_team: put the task team back on the global free list.
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
                thread ? __kmp_gtid_from_thread(thread) : -1, task_team));

  __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);

  KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
  task_team->tt.tt_next = __kmp_free_task_teams;
  TCW_PTR(__kmp_free_task_teams, task_team);

  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}

// __kmp_reap_task_teams: free all task teams on the free list.
// Should only be done during library shutdown.
void __kmp_reap_task_teams(void) {
  kmp_task_team_t *task_team;

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    while ((task_team = __kmp_free_task_teams) != NULL) {
      __kmp_free_task_teams = task_team->tt.tt_next;
      task_team->tt.tt_next = NULL;

      // Free threads_data if necessary.
      if (task_team->tt.tt_threads_data != NULL) {
        __kmp_free_task_threads_data(task_team);
      }
      __kmp_free(task_team);
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }
}

// __kmp_wait_to_unref_task_teams: some threads could still be in the fork
// barrier release code, possibly trying to steal tasks. Wait for each thread
// in the thread pool to unreference its task team.
void __kmp_wait_to_unref_task_teams(void) {
  kmp_info_t *thread;
  kmp_uint32 spins;
  int done;

  KMP_INIT_YIELD(spins);

  for (;;) {
    done = TRUE;

    for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
         thread = thread->th.th_next_pool) {
#if KMP_OS_WINDOWS
      DWORD exit_val;
#endif
      if (TCR_PTR(thread->th.th_task_team) == NULL) {
        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                      __kmp_gtid_from_thread(thread)));
        continue;
      }
#if KMP_OS_WINDOWS
      // A dead thread cannot unreference its task team; clear the pointer on
      // its behalf.
      if (!__kmp_is_thread_alive(thread, &exit_val)) {
        thread->th.th_task_team = NULL;
        continue;
      }
#endif

      done = FALSE; // Because th_task_team pointer is not NULL for this thread

      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
                    "unreference task_team\n",
                    __kmp_gtid_from_thread(thread)));

      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        volatile void *sleep_loc;
        // If the thread is sleeping, awaken it so that it can unreference the
        // task team.
        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
            NULL) {
          KA_TRACE(
              10,
              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
        }
      }
    }
    if (done) {
      break;
    }

    // If oversubscribed (or in throughput mode), yield before spinning again.
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
}
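
// __kmp_task_team_setup: create a task team for the current team, reusing an
// already-created one if it exists. The team keeps two task-team pointers
// (indexed by the master's th_task_state parity) so that workers still
// draining tasks from the previous region can keep using one task team while
// the next region starts filling the other.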
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // If this task_team hasn't been created yet, allocate it. It will be used
  // in the region after the next. If it exists, it is the current task team
  // and shouldn't be touched yet as it may still be in use.
  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
      (always || team->t.t_nproc > 1)) {
    team->t.t_task_team[this_thr->th.th_task_state] =
        __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p "
                  "for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state],
                  ((team != NULL) ? team->t.t_id : -1),
                  this_thr->th.th_task_state));
  }

  // After threads exit the release phase, they will call sync and then point
  // to this other task_team; make sure it is allocated and properly
  // initialized. No task teams are formed for serialized teams.
  if (team->t.t_nproc > 1) {
    int other_team = 1 - this_thr->th.th_task_state;
    if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
      team->t.t_task_team[other_team] =
          __kmp_allocate_task_team(this_thr, team);
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new "
                    "task_team %p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    } else { // Leave the old task team in place; reset it as needed.
      kmp_task_team_t *task_team = team->t.t_task_team[other_team];
      if (!task_team->tt.tt_active ||
          team->t.t_nproc != task_team->tt.tt_nproc) {
        TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
        TCW_4(task_team->tt.tt_found_tasks, FALSE);
        TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
        KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
                          team->t.t_nproc);
        TCW_4(task_team->tt.tt_active, TRUE);
      }
      // If the team size has changed, the first thread to enable tasking will
      // realloc threads_data if necessary.
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team "
                    "%p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    }
  }
}
// __kmp_task_team_sync: propagate the task team pointer from the team to this
// thread at the release phase of a barrier.
void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // Toggle th_task_state to switch which task_team this thread refers to.
  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
  TCW_PTR(this_thr->th.th_task_team,
          team->t.t_task_team[this_thr->th.th_task_state]);
  KA_TRACE(20,
           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
            "%p from Team #%d (parity=%d)\n",
            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
            ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
// __kmp_task_team_wait: master thread waits for outstanding tasks after the
// barrier gather phase, then deactivates the old task team.
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);

  if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
    if (wait) {
      KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
                    "(for unfinished_threads to reach 0) on task_team = %p\n",
                    __kmp_gtid_from_thread(this_thr), task_team));
      // Worker threads may have dropped through to the release phase but
      // could still be executing tasks; wait here for them to complete.
      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
                             &task_team->tt.tt_unfinished_threads),
                       0U);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    // Deactivate the old task team, so that the worker threads stop
    // referencing it while spinning.
    KA_TRACE(
        20,
        ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
         "setting active to false, setting local and team's pointer to NULL\n",
         __kmp_gtid_from_thread(this_thr), task_team));
#if OMP_45_ENABLED
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
                     task_team->tt.tt_found_proxy_tasks == TRUE);
    TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#else
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
#endif
    KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
    TCW_SYNC_4(task_team->tt.tt_active, FALSE);

    TCW_PTR(this_thr->th.th_task_team, NULL);
  }
}
// __kmp_tasking_barrier: execute all tasks prior to a regular barrier or a
// join barrier. Only called when __kmp_tasking_mode == tskm_extra_barrier.
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
  std::atomic<kmp_uint32> *spin = RCAST(
      std::atomic<kmp_uint32> *,
      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  int flag = FALSE;
  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  kmp_flag_32 spin_flag(spin, 0U);
  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
    KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    KMP_YIELD(TRUE); // nothing to do; yield before spinning again
  }
  KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
}
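
// __kmp_give_task: try to push a task onto the deque of the thread with local
// id tid. Returns false if that thread has no deque or the deque is full; on
// later passes the deque may be grown instead of giving up.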
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = taskdata->td_task_team;

  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  // If the task_team is NULL something went really bad.
  KMP_DEBUG_ASSERT(task_team != NULL);

  bool result = false;
  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];

  if (thread_data->td.td_deque == NULL) {
    // There's no queue in this thread; go find another one. We're guaranteed
    // that at least one thread has a queue.
    KA_TRACE(30,
             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
              tid, taskdata));
    return result;
  }

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(
        30,
        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
         taskdata, tid));

    // If this deque is bigger than the pass ratio, give a chance to another
    // thread.
    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
      return result;

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    __kmp_realloc_task_deque(thread, thread_data);

  } else {

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
                    "thread %d.\n",
                    taskdata, tid));

      // If this deque is bigger than the pass ratio, give a chance to another
      // thread.
      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
        goto release_and_exit;

      __kmp_realloc_task_deque(thread, thread_data);
    }
  }

  // The lock is held here, and there is space in the deque.
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1);

  result = true;
  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
                taskdata, tid));

release_and_exit:
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return result;
}
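
// Proxy-task completion is split into helpers: the "first top half" marks the
// proxy task complete, the "second top half" releases the parent's child
// counts, and the "bottom half" (which must run on an OpenMP thread) waits
// for the top halves to finish, releases dependences and frees the task.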
static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  taskdata->td_flags.complete = 1; // mark the task as completed

  if (taskdata->td_taskgroup)
    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);

  // Create an imaginary child for this task so the bottom half cannot release
  // the task before we have completed the second top half.
  KMP_ATOMIC_INC(&taskdata->td_incomplete_child_tasks);
}

static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  kmp_int32 children = 0;

  // Predecrement simulated by "- 1" calculation.
  children =
      KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Remove the imaginary child.
  KMP_ATOMIC_DEC(&taskdata->td_incomplete_child_tasks);
}

static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  kmp_info_t *thread = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
                   1); // top half must have run before bottom half

  // Wait until the imaginary child created by the first top half is removed.
  while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) > 0)
    ;

  __kmp_release_deps(gtid, taskdata);
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  KA_TRACE(
      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
           gtid, taskdata));

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);
  __kmp_second_top_half_finish_proxy(taskdata);
  __kmp_bottom_half_finish_proxy(gtid, ptask);

  KA_TRACE(10,
           ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
            gtid, taskdata));
}

void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
       taskdata));

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);

  // Enqueue the task so that a thread of the corresponding team completes the
  // bottom half.
  kmp_team_t *team = taskdata->td_team;
  kmp_int32 nthreads = team->t.t_nproc;
  kmp_info_t *thread;

  kmp_int32 start_k = 0;
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;

  do {
    // For now we're just linearly trying to find a thread.
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;

    // We did a full pass over all the threads.
    if (k == start_k)
      pass = pass << 1;

  } while (!__kmp_give_task(thread, k, ptask, pass));

  __kmp_second_top_half_finish_proxy(taskdata);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
       taskdata));
}
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *taskdata_src;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;
  size_t task_size;

  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));
  taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
                   TASK_FULL); // it should not be a proxy task
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
  task_size = taskdata_src->td_size_alloc;

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
                task_size));
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
#else
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
#endif /* USE_FAST_MEMORY */
  KMP_MEMCPY(taskdata, taskdata_src, task_size);

  task = KMP_TASKDATA_TO_TASK(taskdata);

  // Initialize the fields not affected by the memcpy.
  taskdata->td_task_id = KMP_GEN_TASK_ID();
  if (task->shareds != NULL) { // need to set up the shareds pointer
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits the taskgroup from the parent

  // Only need to keep track of child task counts if team parallel and tasking
  // is not serialized.
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
  }

  // Only need to keep track of allocated child tasks for explicit tasks since
  // implicit ones are not deallocated.
  if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
    KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);

  KA_TRACE(20,
           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
            thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
#endif
  return task;
}

// Routine optionally generated by the compiler for setting the lastprivate
// flag and calling needed constructors for private/firstprivate objects.
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);

KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
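
// kmp_taskloop_bounds_t: helper that reads and writes a taskloop task's loop
// bounds, hiding the difference between the native layout (bounds stored at a
// fixed offset inside the task) and the GOMP layout (bounds stored in the
// shareds area as 32- or 64-bit values, selected by td_size_loop_bounds).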
class kmp_taskloop_bounds_t {
  kmp_task_t *task;
  const kmp_taskdata_t *taskdata;
  size_t lower_offset;
  size_t upper_offset;

public:
  kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
        lower_offset((char *)lb - (char *)task),
        upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }
  kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
        lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
  size_t get_lower_offset() const { return lower_offset; }
  size_t get_upper_offset() const { return upper_offset; }
  kmp_uint64 get_lb() const {
    kmp_int64 retval;
#if defined(KMP_GOMP_COMPAT)
    if (!taskdata->td_flags.native) {
      retval = *(kmp_int64 *)((char *)task + lower_offset);
    } else {
      // GOMP tasks have to take sizeof(long) into account.
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
        retval = (kmp_int64)*lb;
      } else {
        kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
        retval = (kmp_int64)*lb;
      }
    }
#else
    retval = *(kmp_int64 *)((char *)task + lower_offset);
#endif // defined(KMP_GOMP_COMPAT)
    return retval;
  }
  kmp_uint64 get_ub() const {
    kmp_int64 retval;
#if defined(KMP_GOMP_COMPAT)
    if (!taskdata->td_flags.native) {
      retval = *(kmp_int64 *)((char *)task + upper_offset);
    } else {
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
        retval = (kmp_int64)*ub;
      } else {
        kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
        retval = (kmp_int64)*ub;
      }
    }
#else
    retval = *(kmp_int64 *)((char *)task + upper_offset);
#endif // defined(KMP_GOMP_COMPAT)
    return retval;
  }
  void set_lb(kmp_uint64 lb) {
#if defined(KMP_GOMP_COMPAT)
    if (!taskdata->td_flags.native) {
      *(kmp_uint64 *)((char *)task + lower_offset) = lb;
    } else {
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
        *lower = (kmp_uint32)lb;
      } else {
        kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
        *lower = (kmp_uint64)lb;
      }
    }
#else
    *(kmp_uint64 *)((char *)task + lower_offset) = lb;
#endif // defined(KMP_GOMP_COMPAT)
  }
  void set_ub(kmp_uint64 ub) {
#if defined(KMP_GOMP_COMPAT)
    if (!taskdata->td_flags.native) {
      *(kmp_uint64 *)((char *)task + upper_offset) = ub;
    } else {
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
        *upper = (kmp_uint32)ub;
      } else {
        kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
        *upper = (kmp_uint64)ub;
      }
    }
#else
    *(kmp_uint64 *)((char *)task + upper_offset) = ub;
#endif // defined(KMP_GOMP_COMPAT)
  }
};
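
// __kmp_taskloop_linear: generate the chunk tasks one at a time: each
// iteration duplicates the pattern task, sets the chunk's bounds (the first
// 'extras' chunks get one extra iteration), calls the compiler-provided
// task_dup routine, and schedules the copy.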
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
                           kmp_uint64 tc,
#if OMPT_SUPPORT
                           void *codeptr_ra,
#endif
                           void *task_dup) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  // compiler provides global bounds here
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 i;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  kmp_task_t *next_task;
  kmp_int32 lastpriv = 0;

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
                "extras %lld, i=%lld,%lld(%d)%lld, dup %p\n",
                gtid, num_tasks, grainsize, extras, lower, upper, ub_glob, st,
                task_dup));

  // Launch num_tasks tasks, assigning grainsize iterations to each task.
  for (i = 0; i < num_tasks; ++i) {
    kmp_uint64 chunk_minus_1;
    if (extras == 0) {
      chunk_minus_1 = grainsize - 1;
    } else {
      chunk_minus_1 = grainsize;
      --extras; // first extras iterations get a bigger chunk (grainsize+1)
    }
    upper = lower + st * chunk_minus_1; // upper bound for this chunk
    if (i == num_tasks - 1) {
      // Schedule the last task, setting the lastprivate flag if needed.
      if (st == 1) { // most common case
        KMP_DEBUG_ASSERT(upper == *ub);
        if (upper == ub_glob)
          lastpriv = 1;
      } else if (st > 0) { // positive loop stride
        KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
        if ((kmp_uint64)st > ub_glob - upper)
          lastpriv = 1;
      } else { // negative loop stride
        KMP_DEBUG_ASSERT(upper + st < *ub);
        if (upper - ub_glob < (kmp_uint64)(-st))
          lastpriv = 1;
      }
    }
    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
    kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
    kmp_taskloop_bounds_t next_task_bounds =
        kmp_taskloop_bounds_t(next_task, task_bounds);

    // Adjust task-specific bounds.
    next_task_bounds.set_lb(lower);
    if (next_taskdata->td_flags.native) {
      // the GOMP taskloop inner loop treats the upper bound as exclusive
      next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
    } else {
      next_task_bounds.set_ub(upper);
    }
    if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates, etc.
      ptask_dup(next_task, task, lastpriv);
    KA_TRACE(40,
             ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
              "upper %lld stride %lld, (offsets %p %p)\n",
              gtid, i, next_task, lower, upper, st,
              next_task_bounds.get_lower_offset(),
              next_task_bounds.get_upper_offset()));
#if OMPT_SUPPORT
    __kmp_omp_taskloop_task(NULL, gtid, next_task,
                            codeptr_ra); // schedule new task
#else
    __kmp_omp_task(gtid, next_task, true); // schedule new task
#endif
    lower = upper + st; // adjust lower bound for the next iteration
  }
  // Free the pattern task; do not execute it, just do internal bookkeeping.
  __kmp_task_start(gtid, task, current_task);
  __kmp_task_finish<false>(gtid, task, current_task);
}
// Structure to keep taskloop parameters for the auxiliary task, stored in the
// shareds of that task.
typedef struct __taskloop_params {
  kmp_task_t *task;
  kmp_uint64 *lb;
  kmp_uint64 *ub;
  void *task_dup;
  kmp_int64 st;
  kmp_uint64 ub_glob;
  kmp_uint64 num_tasks;
  kmp_uint64 grainsize;
  kmp_uint64 extras;
  kmp_uint64 tc;
  kmp_uint64 num_t_min;
#if OMPT_SUPPORT
  void *codeptr_ra;
#endif
} __taskloop_params_t;

void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
                          kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
#if OMPT_SUPPORT
                          void *,
#endif
                          void *);

// __kmp_taskloop_task: execute part of the taskloop submitted as a task.
int __kmp_taskloop_task(int gtid, void *ptask) {
  __taskloop_params_t *p =
      (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
  kmp_task_t *task = p->task;
  kmp_uint64 *lb = p->lb;
  kmp_uint64 *ub = p->ub;
  void *task_dup = p->task_dup;
  kmp_int64 st = p->st;
  kmp_uint64 ub_glob = p->ub_glob;
  kmp_uint64 num_tasks = p->num_tasks;
  kmp_uint64 grainsize = p->grainsize;
  kmp_uint64 extras = p->extras;
  kmp_uint64 tc = p->tc;
  kmp_uint64 num_t_min = p->num_t_min;
#if OMPT_SUPPORT
  void *codeptr_ra = p->codeptr_ra;
#endif
#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KA_TRACE(20, ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
                task_dup));
#endif
  KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
  if (num_tasks > num_t_min)
    __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, tc, num_t_min,
#if OMPT_SUPPORT
                         codeptr_ra,
#endif
                         task_dup);
  else
    __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc,
#if OMPT_SUPPORT
                          codeptr_ra,
#endif
                          task_dup);

  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
  return 0;
}
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_uint64 tc, kmp_uint64 num_t_min,
#if OMPT_SUPPORT
                          void *codeptr_ra,
#endif
                          void *task_dup) {
#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
  KA_TRACE(20, ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
                task_dup));
#endif
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 lower = *lb;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_t *next_task;
  size_t lower_offset =
      (char *)lb - (char *)task; // remember offset of lb in the task structure
  size_t upper_offset =
      (char *)ub - (char *)task; // remember offset of ub in the task structure

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // Split the loop into two halves.
  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
  kmp_uint64 gr_size0 = grainsize;
  kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
  if (n_tsk0 <= extras) {
    gr_size0++; // integrate extras into grainsize
    ext0 = 0; // no extra iterations in the 1st half
    ext1 = extras - n_tsk0; // remaining extras
    tc0 = gr_size0 * n_tsk0; // iterations for the 1st half
    tc1 = tc - tc0; // iterations for the 2nd half
  } else { // n_tsk0 > extras
    ext1 = 0; // no extra iterations in the 2nd half
    ext0 = extras;
    tc1 = grainsize * n_tsk1; // iterations for the 2nd half
    tc0 = tc - tc1; // iterations for the 1st half
  }
  ub0 = lower + st * (tc0 - 1);
  lb1 = ub0 + st;

  // Create a pattern task for the 2nd half of the loop.
  next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
  // Adjust lower bound (upper bound is not changed) for the 2nd half.
  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  if (ptask_dup != NULL) // construct firstprivates, etc.
    ptask_dup(next_task, task, 0);
  *ub = ub0; // adjust upper bound for the 1st half

  // Create an auxiliary task for the 2nd half of the loop.
  kmp_task_t *new_task =
      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
  p->task = next_task;
  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  p->task_dup = task_dup;
  p->st = st;
  p->ub_glob = ub_glob;
  p->num_tasks = n_tsk1;
  p->grainsize = grainsize;
  p->extras = ext1;
  p->tc = tc1;
  p->num_t_min = num_t_min;
#if OMPT_SUPPORT
  p->codeptr_ra = codeptr_ra;
#endif

#if OMPT_SUPPORT
  // Schedule the new task with the correct return address for OMPT events.
  __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
#else
  __kmp_omp_task(gtid, new_task, true); // schedule new task
#endif

  // Execute the 1st half of the current subrange.
  if (n_tsk0 > num_t_min)
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
                         ext0, tc0, num_t_min,
#if OMPT_SUPPORT
                         codeptr_ra,
#endif
                         task_dup);
  else
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                          gr_size0, ext0, tc0,
#if OMPT_SUPPORT
                          codeptr_ra,
#endif
                          task_dup);

  KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
}
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(loc, gtid);
  }

  // calculate loop parameters
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  // compiler provides global bounds here
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, task_dup));

  // compute trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // Free the pattern task and exit; nothing to execute.
    __kmp_task_start(gtid, task, current_task);
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (num_tasks_min == 0)
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified; try to schedule (team_size*10) tasks
    grainsize = thread->th.th_team_nproc * 10;
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1; // too big grainsize requested, adjust values
      grainsize = tc;
      extras = 0;
    } else {
      num_tasks = tc / grainsize;
      // adjust grainsize for a balanced distribution of iterations
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }
  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // check if clause value first
  if (if_val == 0) { // if(0) specified, mark task as serial
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // a serial task cannot be untied
    // always start serial tasks linearly
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
    // !taskdata->td_flags.native => currently force linear spawning of tasks
    // for GOMP_taskloop
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, tc, num_tasks_min,
#if OMPT_SUPPORT
                         OMPT_GET_RETURN_ADDRESS(0),
#endif
                         task_dup);
  } else {
    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}