#include "kmp_error.h"

#define MAX_MESSAGE 512

#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);

#define THREAD_ALLOC_FOR_TASKQ

static int in_parallel_context(kmp_team_t *team) {
  return !team->t.t_serialized;
}

static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_uint32 my_token;
  kmpc_task_queue_t *taskq;
  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;

  if (__kmp_env_consistency_check)
#if KMP_USE_DYNAMIC_LOCK
    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL, 0);
#else
    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL);
#endif

  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
    /* wait until our ordinal (th_tasknum) is the one being served */
    my_token = tq->tq_curr_thunk[tid]->th_tasknum;

    taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;

    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
  }
}

static void __kmp_taskq_xo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_uint32 my_token;
  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(gtid, ct_ordered_in_taskq, loc_ref);

  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
    my_token = tq->tq_curr_thunk[tid]->th_tasknum;

    /* release the next task in queuing order */
    tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue->tq_tasknum_serving =
        my_token + 1;
  }
}

static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) {
  kmp_uint32 my_token;
  kmpc_task_queue_t *taskq;

  my_token = thunk->th_tasknum;

  taskq = thunk->th.th_shareds->sv_queue;

  if (taskq->tq_tasknum_serving <= my_token) {
    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
    taskq->tq_tasknum_serving = my_token + 1;
  }
}

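/* Sketch of the ORDERED handshake used above (descriptive only, no new
   behavior): each ordered task is stamped with a ticket, th_tasknum, when it
   is enqueued (taken from tq_tasknum_queuing in __kmpc_task).  A thread may
   enter its ordered region only once the queue's tq_tasknum_serving counter
   reaches its own ticket, and on exit it publishes my_token + 1 so the task
   queued next can proceed.  __kmp_taskq_eo / __kmp_taskq_xo hook this protocol
   into the generic ORDERED dispatch entry/exit points. */
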
static void __kmp_dump_TQF(kmp_int32 flags) {
  if (flags & TQF_IS_ORDERED)
    __kmp_printf("ORDERED ");
  if (flags & TQF_IS_LASTPRIVATE)
    __kmp_printf("LAST_PRIV ");
  if (flags & TQF_IS_NOWAIT)
    __kmp_printf("NOWAIT ");
  if (flags & TQF_HEURISTICS)
    __kmp_printf("HEURIST ");
  if (flags & TQF_INTERFACE_RESERVED1)
    __kmp_printf("RESERV1 ");
  if (flags & TQF_INTERFACE_RESERVED2)
    __kmp_printf("RESERV2 ");
  if (flags & TQF_INTERFACE_RESERVED3)
    __kmp_printf("RESERV3 ");
  if (flags & TQF_INTERFACE_RESERVED4)
    __kmp_printf("RESERV4 ");
  if (flags & TQF_IS_LAST_TASK)
    __kmp_printf("LAST_TASK ");
  if (flags & TQF_TASKQ_TASK)
    __kmp_printf("TASKQ_TASK ");
  if (flags & TQF_RELEASE_WORKERS)
    __kmp_printf("RELEASE ");
  if (flags & TQF_ALL_TASKS_QUEUED)
    __kmp_printf("ALL_QUEUED ");
  if (flags & TQF_PARALLEL_CONTEXT)
    __kmp_printf("PARALLEL ");
  if (flags & TQF_DEALLOCATED)
    __kmp_printf("DEALLOC ");
  if (!(flags & (TQF_INTERNAL_FLAGS | TQF_INTERFACE_FLAGS)))
    __kmp_printf("(NONE)");
}

static void __kmp_dump_thunk(kmp_taskq_t *tq, kmpc_thunk_t *thunk,
                             kmp_int32 global_tid) {
  int i;
  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;

  __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);

  for (i = 0; i < nproc; i++) {
    if (tq->tq_curr_thunk[i] == thunk) {
      __kmp_printf("[%i] ", i);
    }
  }
  __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
  __kmp_printf("th_task=%p, ", thunk->th_task);
  __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
  __kmp_printf("th_status=%d, ", thunk->th_status);
  __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
  __kmp_printf("th_flags=");
  __kmp_dump_TQF(thunk->th_flags);

  __kmp_printf("\n");
}

static void __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) {
  kmpc_thunk_t *th;

  __kmp_printf("    Thunk stack for T#%d: ", thread_num);

  for (th = thunk; th != NULL; th = th->th_encl_thunk)
    __kmp_printf("%p ", th);

  __kmp_printf("\n");
}

static void __kmp_dump_task_queue(kmp_taskq_t *tq, kmpc_task_queue_t *queue,
                                  kmp_int32 global_tid) {
  int qs, count, i;
  kmpc_thunk_t *thunk;
  kmpc_task_queue_t *taskq;

  __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);

  int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;

  if (__kmp_env_consistency_check) {
    __kmp_printf("    tq_loc             : ");
    __kmp_printf("\n");
  }

  __kmp_printf("    tq_parent          : %p\n", queue->tq.tq_parent);
  __kmp_printf("    tq_first_child     : %p\n", queue->tq_first_child);
  __kmp_printf("    tq_next_child      : %p\n", queue->tq_next_child);
  __kmp_printf("    tq_prev_child      : %p\n", queue->tq_prev_child);
  __kmp_printf("    tq_ref_count       : %d\n", queue->tq_ref_count);

  __kmp_printf("    tq_shareds         : ");
  for (i = 0; i < ((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
    __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
  __kmp_printf("\n");

  __kmp_printf("    tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
  __kmp_printf("    tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);

  __kmp_printf("    tq_queue           : %p\n", queue->tq_queue);
  __kmp_printf("    tq_thunk_space     : %p\n", queue->tq_thunk_space);
  __kmp_printf("    tq_taskq_slot      : %p\n", queue->tq_taskq_slot);

  __kmp_printf("    tq_free_thunks     : ");
  for (thunk = queue->tq_free_thunks; thunk != NULL;
       thunk = thunk->th.th_next_free)
    __kmp_printf("%p ", thunk);
  __kmp_printf("\n");

  __kmp_printf("    tq_nslots          : %d\n", queue->tq_nslots);
  __kmp_printf("    tq_head            : %d\n", queue->tq_head);
  __kmp_printf("    tq_tail            : %d\n", queue->tq_tail);
  __kmp_printf("    tq_nfull           : %d\n", queue->tq_nfull);
  __kmp_printf("    tq_hiwat           : %d\n", queue->tq_hiwat);
  __kmp_printf("    tq_flags           : ");
  __kmp_dump_TQF(queue->tq_flags);
  __kmp_printf("\n");

  if (in_parallel) {
    __kmp_printf("    tq_th_thunks       : ");
    for (i = 0; i < queue->tq_nproc; i++) {
      __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
    }
    __kmp_printf("\n");
  }

  __kmp_printf("    Queue slots:\n");

  qs = queue->tq_tail;
  for (count = 0; count < queue->tq_nfull; ++count) {
    __kmp_printf("(%d)", qs);
    __kmp_dump_thunk(tq, queue->tq_queue[qs].qs_thunk, global_tid);
    qs = (qs + 1) % queue->tq_nslots;
  }

  if (queue->tq_taskq_slot != NULL) {
    __kmp_printf("    TaskQ slot:\n");
    __kmp_dump_thunk(tq, CCAST(kmpc_thunk_t *, queue->tq_taskq_slot),
                     global_tid);
  }

  __kmp_printf("    Taskq freelist: ");

  for (taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free)
    __kmp_printf("%p ", taskq);

  __kmp_printf("\n\n");
}

static void __kmp_aux_dump_task_queue_tree(kmp_taskq_t *tq,
                                           kmpc_task_queue_t *curr_queue,
                                           kmp_int32 level,
                                           kmp_int32 global_tid) {
  int i, count, qs;
  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue == NULL)
    return;

  for (i = 0; i < level; i++)
    __kmp_printf("  ");

  __kmp_printf("%p", curr_queue);

  for (i = 0; i < nproc; i++) {
    if (tq->tq_curr_thunk[i] &&
        tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue) {
      __kmp_printf(" [%i]", i);
    }
  }

  qs = curr_queue->tq_tail;

  for (count = 0; count < curr_queue->tq_nfull; ++count) {
    __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
    qs = (qs + 1) % curr_queue->tq_nslots;
  }

  __kmp_printf("\n");

  if (curr_queue->tq_first_child) {
    for (queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child);
         queue != NULL; queue = queue->tq_next_child) {
      __kmp_aux_dump_task_queue_tree(tq, queue, level + 1, global_tid);
    }
  }
}

static void __kmp_dump_task_queue_tree(kmp_taskq_t *tq,
                                       kmpc_task_queue_t *tqroot,
                                       kmp_int32 global_tid) {
  __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);

  __kmp_aux_dump_task_queue_tree(tq, tqroot, 0, global_tid);

  __kmp_printf("\n");
}

static void *__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) {
  void *addr, *orig_addr;
  size_t bytes;

  KB_TRACE(5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int)size,
               global_tid));

  bytes = sizeof(void *) + CACHE_LINE + size;

#ifdef THREAD_ALLOC_FOR_TASKQ
  orig_addr =
      (void *)__kmp_thread_malloc(__kmp_thread_from_gtid(global_tid), bytes);
#else
  KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", bytes));
  orig_addr = (void *)KMP_INTERNAL_MALLOC(bytes);
#endif /* THREAD_ALLOC_FOR_TASKQ */

  if (orig_addr == NULL)
    KMP_FATAL(OutOfHeapMemory);

  addr = orig_addr;

  if (((kmp_uintptr_t)addr & (CACHE_LINE - 1)) != 0) {
    KB_TRACE(50, ("__kmp_taskq_allocate: adjust for cache alignment\n"));
    addr = (void *)(((kmp_uintptr_t)addr + CACHE_LINE) & ~(CACHE_LINE - 1));
  }

  (*(void **)addr) = orig_addr;

  KB_TRACE(10,
           ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, "
            "gtid: %d\n",
            orig_addr, ((void **)addr) + 1,
            ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid));

  return (((void **)addr) + 1);
}

static void __kmpc_taskq_free(void *p, kmp_int32 global_tid) {
  KB_TRACE(5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid));

  KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n",
                (*(((void **)p) - 1)), global_tid));

#ifdef THREAD_ALLOC_FOR_TASKQ
  __kmp_thread_free(__kmp_thread_from_gtid(global_tid), *(((void **)p) - 1));
#else
  KMP_INTERNAL_FREE(*(((void **)p) - 1));
#endif /* THREAD_ALLOC_FOR_TASKQ */
}

static kmpc_task_queue_t *
__kmp_alloc_taskq(kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots,
                  kmp_int32 nthunks, kmp_int32 nshareds, kmp_int32 nproc,
                  size_t sizeof_thunk, size_t sizeof_shareds,
                  kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid) {
  kmp_int32 i;
  size_t bytes;
  kmpc_task_queue_t *new_queue;
  kmpc_aligned_shared_vars_t *shared_var_array;
  char *shared_var_storage;
  char *pt; /* for byte-adjusted address computations */

  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);

  if (tq->tq_freelist) {
    new_queue = tq->tq_freelist;
    tq->tq_freelist = tq->tq_freelist->tq.tq_next_free;

    KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);

    new_queue->tq_flags = 0;

    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
  } else {
    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);

    new_queue = (kmpc_task_queue_t *)__kmp_taskq_allocate(
        sizeof(kmpc_task_queue_t), global_tid);
    new_queue->tq_flags = 0;
  }

  /* space for the thunks, allocated as one big chunk of storage including the
     taskq-task thunk; pad the per-thunk size to a cache line */
  sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE));
  pt = (char *)__kmp_taskq_allocate(nthunks * sizeof_thunk, global_tid);
  new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
  *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);

  /* chain the allocated thunks into a freelist for this queue */
  new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;

  for (i = 0; i < (nthunks - 2); i++) {
    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th.th_next_free =
        (kmpc_thunk_t *)(pt + (i + 1) * sizeof_thunk);

    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th_flags = TQF_DEALLOCATED;
  }

  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th.th_next_free = NULL;

  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th_flags =
      TQF_DEALLOCATED;

  /* initialize the locks */
  __kmp_init_lock(&new_queue->tq_link_lck);
  __kmp_init_lock(&new_queue->tq_free_thunks_lck);
  __kmp_init_lock(&new_queue->tq_queue_lck);

  /* now allocate the queue slots */
  bytes = nslots * sizeof(kmpc_aligned_queue_slot_t);
  new_queue->tq_queue =
      (kmpc_aligned_queue_slot_t *)__kmp_taskq_allocate(bytes, global_tid);

  /* space for the array of pointers to shared variable structures */
  sizeof_shareds += sizeof(kmpc_task_queue_t *);
  sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE));

  bytes = nshareds * sizeof(kmpc_aligned_shared_vars_t);
  shared_var_array =
      (kmpc_aligned_shared_vars_t *)__kmp_taskq_allocate(bytes, global_tid);

  bytes = nshareds * sizeof_shareds;
  shared_var_storage = (char *)__kmp_taskq_allocate(bytes, global_tid);

  for (i = 0; i < nshareds; i++) {
    shared_var_array[i].ai_data =
        (kmpc_shared_vars_t *)(shared_var_storage + i * sizeof_shareds);
    shared_var_array[i].ai_data->sv_queue = new_queue;
  }
  new_queue->tq_shareds = shared_var_array;

  /* array for the number of outstanding thunks per thread */
  bytes = nproc * sizeof(kmpc_aligned_int32_t);
  new_queue->tq_th_thunks =
      (kmpc_aligned_int32_t *)__kmp_taskq_allocate(bytes, global_tid);
  new_queue->tq_nproc = nproc;

  for (i = 0; i < nproc; i++)
    new_queue->tq_th_thunks[i].ai_data = 0;

  return new_queue;
}

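/* Thunk storage layout (descriptive note): all nthunks thunks of a queue live
   in one cache-line-padded slab pointed to by tq_thunk_space.  The last thunk
   in the slab is handed back through *new_taskq_thunk and becomes the queue's
   taskq task; the remaining thunks are linked through th.th_next_free into
   tq_free_thunks, from which __kmp_alloc_thunk and __kmp_free_thunk pop and
   push them. */
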
static void __kmp_free_taskq(kmp_taskq_t *tq, kmpc_task_queue_t *p,
                             int in_parallel, kmp_int32 global_tid) {
  kmp_int32 i;

  __kmpc_taskq_free(p->tq_thunk_space, global_tid);
  __kmpc_taskq_free(p->tq_queue, global_tid);

  /* free shared var structure storage */
  __kmpc_taskq_free(CCAST(kmpc_shared_vars_t *, p->tq_shareds[0].ai_data),
                    global_tid);
  /* free array of pointers to shared vars storage */
  __kmpc_taskq_free(p->tq_shareds, global_tid);

  p->tq_first_child = NULL;
  p->tq_next_child = NULL;
  p->tq_prev_child = NULL;
  p->tq_ref_count = -10;
  p->tq_shareds = NULL;
  p->tq_tasknum_queuing = 0;
  p->tq_tasknum_serving = 0;
  p->tq_thunk_space = NULL;
  p->tq_taskq_slot = NULL;
  p->tq_free_thunks = NULL;

  for (i = 0; i < p->tq_nproc; i++)
    p->tq_th_thunks[i].ai_data = 0;

  if (__kmp_env_consistency_check)
    p->tq_loc = NULL;

  KMP_DEBUG_ASSERT(p->tq_flags & TQF_DEALLOCATED);
  p->tq_flags = TQF_DEALLOCATED;

  __kmpc_taskq_free(p->tq_th_thunks, global_tid);
  __kmp_destroy_lock(&p->tq_link_lck);
  __kmp_destroy_lock(&p->tq_queue_lck);
  __kmp_destroy_lock(&p->tq_free_thunks_lck);

  p->tq_th_thunks = NULL;

  /* put the queue entry back on the global freelist for reuse */
  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);
  p->tq.tq_next_free = tq->tq_freelist;
  tq->tq_freelist = p;
  __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
}

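/* Queue recycling (descriptive note): freed kmpc_task_queue_t descriptors are
   not returned to the heap; they are pushed onto tq->tq_freelist under
   tq_freelist_lck with TQF_DEALLOCATED set, and __kmp_alloc_taskq pops from
   that freelist before falling back to __kmp_taskq_allocate.  The component
   arrays (thunk space, slots, shareds) are freed and reallocated each time,
   so only the descriptor itself is recycled. */
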
static kmpc_thunk_t *__kmp_alloc_thunk(kmpc_task_queue_t *queue,
                                       int in_parallel, kmp_int32 global_tid) {
  kmpc_thunk_t *fl;

  if (in_parallel)
    __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);

  fl = queue->tq_free_thunks;

  KMP_DEBUG_ASSERT(fl != NULL);

  queue->tq_free_thunks = fl->th.th_next_free;

  if (in_parallel)
    __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);

  return fl;
}

static void __kmp_free_thunk(kmpc_task_queue_t *queue, kmpc_thunk_t *p,
                             int in_parallel, kmp_int32 global_tid) {
  p->th_encl_thunk = 0;

  if (in_parallel)
    __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);

  p->th.th_next_free = queue->tq_free_thunks;
  queue->tq_free_thunks = p;

  p->th_flags = TQF_DEALLOCATED;

  if (in_parallel)
    __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);
}

static kmp_int32 __kmp_enqueue_task(kmp_taskq_t *tq, kmp_int32 global_tid,
                                    kmpc_task_queue_t *queue,
                                    kmpc_thunk_t *thunk, int in_parallel) {
  kmp_int32 ret;

  if (in_parallel)
    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

  KMP_DEBUG_ASSERT(queue->tq_nfull < queue->tq_nslots);

  queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;

  if (queue->tq_head >= queue->tq_nslots)
    queue->tq_head = 0;

  (queue->tq_nfull)++;

  /* the return value signals "queue is now full" to the caller */
  ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;

  if (in_parallel) {
    __kmp_release_lock(&queue->tq_queue_lck, global_tid);

    if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
      /* the first task has been queued for the root taskq; let the worker
         threads, which are still parked at the fork barrier, start running */
      tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
      __kmp_end_split_barrier(bs_plain_barrier, global_tid);
    }
  }

  return ret;
}

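/* Descriptive note on the queue proper: tq_queue is a fixed ring of tq_nslots
   entries indexed by tq_head (producer) and tq_tail (consumer), with tq_nfull
   tracking occupancy; both indices wrap modulo tq_nslots.  TQF_RELEASE_WORKERS
   is the handshake for the root taskq: the master creates the queue while the
   workers wait in __kmp_barrier (see __kmpc_taskq), and the first enqueue
   clears the flag and ends the split barrier so the workers can start
   executing tasks. */
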
static kmpc_thunk_t *__kmp_dequeue_task(kmp_int32 global_tid,
                                        kmpc_task_queue_t *queue,
                                        int in_parallel) {
  kmpc_thunk_t *pt;
  int tid = __kmp_tid_from_gtid(global_tid);

  KMP_DEBUG_ASSERT(queue->tq_nfull > 0); /* check queue not empty */

  if (queue->tq.tq_parent != NULL && in_parallel) {
    int ct;
    __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    ct = ++(queue->tq_ref_count);
    __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    KMP_DEBUG_REF_CTS(
        ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
  }

  pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;

  if (queue->tq_tail >= queue->tq_nslots)
    queue->tq_tail = 0;

  queue->tq_th_thunks[tid].ai_data++;

  KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding "
                 "thunks from queue %p\n",
                 global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));

  (queue->tq_nfull)--;

  KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);

  KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <=
                   __KMP_TASKQ_THUNKS_PER_TH);

  return pt;
}

static kmpc_thunk_t *__kmp_find_task_in_queue(kmp_int32 global_tid,
                                              kmpc_task_queue_t *queue) {
  kmpc_thunk_t *pt = NULL;
  int tid = __kmp_tid_from_gtid(global_tid);

  /* to prevent deadlock on tq_queue_lck if the queue is already deallocated */
  if (!(queue->tq_flags & TQF_DEALLOCATED)) {

    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

    /* check again to avoid a race with __kmpc_end_taskq() */
    if (!(queue->tq_flags & TQF_DEALLOCATED)) {
      if ((queue->tq_taskq_slot != NULL) &&
          (queue->tq_nfull <= queue->tq_hiwat)) {
        /* if there's enough room in the queue and the dispatcher (taskq task)
           is available, schedule more tasks */
        pt = CCAST(kmpc_thunk_t *, queue->tq_taskq_slot);
        queue->tq_taskq_slot = NULL;
      } else if (queue->tq_nfull == 0 ||
                 queue->tq_th_thunks[tid].ai_data >=
                     __KMP_TASKQ_THUNKS_PER_TH) {
        /* do nothing: no thunks are available, or this thread already has its
           maximum number outstanding */
        pt = NULL;
      } else if (queue->tq_nfull > 1) {
        /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
      } else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
        /* one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE */
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
      } else if (queue->tq_flags & TQF_IS_LAST_TASK) {
        /* TQF_IS_LASTPRIVATE, one thing in queue, and task generation has
           completed, so this really is the last task; run it with
           TQF_IS_LAST_TASK so instrumentation does the copy-out */
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
        pt->th_flags |= TQF_IS_LAST_TASK;
      }
    }

    __kmp_release_lock(&queue->tq_queue_lck, global_tid);
  }

  return pt;
}

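/* Scheduling policy encoded above (descriptive summary): a thread first tries
   to re-dispatch the queue's own taskq task (tq_taskq_slot) whenever the queue
   has drained to its high-water mark, so more tasks get generated before the
   queue runs dry; otherwise it dequeues a regular task, except that a thread
   already running __KMP_TASKQ_THUNKS_PER_TH thunks from this queue takes
   nothing, and the single remaining task of a LASTPRIVATE taskq is held back
   until __kmpc_end_taskq_task has marked it as truly last. */
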
static kmpc_thunk_t *
__kmp_find_task_in_descendant_queue(kmp_int32 global_tid,
                                    kmpc_task_queue_t *curr_queue) {
  kmpc_thunk_t *pt = NULL;
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue->tq_first_child != NULL) {
    __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

    queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child);
    if (queue == NULL) {
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      return NULL;
    }

    while (queue != NULL) {
      int ct;
      kmpc_task_queue_t *next;

      ct = ++(queue->tq_ref_count);
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));

      pt = __kmp_find_task_in_queue(global_tid, queue);

      if (pt != NULL) {
        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);

        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);

        return pt;
      }

      /* recurse into grandchildren before moving on to the next sibling */
      pt = __kmp_find_task_in_descendant_queue(global_tid, queue);

      if (pt != NULL) {
        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(ct >= 0);

        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);

        return pt;
      }

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

      next = queue->tq_next_child;

      ct = --(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);

      queue = next;
    }

    __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
  }

  return pt;
}

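/* Reference-counting protocol for the queue tree (descriptive note): a walker
   bumps a child's tq_ref_count while holding the parent's tq_link_lck, drops
   the lock while it probes that child (and its descendants), then re-acquires
   the lock to decrement the count and step to tq_next_child.  A queue is only
   physically removed by __kmp_remove_queue_from_tree once its ref count drains
   back to 1 (the creator's reference), so a queue cannot disappear while
   another thread is still inspecting it. */
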
static kmpc_thunk_t *
__kmp_find_task_in_ancestor_queue(kmp_taskq_t *tq, kmp_int32 global_tid,
                                  kmpc_task_queue_t *curr_queue) {
  kmpc_task_queue_t *queue;
  kmpc_thunk_t *pt = NULL;
  int ct;

  if (curr_queue->tq.tq_parent != NULL) {
    queue = curr_queue->tq.tq_parent;

    while (queue != NULL) {
      if (queue->tq.tq_parent != NULL) {
        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

        ct = ++(queue->tq_ref_count);
        __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__,
                           global_tid, queue, ct));
      }

      pt = __kmp_find_task_in_queue(global_tid, queue);

      if (pt != NULL) {
        if (queue->tq.tq_parent != NULL) {
          __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

          ct = --(queue->tq_ref_count);
          KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                             global_tid, queue, ct));
          KMP_DEBUG_ASSERT(ct >= 0);

          __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        }

        return pt;
      }

      /* nothing found here: drop our reference and move up one level */
      if (queue->tq.tq_parent != NULL) {
        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(ct >= 0);
      }
      queue = queue->tq.tq_parent;

      if (queue != NULL)
        __kmp_release_lock(&queue->tq_link_lck, global_tid);
    }
  }

  /* nothing found among the ancestors; fall back to a scan from the root */
  pt = __kmp_find_task_in_descendant_queue(global_tid, tq->tq_root);

  return pt;
}

static int __kmp_taskq_tasks_finished(kmpc_task_queue_t *queue) {
  int i;

  /* any thread with an outstanding thunk from this queue means not finished */
  for (i = 0; i < queue->tq_nproc; i++) {
    if (queue->tq_th_thunks[i].ai_data != 0)
      return FALSE;
  }

  return TRUE;
}

static int __kmp_taskq_has_any_children(kmpc_task_queue_t *queue) {
  return (queue->tq_first_child != NULL);
}

static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid,
                                         kmpc_task_queue_t *queue,
                                         int in_parallel) {
  kmp_int32 i;
  kmpc_thunk_t *thunk;

  KF_TRACE(50,
           ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
  KF_DUMP(50, __kmp_dump_task_queue(tq, queue, global_tid));

  /* sub-queue in a recursion, not the root task queue */
  KMP_DEBUG_ASSERT(queue->tq.tq_parent != NULL);

  if (in_parallel)
    __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

  KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);

  /* unlink queue from its siblings, if any */
  if (queue->tq_prev_child != NULL)
    queue->tq_prev_child->tq_next_child = queue->tq_next_child;
  if (queue->tq_next_child != NULL)
    queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
  if (queue->tq.tq_parent->tq_first_child == queue)
    queue->tq.tq_parent->tq_first_child = queue->tq_next_child;

  queue->tq_prev_child = NULL;
  queue->tq_next_child = NULL;

  if (in_parallel) {
    KMP_DEBUG_REF_CTS(
        ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
         __LINE__, global_tid, queue, queue->tq_ref_count));

    /* wait until all other threads have stopped accessing this queue */
    while (queue->tq_ref_count > 1) {
      __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

      KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
                     NULL);

      __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    }

    __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
  }

  KMP_DEBUG_REF_CTS(
      ("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue));

  KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
  KMP_DEBUG_ASSERT(queue->tq_nfull == 0);

  for (i = 0; i < queue->tq_nproc; i++) {
    KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
  }

  i = 0;
  for (thunk = queue->tq_free_thunks; thunk != NULL;
       thunk = thunk->th.th_next_free)
    ++i;

  KMP_DEBUG_ASSERT(
      i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));

  /* release storage for the queue entry */
  __kmp_free_taskq(tq, queue, TRUE, global_tid);

  KF_TRACE(50,
           ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
  KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
}

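/* Removal protocol (descriptive note): the queue is first unlinked from its
   parent's child list under tq_link_lck, which stops new walkers from finding
   it; the remover then spin-waits (KMP_WAIT_YIELD) for tq_ref_count to fall
   back to 1, i.e. for every thread that had already taken a reference to drop
   it, and only then hands the queue to __kmp_free_taskq.  The task-stealing
   walks above rely on exactly this ordering. */
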
static void __kmp_find_and_remove_finished_child_taskq(
    kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) {
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue->tq_first_child != NULL) {
    __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

    queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child);
    if (queue != NULL) {
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      return;
    }

    while (queue != NULL) {
      kmpc_task_queue_t *next;
      int ct = ++(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));

      /* nested NOWAIT queues are cleaned up depth-first */
      if (queue->tq_flags & TQF_IS_NOWAIT) {
        __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);

        if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) &&
            (queue->tq_nfull == 0) && __kmp_taskq_tasks_finished(queue) &&
            !__kmp_taskq_has_any_children(queue)) {

          /* only remove this queue if it has not already been marked for
             deallocation; this keeps several threads from freeing it */
          if (__kmp_test_lock(&queue->tq_queue_lck, global_tid)) {
            if (!(queue->tq_flags & TQF_DEALLOCATED)) {
              queue->tq_flags |= TQF_DEALLOCATED;
              __kmp_release_lock(&queue->tq_queue_lck, global_tid);

              __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);

              /* can't be sure where the sibling queue is now, so exit this
                 level of the walk */
              return;
            }
            __kmp_release_lock(&queue->tq_queue_lck, global_tid);
          }
        }
      }

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

      next = queue->tq_next_child;

      ct = --(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);

      queue = next;
    }

    __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
  }
}

static void __kmp_remove_all_child_taskq(kmp_taskq_t *tq, kmp_int32 global_tid,
                                         kmpc_task_queue_t *queue) {
  kmpc_task_queue_t *next_child;

  queue = CCAST(kmpc_task_queue_t *, queue->tq_first_child);

  while (queue != NULL) {
    __kmp_remove_all_child_taskq(tq, global_tid, queue);

    next_child = queue->tq_next_child;
    queue->tq_flags |= TQF_DEALLOCATED;
    __kmp_remove_queue_from_tree(tq, global_tid, queue, FALSE);

    queue = next_child;
  }
}

static void __kmp_execute_task_from_queue(kmp_taskq_t *tq, ident_t *loc,
                                          kmp_int32 global_tid,
                                          kmpc_thunk_t *thunk,
                                          int in_parallel) {
  kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
  kmp_int32 tid = __kmp_tid_from_gtid(global_tid);

  KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
  KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
  KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

  if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
    kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
    thunk->th.th_shareds =
        CCAST(kmpc_shared_vars_t *, queue->tq_shareds[index].ai_data);

    if (__kmp_env_consistency_check) {
      __kmp_push_workshare(global_tid,
                           (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
                                                              : ct_task,
                           queue->tq_loc);
    }
  } else {
    if (__kmp_env_consistency_check)
      __kmp_push_workshare(global_tid, ct_taskq, queue->tq_loc);
  }

  thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
  tq->tq_curr_thunk[tid] = thunk;

  KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));

  KF_TRACE(50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk,
                queue, global_tid));
  thunk->th_task(global_tid, thunk);
  KF_TRACE(50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue,
                global_tid));

  if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
    if (__kmp_env_consistency_check)
      __kmp_pop_workshare(global_tid,
                          (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
                                                             : ct_task,
                          queue->tq_loc);

    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;
    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));

    if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
      __kmp_taskq_check_ordered(global_tid, thunk);
    }

    __kmp_free_thunk(queue, thunk, in_parallel, global_tid);

    KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n",
                   global_tid, thunk));
    KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

    /* only the executing thread updates its own outstanding-thunk counter */
    KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);

    KF_TRACE(
        200,
        ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
         global_tid, queue->tq_th_thunks[tid].ai_data - 1, queue));

    queue->tq_th_thunks[tid].ai_data--;

    if (queue->tq.tq_parent != NULL && in_parallel) {
      int ct;
      __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
      ct = --(queue->tq_ref_count);
      __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);
    }
  }
}

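/* Descriptive note: tq_curr_thunk[tid] plus the th_encl_thunk links form a
   per-thread stack of the thunks a thread is currently executing.  It is what
   lets nested taskq constructs find their enclosing queue (__kmpc_taskq reads
   tq_curr_thunk[tid]->th.th_shareds->sv_queue to pick the parent) and what the
   ORDERED entry hook uses to find the ticket of the task being run. */
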
kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid,
                           kmpc_task_t taskq_task, size_t sizeof_thunk,
                           size_t sizeof_shareds, kmp_int32 flags,
                           kmpc_shared_vars_t **shareds) {
  int in_parallel;
  kmp_int32 nslots, nthunks, nshareds, nproc;
  kmpc_task_queue_t *new_queue, *curr_queue;
  kmpc_thunk_t *new_taskq_thunk;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_taskq_t *tq;
  kmp_int32 tid;

  KE_TRACE(10, ("__kmpc_taskq called (%d)\n", global_tid));

  th = __kmp_threads[global_tid];
  team = th->th.th_team;
  tq = &team->t.t_taskq;
  nproc = team->t.t_nproc;
  tid = __kmp_tid_from_gtid(global_tid);

  /* find out whether this is a parallel taskq or a serialized one */
  in_parallel = in_parallel_context(team);

  if (!tq->tq_root) {
    if (in_parallel) {
      /* vector ORDERED SECTION to taskq version */
      th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

      /* vector ORDERED SECTION to taskq version */
      th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;

      if (__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
        /* we are a worker: the master has built the root queue and released
           this barrier, so pick up the shared variables and go execute tasks */
        *shareds =
            CCAST(kmpc_shared_vars_t *, tq->tq_root->tq_shareds[tid].ai_data);

        KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));

        return NULL;
      }
    }

    /* master thread only from here on */
    if (tq->tq_curr_thunk_capacity < nproc) {
      if (tq->tq_curr_thunk)
        __kmp_free(tq->tq_curr_thunk);
      else
        /* only need to do this once at the outer level, i.e. when
           tq_curr_thunk is still NULL */
        __kmp_init_lock(&tq->tq_freelist_lck);

      tq->tq_curr_thunk =
          (kmpc_thunk_t **)__kmp_allocate(nproc * sizeof(kmpc_thunk_t *));
      tq->tq_curr_thunk_capacity = nproc;
    }

    if (in_parallel)
      tq->tq_global_flags = TQF_RELEASE_WORKERS;
  }

  nslots = (in_parallel) ? (2 * nproc) : 1;

  /* extra thunks for pending tasks being executed by other threads, plus one
     for the taskq task itself */
  nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1)
                          : nslots + 2;

  /* only the root taskq gets a per-thread array of shareds */
  nshareds = (!tq->tq_root && in_parallel) ? nproc : 1;

  /* create the overall queue data structure and its components */
  new_queue = __kmp_alloc_taskq(tq, in_parallel, nslots, nthunks, nshareds,
                                nproc, sizeof_thunk, sizeof_shareds,
                                &new_taskq_thunk, global_tid);

  /* rest of new_queue initializations */
  new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;

  if (in_parallel) {
    new_queue->tq_tasknum_queuing = 0;
    new_queue->tq_tasknum_serving = 0;
    new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
  }

  new_queue->tq_taskq_slot = NULL;
  new_queue->tq_nslots = nslots;
  new_queue->tq_hiwat = HIGH_WATER_MARK(nslots);
  new_queue->tq_nfull = 0;
  new_queue->tq_head = 0;
  new_queue->tq_tail = 0;
  new_queue->tq_loc = loc;

  if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
    /* prepare to serve the first-queued task's ORDERED directive */
    new_queue->tq_tasknum_serving = 1;

    /* vector ORDERED SECTION to taskq version */
    th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

    /* vector ORDERED SECTION to taskq version */
    th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
  }

  /* create a new thunk for the taskq_task in the new_queue */
  *shareds = CCAST(kmpc_shared_vars_t *, new_queue->tq_shareds[0].ai_data);

  new_taskq_thunk->th.th_shareds = *shareds;
  new_taskq_thunk->th_task = taskq_task;
  new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
  new_taskq_thunk->th_status = 0;

  KMP_DEBUG_ASSERT(new_taskq_thunk->th_flags & TQF_TASKQ_TASK);

  /* insert the new task queue into the tree, but only after all fields are
     initialized */
  if (in_parallel) {
    if (!tq->tq_root) { /* first taskq into the queue tree */
      new_queue->tq.tq_parent = NULL;
      new_queue->tq_first_child = NULL;
      new_queue->tq_next_child = NULL;
      new_queue->tq_prev_child = NULL;
      new_queue->tq_ref_count = 1; /* for the thread that built the queue */
      tq->tq_root = new_queue;
    } else {
      curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
      new_queue->tq.tq_parent = curr_queue;
      new_queue->tq_first_child = NULL;
      new_queue->tq_prev_child = NULL;
      new_queue->tq_ref_count = 1; /* for the thread that built the queue */

      KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", __LINE__,
                         global_tid, new_queue, new_queue->tq_ref_count));

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

      new_queue->tq_next_child =
          CCAST(struct kmpc_task_queue_t *, curr_queue->tq_first_child);

      if (curr_queue->tq_first_child != NULL)
        curr_queue->tq_first_child->tq_prev_child = new_queue;

      curr_queue->tq_first_child = new_queue;

      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
    }

    /* set up the thunk stack only after the code above determines curr_queue */
    new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
    tq->tq_curr_thunk[tid] = new_taskq_thunk;

    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
  } else {
    new_taskq_thunk->th_encl_thunk = 0;
    new_queue->tq.tq_parent = NULL;
    new_queue->tq_first_child = NULL;
    new_queue->tq_next_child = NULL;
    new_queue->tq_prev_child = NULL;
    new_queue->tq_ref_count = 1;
  }

  KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
  KF_DUMP(150, __kmp_dump_thunk(tq, new_taskq_thunk, global_tid));

  if (in_parallel) {
    KF_TRACE(25,
             ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
  } else {
    KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue,
                  global_tid));
  }

  KF_DUMP(25, __kmp_dump_task_queue(tq, new_queue, global_tid));

  KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));

  if (__kmp_env_consistency_check)
    __kmp_push_workshare(global_tid, ct_taskq, new_queue->tq_loc);

  KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));

  return new_taskq_thunk;
}

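/* How the pieces fit together (descriptive sketch; the exact code the compiler
   emits for a taskq construct is not shown here): __kmpc_taskq builds the
   queue and returns the taskq-task thunk to the thread that will generate
   tasks (workers get NULL back and fall straight into task execution).  The
   generating code then obtains one thunk per task with __kmpc_task_buffer,
   fills in its private data, and enqueues it with __kmpc_task;
   __kmpc_taskq_task parks the generator thunk back in tq_taskq_slot so other
   threads can re-dispatch it, __kmpc_end_taskq_task marks the queue
   TQF_ALL_TASKS_QUEUED, and __kmpc_end_taskq drains everything and tears the
   queue down. */
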
void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
                      kmpc_thunk_t *taskq_thunk) {
  kmp_taskq_t *tq;
  kmp_int32 is_outermost;
  kmpc_task_queue_t *queue;
  kmpc_thunk_t *thunk;
  kmp_info_t *th;
  kmp_int32 nproc, i;
  int in_parallel;
  kmp_uint32 spins;

  KE_TRACE(10, ("__kmpc_end_taskq called (%d)\n", global_tid));

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;

  /* for the outermost taskq only, all but one thread will have
     taskq_thunk == NULL */
  queue = (taskq_thunk == NULL) ? tq->tq_root
                                : taskq_thunk->th.th_shareds->sv_queue;

  KE_TRACE(50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
  is_outermost = (queue == tq->tq_root);
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

  if (in_parallel) {
    /* safeguard: release the waiting workers even if the outermost taskq
       never queued a task */
    if (is_outermost && (KMP_MASTER_GTID(global_tid))) {
      if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
        /* no lock needed, workers are still in spin mode */
        tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;

        __kmp_end_split_barrier(bs_plain_barrier, global_tid);
      }
    }

    /* wait until something appears in our queue or all tasks are queued */
    KMP_INIT_YIELD(spins);

    while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
           (!__kmp_taskq_has_any_children(queue)) &&
           (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
      KMP_YIELD_WHEN(TRUE, spins);
    }

    /* do work */
    do {
      /* 1st priority: service tasks in our own queue */
      while (((queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL)) &&
             (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL) {
        KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk,
                      queue, global_tid));
        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
      }

      /* 2nd priority: steal from descendant queues */
      if ((__kmp_taskq_has_any_children(queue)) &&
          (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
              NULL) {
        KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting "
                      "in queue: %p (%d)\n",
                      thunk, thunk->th.th_shareds->sv_queue, queue,
                      global_tid));

        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
      }
    } while ((!(queue->tq_flags & TQF_ALL_TASKS_QUEUED)) ||
             (queue->tq_nfull != 0));

    KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue,
                  global_tid));

    /* help out in descendant queues while tasks are still running */
    while ((!__kmp_taskq_tasks_finished(queue)) &&
           (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
               NULL) {
      KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting "
                    "in queue: %p (%d)\n",
                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

      __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
    }

    KF_TRACE(50, ("No work found in descendent queues or all work finished in "
                  "queue: %p (%d)\n",
                  queue, global_tid));

    if (!is_outermost) {
      /* need to return if NOWAIT present and not outermost taskq */
      if (queue->tq_flags & TQF_IS_NOWAIT) {
        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        queue->tq_ref_count--;
        KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);
        __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

        KE_TRACE(
            10,
            ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));

        return;
      }

      __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);

      /* WAIT until all tasks are finished and no child queues exist before
         proceeding */
      KMP_INIT_YIELD(spins);

      while (!__kmp_taskq_tasks_finished(queue) ||
             __kmp_taskq_has_any_children(queue)) {
        thunk = __kmp_find_task_in_ancestor_queue(tq, global_tid, queue);

        if (thunk != NULL) {
          KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting "
                        "in queue: %p (%d)\n",
                        thunk, thunk->th.th_shareds->sv_queue, queue,
                        global_tid));
          __kmp_execute_task_from_queue(tq, loc, global_tid, thunk,
                                        in_parallel);
        }

        KMP_YIELD_WHEN(thunk == NULL, spins);

        __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
      }

      __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
      if (!(queue->tq_flags & TQF_DEALLOCATED)) {
        queue->tq_flags |= TQF_DEALLOCATED;
      }
      __kmp_release_lock(&queue->tq_queue_lck, global_tid);

      /* only the allocating thread can deallocate the queue */
      if (taskq_thunk != NULL) {
        __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);
      }

      KE_TRACE(
          10,
          ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n",
           global_tid));

      return;
    }

    /* outermost queue: steal work from descendants until all tasks finish */
    KMP_INIT_YIELD(spins);

    while (!__kmp_taskq_tasks_finished(queue)) {
      thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);

      if (thunk != NULL) {
        KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting "
                      "in queue: %p (%d)\n",
                      thunk, thunk->th.th_shareds->sv_queue, queue,
                      global_tid));

        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
      }

      KMP_YIELD_WHEN(thunk == NULL, spins);
    }

    /* the barrier keeps the queue alive until every thread is done with it */
    if (!__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
      /* master thread tears down whatever is left */
      __kmp_remove_all_child_taskq(tq, global_tid, queue);

      /* now destroy the root queue */
      KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n",
                     global_tid, queue));
      KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

      /* the root queue entry */
      KMP_DEBUG_ASSERT((queue->tq.tq_parent == NULL) &&
                       (queue->tq_next_child == NULL));

      /* children must all be gone by now because of the barrier above */
      KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);

      for (i = 0; i < nproc; i++) {
        KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
      }

      for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL;
           i++, thunk = thunk->th.th_next_free)
        ;

      KMP_DEBUG_ASSERT(i ==
                       queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));

      for (i = 0; i < nproc; i++) {
        KMP_DEBUG_ASSERT(!tq->tq_curr_thunk[i]);
      }

      /* unlink the root queue entry */
      tq->tq_root = NULL;

      /* release storage for the root queue entry */
      KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue,
                    global_tid));

      queue->tq_flags |= TQF_DEALLOCATED;
      __kmp_free_taskq(tq, queue, in_parallel, global_tid);

      KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));

      /* release the workers now that the data structures are up to date */
      __kmp_end_split_barrier(bs_plain_barrier, global_tid);
    }

    th = __kmp_threads[global_tid];

    /* reset ORDERED SECTION to parallel version */
    th->th.th_dispatch->th_deo_fcn = 0;

    /* reset ORDERED SECTION to parallel version */
    th->th.th_dispatch->th_dxo_fcn = 0;
  } else {
    /* in serial execution context, dequeue the last task and execute it, if
       one was queued */
    if (queue->tq_nfull > 0) {
      KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

      thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

      if (queue->tq_flags & TQF_IS_LAST_TASK) {
        /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task()
           has been run, so this is the last task; run it with
           TQF_IS_LAST_TASK so instrumentation does the copy-out */
        thunk->th_flags |= TQF_IS_LAST_TASK;
      }

      KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
                    thunk, queue));

      __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
    }

    /* destroy the unattached serial queue now that there is no more work */
    KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n",
                   queue, global_tid));
    KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

    for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL;
         i++, thunk = thunk->th.th_next_free)
      ;
    KMP_DEBUG_ASSERT(i == queue->tq_nslots + 1);

    /* release storage for the unattached serial queue */
    KF_TRACE(50,
             ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));

    queue->tq_flags |= TQF_DEALLOCATED;
    __kmp_free_taskq(tq, queue, in_parallel, global_tid);
  }

  KE_TRACE(10, ("__kmpc_end_taskq return (%d)\n", global_tid));
}

kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) {
  kmp_taskq_t *tq;
  int in_parallel;
  kmp_int32 ret;
  kmpc_task_queue_t *queue;

  KE_TRACE(10, ("__kmpc_task called (%d)\n", global_tid));

  /* thunk->th_task is a regular task, never the taskq dispatcher */
  KMP_DEBUG_ASSERT(!(thunk->th_flags & TQF_TASKQ_TASK));

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

  if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
    thunk->th_tasknum = ++queue->tq_tasknum_queuing;

  /* for serial execution, dequeue the previous task and execute it; it cannot
     be the last task, which is handled in __kmpc_end_taskq */
  if (!in_parallel && queue->tq_nfull > 0) {
    kmpc_thunk_t *prev_thunk;

    /* should only have one thing in the queue */
    KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

    prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

    KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
                  prev_thunk, queue));

    __kmp_execute_task_from_queue(tq, loc, global_tid, prev_thunk, in_parallel);
  }

  KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));

  ret = __kmp_enqueue_task(tq, global_tid, queue, thunk, in_parallel);

  KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

  KE_TRACE(10, ("__kmpc_task return (%d)\n", global_tid));

  return ret;
}

void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk,
                       kmp_int32 status) {
  kmpc_task_queue_t *queue;
  kmp_taskq_t *tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  int tid = __kmp_tid_from_gtid(global_tid);

  KE_TRACE(10, ("__kmpc_taskq_task called (%d)\n", global_tid));
  KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));

  queue = thunk->th.th_shareds->sv_queue;

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_taskq, loc);

  /* thunk->th_task is the taskq_task */
  KMP_DEBUG_ASSERT(thunk->th_flags & TQF_TASKQ_TASK);

  /* not supposed to call __kmpc_taskq_task if it's already enqueued */
  KMP_DEBUG_ASSERT(queue->tq_taskq_slot == NULL);

  /* dequeue taskq thunk from curr_thunk stack */
  tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
  thunk->th_encl_thunk = NULL;

  KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));

  thunk->th_status = status;

  /* enqueue the taskq_task thunk in the queue's special slot */
  queue->tq_taskq_slot = thunk;

  KE_TRACE(10, ("__kmpc_taskq_task return (%d)\n", global_tid));
}

void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid,
                           kmpc_thunk_t *thunk) {
  kmp_taskq_t *tq;
  kmpc_task_queue_t *queue;
  int in_parallel;
  int tid;

  KE_TRACE(10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
  tid = __kmp_tid_from_gtid(global_tid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_taskq, loc);

  /* mark the queue as having had all of its tasks generated */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags),
                     TQF_ALL_TASKS_QUEUED);
#else
  {
    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
    queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
    __kmp_release_lock(&queue->tq_queue_lck, global_tid);
  }
#endif

  if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
    /* Normally __kmp_find_task_in_queue() refuses to schedule the last task in
       the queue if TQF_IS_LASTPRIVATE.  Now that all tasks have been
       generated, the last one in the queue really is the lastprivate task, so
       mark the queue accordingly. */
    if (!in_parallel) {
      /* no synchronization needed for serial context */
      queue->tq_flags |= TQF_IS_LAST_TASK;
    } else {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags),
                         TQF_IS_LAST_TASK);
#else
      {
        __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
        queue->tq_flags |= TQF_IS_LAST_TASK;
        __kmp_release_lock(&queue->tq_queue_lck, global_tid);
      }
#endif
    }
  }

  /* dequeue taskq thunk from curr_thunk stack */
  if (in_parallel) {
    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
  }

  KE_TRACE(10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
}

kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid,
                                 kmpc_thunk_t *taskq_thunk, kmpc_task_t task) {
  kmp_taskq_t *tq;
  kmpc_task_queue_t *queue;
  kmpc_thunk_t *new_thunk;
  int in_parallel;

  KE_TRACE(10, ("__kmpc_task_buffer called (%d)\n", global_tid));

  /* taskq_thunk->th_task is the taskq_task */
  KMP_DEBUG_ASSERT(taskq_thunk->th_flags & TQF_TASKQ_TASK);

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = taskq_thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

  /* get a thunk from the queue's freelist and initialize it for the task */
  new_thunk = __kmp_alloc_thunk(queue, in_parallel, global_tid);
  new_thunk->th.th_shareds =
      CCAST(kmpc_shared_vars_t *, queue->tq_shareds[0].ai_data);
  new_thunk->th_encl_thunk = NULL;
  new_thunk->th_task = task;

  new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;

  new_thunk->th_status = 0;

  KMP_DEBUG_ASSERT(!(new_thunk->th_flags & TQF_TASKQ_TASK));

  KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, new_thunk, global_tid));

  KE_TRACE(10, ("__kmpc_task_buffer return (%d)\n", global_tid));

  return new_thunk;
}
